├── .dockerignore ├── .env.example ├── .github └── workflows │ └── tests.yaml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── TODO.md ├── data-migrate.sh ├── docker-compose.dev.yml ├── docker-compose.yml ├── docs ├── diagrams │ └── agile_metrics.drawio └── images │ ├── agile_metrics.png │ ├── agile_metrics_cloud.png │ ├── datalens_example.png │ ├── etl_metrics.jpeg │ └── logs.png ├── examples ├── extended_model │ └── main.py └── serverless │ ├── main.py │ └── requirements.txt ├── migrations ├── clickhouse │ ├── 000001_create_table_issues.down.sql │ ├── 000001_create_table_issues.up.sql │ ├── 000002_create_table_issue_metrics.down.sql │ ├── 000002_create_table_issue_metrics.up.sql │ ├── 000003_create_table_issues_changelog.down.sql │ ├── 000003_create_table_issues_changelog.up.sql │ ├── 000004_create_view_issues_view.down.sql │ └── 000004_create_view_issues_view.up.sql └── v0.1.x │ ├── 000001_create_table_issues.down.sql │ ├── 000001_create_table_issues.up.sql │ ├── 000002_create_table_issue_metrics.down.sql │ ├── 000002_create_table_issue_metrics.up.sql │ ├── 000003_create_view_issues_view.down.sql │ └── 000003_create_view_issues_view.up.sql ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── conftest.py ├── test_config.py ├── test_etl.py ├── test_helpers.py └── test_state.py └── tracker_exporter ├── __init__.py ├── _meta.py ├── _typing.py ├── config.py ├── etl.py ├── exceptions.py ├── main.py ├── models ├── __init__.py ├── base.py └── issue.py ├── services ├── __init__.py ├── clickhouse.py ├── monitoring.py └── tracker.py ├── state ├── __init__.py ├── backends.py ├── factory.py ├── managers.py └── serializers.py └── utils ├── __init__.py └── helpers.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | # env/ 12 | # !/**/env 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # dotenv 81 | .env 82 | 83 | # virtualenv 84 | .venv 85 | venv* 86 | 87 | # Spyder project settings 88 | .spyderproject 89 | 90 | # Rope project settings 91 | .ropeproject 92 | 93 | .idea 94 | tests 95 | .git 96 | 97 | 98 | # Project 99 | 100 | .env 101 | .env.example 102 | data-migrate.sh 103 | Makefile 104 | docker-compose.* 105 | state.* 106 | TODO.* 107 | migrate 108 | examples 109 | docs 110 | clickhouse 111 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | EXPORTER_MONITORING__METRICS_ENABLED=false 2 | EXPORTER_MONITORING__METRICS_HOST=localhost 3 | EXPORTER_MONITORING__METRICS_PORT=8125 4 | EXPORTER_MONITORING__SENTRY_ENABLED=false 5 | EXPORTER_MONITORING__SENTRY_DSN=https://xxxxxxxxxxx@sentry.io/1 6 | 7 | EXPORTER_CLICKHOUSE__ENABLE_UPLOAD=true 8 | EXPORTER_CLICKHOUSE__HOST=localhost 9 | EXPORTER_CLICKHOUSE__PORT=8443 10 | EXPORTER_CLICKHOUSE__PROTO=https 11 | EXPORTER_CLICKHOUSE__CACERT_PATH=/etc/ssl/ca.pem 12 | EXPORTER_CLICKHOUSE__USERNAME=tracker 13 | EXPORTER_CLICKHOUSE__PASSWORD=mypassword 14 | EXPORTER_CLICKHOUSE__DATABASE=tracker 15 | EXPORTER_CLICKHOUSE__SERVERLESS_PROXY_ID=xxxxxxxxxxxxxxxx 16 | EXPORTER_CLICKHOUSE__ISSUES_TABLE=issues 17 | EXPORTER_CLICKHOUSE__ISSUE_METRICS_TABLE=issue_metrics 18 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | paths: 6 | - "tracker_exporter/**" 7 | - "tests/**" 8 | branches: 9 | - master 10 | push: 11 | paths: 12 | - "tracker_exporter/**" 13 | - "tests/**" 14 | branches: 15 | - master 16 | schedule: 17 | - cron: '20 4 * * 6' 18 | 19 | env: 20 | EXPORTER_TRACKER__TOKEN: ${{ secrets.EXPORTER_TRACKER__TOKEN }} 21 | EXPORTER_TRACKER__CLOUD_ORG_ID: ${{ secrets.EXPORTER_TRACKER__CLOUD_ORG_ID }} 22 | 23 | jobs: 24 | pytest: 25 | name: pytest 26 | runs-on: ${{matrix.os}} 27 | strategy: 28 | matrix: 29 | python-version: 30 | - "3.10" 31 | os: 32 | - ubuntu-latest 33 | - windows-latest 34 | - macos-latest 35 | fail-fast: false 36 | steps: 37 | - uses: actions/checkout@v4 38 | - name: Set up Python ${{ matrix.python-version }} 39 | uses: actions/setup-python@v4 40 | with: 41 | python-version: ${{ matrix.python-version }} 42 | cache: 'pip' 43 | cache-dependency-path: '**/requirements*.txt' 44 | - name: Install dependencies 45 | run: | 46 | python -W ignore -m pip install --upgrade pip 47 | python -W ignore -m pip install -U pytest-cov 48 | python -W ignore -m pip install -r requirements.txt 49 | python -W ignore -m pip install -r requirements-dev.txt 50 | python -W ignore -m pip 
install pytest-xdist[psutil] 51 | 52 | - name: Test with pytest 53 | run: | 54 | pytest -vv --cov=tracker_exporter --cov-append -n auto --junit-xml=.test_report.xml 55 | env: 56 | JOB_INDEX: ${{ strategy.job-index }} 57 | 58 | - name: Test Summary 59 | id: test_summary 60 | uses: test-summary/action@v2.1 61 | if: always() # always run, even if tests fail 62 | with: 63 | paths: | 64 | .test_report.xml 65 | 66 | # FIXME: not works 67 | # - name: Submit coverage 68 | # uses: codecov/codecov-action@v3 69 | # env: 70 | # CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 71 | # with: 72 | # env_vars: OS,PYTHON 73 | # name: ${{ matrix.os }}-${{ matrix.python-version }} 74 | # fail_ci_if_error: true 75 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | scripts/* 2 | /clickhouse 3 | migrate 4 | test.json 5 | state.json 6 | .ruff* 7 | exporter 8 | 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | pip-wheel-metadata/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | .python-version 94 | 95 | # pipenv 96 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 97 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 98 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 99 | # install all needed dependencies. 100 | #Pipfile.lock 101 | 102 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 103 | __pypackages__/ 104 | 105 | # Celery stuff 106 | celerybeat-schedule 107 | celerybeat.pid 108 | 109 | # SageMath parsed files 110 | *.sage.py 111 | 112 | # Environments 113 | .env 114 | .venv 115 | env/ 116 | venv/ 117 | ENV/ 118 | env.bak/ 119 | venv.bak/ 120 | 121 | # Spyder project settings 122 | .spyderproject 123 | .spyproject 124 | 125 | # Rope project settings 126 | .ropeproject 127 | 128 | # mkdocs documentation 129 | /site 130 | 131 | # mypy 132 | .mypy_cache/ 133 | .dmypy.json 134 | dmypy.json 135 | 136 | # Pyre type checker 137 | .pyre/ 138 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim as builder 2 | 3 | WORKDIR /usr/src/app 4 | COPY ./requirements.txt ./ 5 | RUN apt-get update && apt-get install -y --no-install-recommends build-essential \ 6 | && pip install --no-cache-dir --prefix=/usr/src/app/dist -r requirements.txt \ 7 | && apt-get purge -y --auto-remove build-essential \ 8 | && rm -rf /var/lib/apt/lists/* 9 | 10 | 11 | FROM python:3.10-slim 12 | 13 | COPY --from=builder /usr/src/app/dist /usr/local 14 | WORKDIR /opt/exporter 15 | 16 | COPY . . 17 | RUN pip install --no-cache-dir . 18 | RUN rm -rf /opt/exporter 19 | 20 | ENV TZ=Europe/Moscow 21 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \ 22 | echo $TZ > /etc/timezone && \ 23 | apt-get update && \ 24 | apt-get install -y --no-install-recommends tzdata && \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | WORKDIR /opt/exporter 28 | CMD ["tracker-exporter"] 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Akim Faskhutdinov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-build clean-pyc dist help clickhouse test tests migration docs 2 | .DEFAULT_GOAL := help 3 | 4 | help: 5 | @echo "🪄 PREPARE ENVIRONMENT" 6 | @echo "---------------------------------------------------------------------" 7 | @echo " init Install all python requirements" 8 | @echo " pre-commit Install pre-commit hooks" 9 | @echo "" 10 | @echo "👀 CHECK" 11 | @echo "---------------------------------------------------------------------" 12 | @echo " test Run tests (pytest)" 13 | @echo " test-no-cov Run tests (pytest) without coverage report" 14 | @echo " pylint Check python syntax & style by pylint" 15 | @echo " lint Check python syntax via Flake8" 16 | @echo " black Check python syntax & style by black" 17 | @echo " black-apply Apply black linter (autoformat)" 18 | @echo " sec Security linter (bandit)" 19 | @echo "" 20 | @echo "🛠 INSTALL & RELEASE" 21 | @echo "---------------------------------------------------------------------" 22 | @echo " install Install library to site-packages" 23 | @echo " build Build package" 24 | @echo " build-docker Build docker image" 25 | @echo " release Build & push package to PyPI" 26 | @echo " clean Clean build/install artifacts" 27 | @echo "" 28 | @echo "🐳 DEV & RUN" 29 | @echo "---------------------------------------------------------------------" 30 | @echo " up Up docker composition with app & clickhouse" 31 | @echo " up-clickhouse Up docker clickhouse" 32 | @echo " down Down docker composition (full)" 33 | @echo " down-clickhouse Down docker clickhouse" 34 | @echo " clickhouse Clickhouse CLI" 35 | @echo " migration Run clickhouse migration" 36 | @echo " run Run ETL" 37 | 38 | clean: clean-build clean-pyc 39 | 40 | clean-build: 41 | rm -rf build/ 42 | rm -rf dist/ 43 | rm -rf .eggs/ 44 | find . -name '*.egg-info' -exec rm -rf {} + 45 | find . -name '*.egg' -exec rm -rf {} + 46 | find . -name '.DS_Store' -exec rm -f {} + 47 | 48 | clean-pyc: 49 | find . -name '*.pyc' -exec rm -f {} + 50 | find . -name '*.pyo' -exec rm -f {} + 51 | find . -name '*~' -exec rm -f {} + 52 | find . -name '__pycache__' -exec rm -rf {} + 53 | 54 | test: 55 | @pytest -vv --cov=tracker_exporter 56 | 57 | tests: test 58 | 59 | test-no-cov: 60 | @pytest -v 61 | 62 | lint: 63 | @flake8 --config=setup.cfg --max-line=119 64 | 65 | pylint: 66 | @pylint --max-line-length=120 --rcfile=setup.cfg tracker_exporter 67 | 68 | black: 69 | @black tracker_exporter/* --color --diff --check 70 | 71 | black-apply: 72 | @black tracker_exporter/* 73 | 74 | sec: 75 | @bandit -r tracker_exporter 76 | 77 | build: 78 | @python3 setup.py sdist bdist_wheel 79 | 80 | build-docker: 81 | @docker build . 
-t tracker_exporter:dev

release: clean build
	@make clean
	@make build
	@python3 -m twine upload --repository pypi dist/*
	@make clean

install: clean
	@python3 setup.py install

init:
	@pip3 install -r requirements.txt
	@pip3 install -r requirements-dev.txt

up:
	@docker compose -f docker-compose.dev.yml up -d

up-clickhouse:
	@docker compose -f docker-compose.dev.yml up -d clickhouse

down:
	@docker compose -f docker-compose.dev.yml down

down-clickhouse:
	@docker compose -f docker-compose.dev.yml down clickhouse

clickhouse:
	@docker exec -it clickhouse clickhouse-client

run:
	@tracker-exporter --env-file .env

migration:
	@./data-migrate.sh
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/tracker-exporter.svg)](https://pypi.org/project/tracker-exporter/)
[![PyPi Package](https://img.shields.io/pypi/v/tracker-exporter.svg)](https://pypi.org/project/tracker-exporter/)
[![Tests](https://github.com/akimrx/yandex-tracker-exporter/workflows/Tests/badge.svg)](https://github.com/akimrx/yandex-tracker-exporter)

# Yandex.Tracker ETL

Exports issue metadata & agile metrics, transforms them and loads the result into an OLAP data storage. Metrics are based on the issue changelog.

⚠️ **Important**
**Versions 1.x.x are incompatible with 0.1.x. New versions work only on Python >= 3.10**

> You can fork this repository and refine the tool the way you want. Or use it as is - this will allow you to build basic analytics on the tasks from Yandex.Tracker.

**Requires:**

- Python `>=3.10.*`
- Clickhouse + specific [tables](/migrations/clickhouse/) (how to run a [migration](#migration))

**Collects:**

- Issue metadata (i.e. title, author, assignee, components, tags, status, etc)
- Issue changelog (i.e. the history of all events that occurred in the issue)
- Issue metrics calculated per status (i.e. the time spent in a particular status), such as Cycle & Lead time

## Datalens Demo

**[Deploy this demo dashboard to your Datalens instance](https://datalens.yandex.ru/marketplace/f2ejcgrg2h910r7cc93u)**

## What does this tool do?

**ETL** – Export, transform, load.

It's simple. It doesn't do anything supernatural and there is no rocket science in it.
It is a simple ant with some mathematical abilities: it takes data from one place, sorts/transforms/adapts/calculates it and puts it into another place.
Sometimes it has to visit a lot of endpoints to collect everything that needs to be taken to the storage (that's just how the Yandex.Tracker API works).

**Important.**

By default, the exporter processes only those tasks that were changed during the sliding window specified in the `EXPORTER_TRACKER__SEARCH__RANGE` parameter.
So, all tasks that have activity (changes) will be uploaded to the storage - something like eventual consistency.

If you need to upload historical data that will never be updated again, you can flexibly control the behavior through the [environment variables described below](#general-settings).

Here are some recipes for a one-shot export:
1. Launch the exporter with the `EXPORTER_TRACKER__SEARCH__RANGE` parameter set to a large window, for example, one year.
2. More specifically: describe the query in the Tracker QL format using the `EXPORTER_TRACKER__SEARCH__QUERY` environment variable. This way you can export targeted bundles of tasks and work around the [Tracker's strict limit of 10,000 tasks](https://github.com/yandex/yandex_tracker_client/issues/13).

Finally, run the exporter with the `--run-once` flag.

### Stateful mode

By default, the exporter does not store any state and, as described above, works within the sliding window. This behavior is not optimal, because the exporter repeatedly re-processes previously handled tasks.

The behavior can be changed by enabling stateful mode, which supports 3 backends:

- Local JSON file
- Remote JSON file (S3 object storage)
- Redis

#### Local JSON file

```ini
EXPORTER_STATEFUL=true

# used for the first run to capture historical issues
# when no previous state exists.
EXPORTER_STATEFUL_INITIAL_RANGE=7d  # this is the default value

EXPORTER_STATE__STORAGE=jsonfile  # this is the default value
EXPORTER_STATE__JSONFILE_STRATEGY=local  # this is the default value

...
```

#### Remote JSON file (S3)

```ini
EXPORTER_STATEFUL=true
EXPORTER_STATEFUL_INITIAL_RANGE=7d

EXPORTER_STATE__STORAGE=jsonfile
EXPORTER_STATE__JSONFILE_STRATEGY=s3

EXPORTER_STATE__JSONFILE_S3_BUCKET=tracker-exporter-state
EXPORTER_STATE__JSONFILE_S3_ACCESS_KEY=YCAxxxxxxxx
EXPORTER_STATE__JSONFILE_S3_SECRET_KEY=YCxxx-xxxxxxxxxxxxxxx
EXPORTER_STATE__JSONFILE_S3_ENDPOINT=https://storage.yandexcloud.net

...
```

#### Redis

```ini
EXPORTER_STATEFUL=true
EXPORTER_STATEFUL_INITIAL_RANGE=7d

EXPORTER_STATE__STORAGE=redis
EXPORTER_STATE__REDIS_DSN=redis://localhost:6379

...
```

### Cycle time calculation algorithm

Currently, status metrics are calculated based on transitions between statuses (using the issue changelog). The counting algorithm will be improved.

Let's imagine that a task can only be in 5 statuses:

- Open
- In progress
- Testing
- Ready for release
- Closed

Employees start working on the task; their actions and the task history are described below, correlated with what the exporter counts.

1. A new task is created with the initial status `Open`; no metrics are counted yet.
2. The developer takes the task into work, the transition is `Open -> In progress`; the metric for the `Open` status is calculated, while the current status `In progress` is not counted yet.
3. The developer submits the task to testing, the transition is `In progress -> Testing`; the metric for the `In progress` status is calculated, while the current status `Testing` is not counted yet.
4. The QA engineer returns the task for revision, the transition is `Testing -> In progress`; the time in the `Testing` status is calculated, while the `In progress` metric keeps its previous value for now.
5. The task is finalized and re-submitted to testing, the transition is `In progress -> Testing`; the delta of this transition is added incrementally to the previous value of the `In progress` metric, but `Testing` has not changed yet.
6. The task is tested and submitted for release, the transition is `Testing -> Ready for release`; the delta of this transition is added incrementally to the previous value of the `Testing` metric, while the `Ready for release` status is not counted yet.
7. The release is completed and the task is closed, the transition is `Ready for release -> Closed`; the metric for the `Ready for release` status is calculated. **The metric of the final status of this task (`Closed`) will not be (re)calculated.**

A minimal code sketch of this accounting is shown at the end of this section.

#### Planned improvements

Count the status metric as soon as a transition into it has been made, even if that status is still current and no further transition out of it has happened yet. Final statuses will be excluded from this change.

## Tech stats

> Metrics based on 100,000+ constantly changing production issues

- **CPU usage:** from `2%` to `10%`
- **Memory usage (RSS):** from `48MB` to `256MB`
- **Average processing time per issue (metrics + issue metadata):** 1.5 seconds
- **Average processing time per issue (with full changelog export):** 7 seconds

### Why is it taking so long?

This is how the Tracker API and the client library work. To get additional information about an issue (for example, a status name or an employee's name), you need to make a subquery to the API. Collecting data about a single issue can take several dozen HTTP requests.

This is also the answer to why the tool is not asynchronous: the API limits would not allow effective use of concurrency.

The processing speed of a single issue depends on how many changes there are in its history. More changes mean longer processing.
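To make the status-time accounting from the cycle time section above concrete, here is a minimal sketch of the counting idea. It is not the exporter's actual implementation (see `tracker_exporter/etl.py` and `tracker_exporter/models/issue.py` for that); the changelog tuples and field names are illustrative assumptions, and business-hours filtering is omitted:

```python
from collections import defaultdict
from datetime import datetime

# Hypothetical, simplified changelog: (timestamp, from_status, to_status),
# ordered by time. The real exporter reads this from the Yandex.Tracker API.
transitions = [
    (datetime(2024, 1, 1, 10, 0), "Open", "In progress"),
    (datetime(2024, 1, 2, 12, 0), "In progress", "Testing"),
    (datetime(2024, 1, 3, 9, 0), "Testing", "In progress"),
    (datetime(2024, 1, 3, 15, 0), "In progress", "Testing"),
    (datetime(2024, 1, 4, 11, 0), "Testing", "Ready for release"),
    (datetime(2024, 1, 5, 18, 0), "Ready for release", "Closed"),
]


def status_durations(created_at: datetime, events: list) -> dict:
    """Accumulate seconds per status; the current (final) status is never counted."""
    durations = defaultdict(float)
    entered_at = created_at  # when the issue entered the status it is currently in
    for moment, from_status, _to_status in events:
        # A transition closes the interval of `from_status`, so only that status
        # accumulates time; the new status opens a fresh, not-yet-counted interval.
        durations[from_status] += (moment - entered_at).total_seconds()
        entered_at = moment
    return dict(durations)


print(status_durations(datetime(2024, 1, 1, 9, 0), transitions))
# {'Open': 3600.0, 'In progress': ..., 'Testing': ..., 'Ready for release': ...}
```

Repeated visits to a status (steps 4-5 in the list above) simply add to the same counter, which is exactly the incremental behavior described there.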
## Extend exported issue data with your custom fields

Just declare your own `main.py` module in which you extend the [TrackerIssue](tracker_exporter/models/issue.py#L65) model, like:

```python
from tracker_exporter.models.issue import TrackerIssue
from tracker_exporter.utils.helpers import validate_resource
from tracker_exporter import run_etl

from yandex_tracker_client.collections import Issues


class ExtendedTrackerIssue(TrackerIssue):
    def __init__(self, issue: Issues) -> None:
        super().__init__(issue)

        self.foo_custom_field = validate_resource(issue, "fooCustomField")
        self.bar_custom_field = validate_resource(issue, "barCustomField")


run_etl(issue_model=ExtendedTrackerIssue)
```

**Don't forget to add the new fields to the Clickhouse migration.**

See the full example with a mixin [here](examples/extended_model/main.py).

## Usage

### Native

#### Install from source

```bash
# prepare virtual environment
python3 -m venv venv
source venv/bin/activate
make install

# configure environment variables
export EXPORTER_TRACKER__TOKEN=your_oauth_token  # or EXPORTER_TRACKER__IAM_TOKEN
export EXPORTER_TRACKER__CLOUD_ORG_ID=your_cloud_org_id  # or EXPORTER_TRACKER__ORG_ID for yandex360
export EXPORTER_CLICKHOUSE__HOST=localhost
export EXPORTER_CLICKHOUSE__PORT=8123
export EXPORTER_CLICKHOUSE__USERNAME=agile
export EXPORTER_CLICKHOUSE__PASSWORD=agile

# run
tracker-exporter
```

#### Install from PyPI

```bash
pip3 install tracker-exporter
tracker-exporter
```

#### Configure via .env file

Read about the settings [here](#configuration-via-environment-variables)

```bash
tracker-exporter --env-file /home/akimrx/tracker/.settings
```

### Docker

```bash
cd yandex-tracker-exporter
docker-compose up -d --build

# Run clickhouse migrations

docker logs tracker-exporter -f
```

## On-premise arch example

![](/docs/images/agile_metrics.png)

### On-premise Clickhouse

You can install Clickhouse with a proxy via the [Ansible role inside the project (previous versions)](https://github.com/akimrx/yandex-tracker-exporter/tree/v0.1.19/ansible).
Edit the inventory file `ansible/inventory/hosts.yml` and just run the playbook.

> **Attention:**
> For the role to work correctly, Docker must be installed on the target server.
239 | 240 | Example Clickhouse installation: 241 | 242 | ```bash 243 | git clone https://github.com/akimrx/yandex-tracker-exporter.git 244 | cd yandex-tracker-exporter 245 | git checkout v0.1.19 246 | python3 -m venv venv && source venv/bin/activate 247 | pip3 install -r requirements-dev.txt 248 | cd ansible 249 | ansible-playbook -i inventory/hosts.yml playbooks/clickhouse.yml --limit agile 250 | ``` 251 | 252 | Also, you can use [this extended Clickhouse role](https://github.com/akimrx/ansible-clickhouse-role) 253 | 254 | ## Yandex.Cloud – Cloud Functions 255 | 256 | ![](/docs/images/agile_metrics_cloud.png) 257 | 258 | ### Create a Managed Clickhouse cluster 259 | 260 | > How to: https://cloud.yandex.com/en/docs/managed-clickhouse/operations/cluster-create 261 | 262 | - Set user for exporter, example: `agile` 263 | - Set a database name, example: `agile` 264 | - Enable `Serverless access` flag 265 | - For testing enable host public access 266 | - Enable `Access from the management console` flag 267 | - Run migration or manual create tables (see migration block [here](#migration), see [sql](/migrations/clickhouse/)) 268 | 269 | ### Create Cloud Function 270 | 271 | > How to: https://cloud.yandex.com/en/docs/functions/quickstart/create-function/python-function-quickstart 272 | 273 | - Use Python >= 3.10 274 | - Copy/paste example content from `examples/serverless` ([code](/examples/serverless/)) 275 | - Set entrypoint: `main.handler` (for code from examples) 276 | - Set function timeout to `600`, because the launch can be long if there are a lot of updated issues during the collection period 277 | - Set memory to `512MB` or more 278 | - Add environment variables (see variables block [here](#configuration-via-environment-variables)) 279 | 280 | ```ini 281 | EXPORTER_TRACKER__TOKEN=XXXXXXXXXXXXXXXX 282 | EXPORTER_TRACKER__CLOUD_ORG_ID=123456 283 | EXPORTER_TRACKER__SEARCH__RANGE=2h 284 | EXPORTER_CLICKHOUSE__ENABLE_UPLOAD="true" 285 | EXPORTER_CLICKHOUSE__PROTO=https 286 | EXPORTER_CLICKHOUSE__CACERT_PATH=/etc/ssl/certs/ca-certificates.crt 287 | EXPORTER_CLICKHOUSE__PORT=8443 288 | EXPORTER_CLICKHOUSE__HOST=rc1b-xxxxxx.mdb.yandexcloud.net 289 | EXPORTER_CLICKHOUSE__USERNAME=agile 290 | EXPORTER_CLICKHOUSE__PASSWORD=xxxx 291 | EXPORTER_CHANGELOG_EXPORT_ENABLED="false" 292 | ``` 293 | 294 | - Release function 295 | - Run test 296 | - See logs 297 | 298 | ![](/docs/images/logs.png) 299 | 300 | ##### Serverless database connection without public access 301 | 302 | If you don't want to enable clickhouse public access, use service account with such permissions - `serverless.mdbProxies.user` and set environment variables below: 303 | 304 | ```bash 305 | EXPORTER_CLICKHOUSE__HOST=akfd3bhqk3xxxxxxxxxxx.clickhouse-proxy.serverless.yandexcloud.net 306 | EXPORTER_CLICKHOUSE__SERVERLESS_PROXY_ID=akfd3bhqk3xxxxxxxxxxxxx 307 | ``` 308 | 309 | > How to create database connection: https://cloud.yandex.com/en/docs/functions/operations/database-connection 310 | 311 | Also, the `EXPORTER_CLICKHOUSE__PASSWORD` variable with service account must be replaced by IAM-token. Keep this in mind. 312 | Probably, you should get it in the function code, because the IAM-token works for a limited period of time. 313 | 314 | ### Create Trigger 315 | 316 | > How to: https://cloud.yandex.com/en/docs/functions/quickstart/create-trigger/timer-quickstart 317 | 318 | - Create new trigger 319 | - Choose type `Timer` 320 | - Set interval every hour: `0 * ? 
* * *` 321 | - Select your function 322 | - Create serverless service account or use an existing one 323 | - Save trigger 324 | 325 | # Visualization 326 | 327 | You can use any BI/observability tool for visualization, for example: 328 | 329 | - Yandex DataLens (btw, this is [opensource](https://github.com/datalens-tech/datalens)). Also see [demo set](https://datalens.yandex.ru/marketplace/f2ejcgrg2h910r7cc93u) 330 | - Apache Superset 331 | - PowerBI 332 | - Grafana 333 | 334 | ![](/docs/images/datalens_example.png) 335 | 336 | # Migration 337 | 338 | Based on [go-migrate](https://github.com/golang-migrate/migrate) tool. 339 | 340 | ## Download and install go-migrate tool 341 | 342 | ### macOS 343 | 344 | ```shell 345 | wget https://github.com/golang-migrate/migrate/releases/download/v4.15.2/migrate.darwin-amd64.tar.gz -O migrate.tar.gz 346 | 347 | tar xvf migrate.tar.gz 348 | mv migrate ~/bin 349 | ``` 350 | 351 | ### Linux 352 | 353 | ```shell 354 | wget https://github.com/golang-migrate/migrate/releases/download/v4.15.2/migrate.linux-amd64.tar.gz -O migrate.tar.gz 355 | 356 | tar -xvf migrate.tar.gz 357 | mv migrate /usr/local/bin 358 | ``` 359 | 360 | ## Run migration 361 | 362 | Example bash script below. 363 | See full example script [here](/data-migrate.sh) 364 | 365 | ```bash 366 | #!/usr/bin/env bash 367 | 368 | set -Eeuo pipefail 369 | 370 | CLICKHOUSE_HOST="localhost" 371 | CLICKHOUSE_TCP_PORT=9000 372 | CLICKHOUSE_HTTP_PORT=8123 373 | CLICKHOUSE_USER="default" 374 | CLICKHOUSE_PASSWORD="strongpassword" 375 | 376 | MIGRATION_SOURCE_PATH="file://${PWD}/../migrations/clickhouse" 377 | MIGRATION_HISTORY_TABLE="ci_gomigrate_migrations" 378 | MIGRATION_DATABASE="agile" 379 | 380 | MIGRATION_CLICKHOUSE_DSN="clickhouse://${CLICKHOUSE_HOST}:${CLICKHOUSE_TCP_PORT}?username=${CLICKHOUSE_USER}&password=${CLICKHOUSE_PASSWORD}&database=${MIGRATION_DATABASE}&x-multi-statement=true&x-migrations-table=${MIGRATION_HISTORY_TABLE}" 381 | 382 | prepare_migration() { 383 | echo "CREATE DATABASE IF NOT EXISTS ${MIGRATION_DATABASE}" | \ 384 | curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}&password=${CLICKHOUSE_PASSWORD}" --data-binary @- 385 | 386 | } 387 | 388 | run_migration() { 389 | migrate -verbose \ 390 | -source $MIGRATION_SOURCE_PATH \ 391 | -database $MIGRATION_CLICKHOUSE_DSN \ 392 | up 393 | 394 | } 395 | 396 | prepare_migration 397 | run_migration 398 | ``` 399 | 400 | # Configuration via environment variables 401 | 402 | See config declaration [here](/tracker_exporter/config.py) 403 | 404 | ## General settings 405 | 406 | | variable | description | 407 | | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------ | 408 | | `EXPORTER_STATEFUL` | Enable stateful mode. Required `EXPORTER_STATE__*` params. Default is `False` | 409 | | `EXPORTER_STATEFUL_INITIAL_RANGE` | Initial search range when unknown last state. Default: `1w` | 410 | | `EXPORTER_CHANGELOG_EXPORT_ENABLED` | Enable export all issues changelog to Clickhouse. **Can greatly slow down exports** (x5 - x10). Default is `False` | 411 | | `EXPORTER_LOGLEVEL` | ETL log level. Default: `info` | 412 | | `EXPORTER_LOG_ETL_STATS` | Enable logging transform stats every N iteration. Default is `True` | 413 | | `EXPORTER_LOG_ETL_STATS_EACH_N_ITER` | How many iterations must pass to log stats. Default is `100` | 414 | | `EXPORTER_WORKDAYS` | Workdays for calculate business time. 0 - mon, 6 - sun. 
Default: `[0,1,2,3,4]` | 415 | | `EXPORTER_BUSINESS_HOURS_START` | Business hours start for calculate business time. Default: `09:00:00` | 416 | | `EXPORTER_BUSINESS_HOURS_END` | Business hours end for calculate business time. Default: `22:00:00` | 417 | | `EXPORTER_DATETIME_RESPONSE_FORMAT` | Yandex.Tracker datetime format in responses. Default: `%Y-%m-%dT%H:%M:%S.%f%z` | 418 | | `EXPORTER_DATETIME_QUERY_FORMAT` | Datetime format for search queries. Default: `%Y-%m-%d %H:%M:%S` | 419 | | `EXPORTER_DATETIME_CLICKHOUSE_FORMAT` | Datetime format for Clickhouse. Default: `%Y-%m-%dT%H:%M:%S.%f` | 420 | | `EXPORTER_ETL_INTERVAL_MINUTES` | Interval between run ETL. Default: `30` (minutes) | 421 | | `EXPORTER_CLOSED_ISSUE_STATUSES` | Statuses for mark issue as closed. Default: `closed,rejected,resolved,cancelled,released` | 422 | | `EXPORTER_NOT_NULLABLE_FIELDS` | Fields that should never be null (e.g. dates). Default: all datetime fields | 423 | 424 | ## Tracker settings 425 | 426 | | variable | description | 427 | | ------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------ | 428 | | `EXPORTER_TRACKER__LOGLEVEL` | Log level for Yandex.Tracker SDK. Default: `warning` | 429 | | `EXPORTER_TRACKER__TOKEN` | OAuth2 token. Required if `EXPORTER_TRACKER__IAM_TOKEN` is not passed | 430 | | `EXPORTER_TRACKER__ORG_ID` | Yandex360 organization ID. Required if `EXPORTER_TRACKER__CLOUD_ORG_ID` is not passed | 431 | | `EXPORTER_TRACKER__IAM_TOKEN` | Yandex.Cloud IAM token. Required if `EXPORTER_TRACKER__TOKEN` is not passed | 432 | | `EXPORTER_TRACKER__CLOUD_ORG_ID` | Yandex.Cloud organization ID. Required if `EXPORTER_TRACKER__ORG_ID` is not passed | 433 | | `EXPORTER_TRACKER__TIMEOUT` | Yandex.Tracker HTTP requests timeout. Default: `10` (sec) | 434 | | `EXPORTER_TRACKER__MAX_RETRIES` | Yandex.Tracker HTTP requests max retries. Default: `10` | 435 | | `EXPORTER_TRACKER__LANGUAGE` | Yandex.Tracker language. Default: `en` | 436 | | `EXPORTER_TRACKER__TIMEZONE` | Yandex.Tracker timezone. Default: `Europe/Moscow` | 437 | | `EXPORTER_TRACKER__SEARCH__QUERY` | Custom query for search issues. This variable has the highest priority and overrides other search parameters. Default is empty | 438 | | `EXPORTER_TRACKER__SEARCH__RANGE` | Search issues window. Has no effect in stateful mode. Default: `2h` | 439 | | `EXPORTER_TRACKER__SEARCH__QUEUES` | Include or exclude queues in search. Example: `DEV,SRE,!TEST,!TRASH` Default is empty (i.e. all queues) | 440 | | `EXPORTER_TRACKER__SEARCH__PER_PAGE_LIMIT` | Search results per page. Default: `100` | 441 | 442 | ## Clickhouse settings 443 | 444 | | variable | description | 445 | | --------------------------------------------- | ------------------------------------------------------------------ | 446 | | `EXPORTER_CLICKHOUSE__ENABLE_UPLOAD` | Enable upload data to Clickhouse. Default is `True` | 447 | | `EXPORTER_CLICKHOUSE__HOST` | Clickhouse host. Default: `localhost` | 448 | | `EXPORTER_CLICKHOUSE__PROTO` | Clickhouse protocol: http or https. Default: `http` | 449 | | `EXPORTER_CLICKHOUSE__PORT` | Clickhouse HTTP(S) port. Default: `8123` | 450 | | `EXPORTER_CLICKHOUSE__CACERT_PATH` | Path to CA cert. Only for HTTPS proto. Default is empty | 451 | | `EXPORTER_CLICKHOUSE__SERVERLESS_PROXY_ID` | Yandex Cloud Functions proxy ID. Default is empty | 452 | | `EXPORTER_CLICKHOUSE__USERNAME` | Clickhouse username. 
Default: `default` | 453 | | `EXPORTER_CLICKHOUSE__PASSWORD` | Clickhouse password. Can be empty. Default is empty | 454 | | `EXPORTER_CLICKHOUSE__DATABASE` | Clickhouse database. Default: `agile` | 455 | | `EXPORTER_CLICKHOUSE__ISSUES_TABLE` | Clickhouse table for issues metadata. Default: `issues` | 456 | | `EXPORTER_CLICKHOUSE__ISSUE_METRICS_TABLE` | Clickhouse table for issue metrics. Default: `issue_metrics` | 457 | | `EXPORTER_CLICKHOUSE__ISSUES_CHANGELOG_TABLE` | Clickhouse table for issues changelog. Default: `issues_changelog` | 458 | | `EXPORTER_CLICKHOUSE__AUTO_DEDUPLICATE` | Execute `OPTIMIZE` after each `INSERT`. Default is `True` | 459 | | `EXPORTER_CLICKHOUSE__BACKOFF_BASE_DELAY` | Base delay for backoff strategy. Default: `0.5` (sec) | 460 | | `EXPORTER_CLICKHOUSE__BACKOFF_EXPO_FACTOR` | Exponential factor for multiply every try. Default: `2.5` (sec) | 461 | | `EXPORTER_CLICKHOUSE__BACKOFF_MAX_TRIES` | Max tries for backoff strategy. Default: `3` | 462 | | `EXPORTER_CLICKHOUSE__BACKOFF_JITTER` | Enable jitter (randomize delay) for retries. Default: `True` | 463 | 464 | ## State settings 465 | 466 | | variable | description | 467 | | ---------------------------------------- | ----------------------------------------------------------------------------------------- | 468 | | `EXPORTER_STATE__STORAGE` | Storage type for StateKeeper. Can be: `jsonfile`, `redis`, `custom`. Default: `jsonfile` | 469 | | `EXPORTER_STATE__REDIS_DSN` | Connection string for Redis state storage when storage type is `redis`. Default is empty. | 470 | | `EXPORTER_STATE__JSONFILE_STRATEGY` | File store strategy for `jsonfile` storage type. Can be `s3` or `local`. Default: `local` | 471 | | `EXPORTER_STATE__JSONFILE_PATH` | Path to JSON state file. Default: `./state.json` | 472 | | `EXPORTER_STATE__JSONFILE_S3_BUCKET` | Bucket for `s3` strategy. Default is empty | 473 | | `EXPORTER_STATE__JSONFILE_S3_REGION` | Region for `s3` strategy. Default is `us-east-1` | 474 | | `EXPORTER_STATE__JSONFILE_S3_ENDPOINT` | Endpoint URL for `s3` strategy. Default is empty | 475 | | `EXPORTER_STATE__JSONFILE_S3_ACCESS_KEY` | AWS access key id for `s3` strategy. Default is empty | 476 | | `EXPORTER_STATE__JSONFILE_S3_SECRET_KEY` | AWS secret key for `s3` strategy. Default is empty | 477 | | `EXPORTER_STATE__CUSTOM_STORAGE_PARAMS` | Settings for custom storage params as `dict`. Default: `{}` | 478 | 479 | ## Observability settings 480 | 481 | | variable | description | 482 | | ------------------------------------------ | ---------------------------------------------------------------------------- | 483 | | `EXPORTER_MONITORING__METRICS_ENABLED` | Enable send statsd tagged metrics. Default is `False` | 484 | | `EXPORTER_MONITORING__METRICS_HOST` | DogStatsD / statsd host. Default: `localhost` | 485 | | `EXPORTER_MONITORING__METRICS_PORT` | DogStatsD / statsd port. Default: `8125` | 486 | | `EXPORTER_MONITORING__METRICS_BASE_PREFIX` | Prefix for metrics name. Default: `tracker_exporter` | 487 | | `EXPORTER_MONITORING__METRICS_BASE_LABELS` | List of tags for metrics. Example: `["project:internal",]`. Default is empty | 488 | | `EXPORTER_MONITORING__SENTRY_ENABLED` | Enable send exception stacktrace to Sentry. Default is `False` | 489 | | `EXPORTER_MONITORING__SENTRY_DSN` | Sentry DSN. Default is empty | 490 | 491 | # Monitoring 492 | 493 | Based on DogStatsD tagged format. VictoriaMetrics compatible. 
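The metrics listed below are plain DogStatsD-tagged metrics, so any StatsD/DogStatsD-compatible backend can receive them. For reference, a minimal sketch of emitting metrics in this shape with the `datadog` Python client (whether the exporter uses exactly this client internally is an assumption here; the commented values mirror the `EXPORTER_MONITORING__*` settings above):

```python
from datadog.dogstatsd import DogStatsd

# Connection values mirror the EXPORTER_MONITORING__* settings described above.
statsd = DogStatsd(
    host="localhost",                     # EXPORTER_MONITORING__METRICS_HOST
    port=8125,                            # EXPORTER_MONITORING__METRICS_PORT
    namespace="tracker_exporter",         # EXPORTER_MONITORING__METRICS_BASE_PREFIX
    constant_tags=["project:internal"],   # EXPORTER_MONITORING__METRICS_BASE_LABELS
)

# Counter, gauge and timing examples in the same shape as the table below.
statsd.increment("issues_total_processed_count")
statsd.gauge("etl_upload_status", 1)  # 1 - success, 2 - fail
statsd.timing("clickhouse_insert_time_seconds", 0.42, tags=["database:agile", "table:issues"])
```

Note that the metric names in the table already include the `tracker_exporter` base prefix, which corresponds to the client namespace configured above.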
494 | 495 | | Metric name | Metric type | Labels | Description | 496 | | ------------------------------------------------------ | ----------- | --------------- | ---------------------------------------------------------- | 497 | | `tracker_exporter_issue_transform_time_seconds` | time | - | Duration of transform per task (data packing to the model) | 498 | | `tracker_exporter_issues_total_processed_count` | count | - | Total issues processed | 499 | | `tracker_exporter_issues_search_time_seconds` | time | - | Yandex.Tracker search duration time in seconds | 500 | | `tracker_exporter_issues_without_metrics` | count | - | Issues with empty metrics (no changelog) | 501 | | `tracker_exporter_issue_prefetch_seconds` | time | - | Pre-transform data duration in seconds | 502 | | `tracker_exporter_comments_fetch_seconds` | time | - | Comments fetch duration in seconds | 503 | | `tracker_exporter_etl_duration_seconds` | time | - | ETL full pipeline duration in seconds | 504 | | `tracker_exporter_etl_upload_status` | gauge | - | Last upload status, 1 - success, 2 - fail | 505 | | `tracker_exporter_export_and_transform_time_seconds` | time | - | Overall export and transform duration in seconds | 506 | | `tracker_exporter_upload_to_storage_time_seconds` | time | - | Overall insert duration time in seconds | 507 | | `tracker_exporter_last_update_timestamp` | gauge | - | Last data update timestamp | 508 | | `tracker_exporter_clickhouse_insert_time_seconds` | time | database, table | Insert per table duration time in seconds | 509 | | `tracker_exporter_clickhouse_inserted_rows` | count | database, table | Inserted rows per table | 510 | | `tracker_exporter_clickhouse_deduplicate_time_seconds` | time | database, table | Optimize execute time duration in seconds | 511 | 512 | ### Metrics on dashboard demo 513 | 514 | ![](/docs/images/etl_metrics.jpeg) 515 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | 2 | # Project roadmap 3 | 4 | - [x] refactoring code 5 | - [x] CI 6 | - [x] export full issue changelog 7 | - [x] docker image 8 | - [x] update serverless instruction 9 | - [x] stateful mode (local json) 10 | - [x] stateful mode (s3 json) 11 | - [x] stateful mode (redis) 12 | - [ ] helm chart 13 | - [ ] replace asserts in code to if/raise 14 | - [ ] pytest 15 | -------------------------------------------------------------------------------- /data-migrate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -Eeuo pipefail 4 | 5 | SYSTEM=$(uname -s) 6 | ARCH=$(uname -p) 7 | 8 | CLICKHOUSE_HOST=localhost 9 | CLICKHOUSE_TCP_PORT=9000 10 | CLICKHOUSE_HTTP_PORT=8123 11 | CLICKHOUSE_USER=default 12 | 13 | GO_MIGRATE_VERSION="v4.16.2" 14 | MIGRATION_SOURCE_PATH="file://${PWD}/migrations/clickhouse" 15 | MIGRATION_HISTORY_TABLE="ci_gomigrate_migrations" 16 | MIGRATION_DATABASE="agile" 17 | 18 | MIGRATION_CLICKHOUSE_DSN="clickhouse://${CLICKHOUSE_HOST}:${CLICKHOUSE_TCP_PORT}?username=${CLICKHOUSE_USER}&database=${MIGRATION_DATABASE}&x-multi-statement=true&x-migrations-table=${MIGRATION_HISTORY_TABLE}" 19 | 20 | install_go_migrate() { 21 | echo "System is ${SYSTEM} (${ARCH})" 22 | if command -v ./migrate >/dev/null 2>&1; then 23 | echo "Tool for migration already installed, skipping installation" 24 | else 25 | echo "Installing go migrate tool..." 
    # pick the go-migrate build matching the OS and CPU architecture
    if [ "${SYSTEM}" = "Darwin" ]; then
      if [ "${ARCH}" = "arm" ]; then
        wget https://github.com/golang-migrate/migrate/releases/download/${GO_MIGRATE_VERSION}/migrate.darwin-arm64.tar.gz -O migrate.tar.gz
      else
        wget https://github.com/golang-migrate/migrate/releases/download/${GO_MIGRATE_VERSION}/migrate.darwin-amd64.tar.gz -O migrate.tar.gz
      fi
      tar xvf migrate.tar.gz migrate
    elif [ "${SYSTEM}" = "Linux" ]; then
      wget https://github.com/golang-migrate/migrate/releases/download/${GO_MIGRATE_VERSION}/migrate.linux-amd64.tar.gz -O migrate.tar.gz
      tar -xvf migrate.tar.gz migrate
    fi
    chmod +x migrate
    rm ./migrate.tar.gz
  fi
}


prepare_database() {
  echo "CREATE DATABASE IF NOT EXISTS ${MIGRATION_DATABASE}" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
}


prepare_migration() {
  echo "CREATE DATABASE IF NOT EXISTS ${MIGRATION_HISTORY_TABLE}" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
}


run_migration() {
  ./migrate -verbose \
    -source $MIGRATION_SOURCE_PATH \
    -database $MIGRATION_CLICKHOUSE_DSN \
    up
}

recreate_views() {
  echo "DROP VIEW IF EXISTS ${MIGRATION_DATABASE}.issues_view" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
  echo "DROP VIEW IF EXISTS ${MIGRATION_DATABASE}.issue_metrics_view" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
  echo "DROP VIEW IF EXISTS ${MIGRATION_DATABASE}.issues_changelog_view" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-

  echo "CREATE VIEW IF NOT EXISTS ${MIGRATION_DATABASE}.issues_view AS SELECT * FROM ${MIGRATION_DATABASE}.issues FINAL" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
  echo "CREATE VIEW IF NOT EXISTS ${MIGRATION_DATABASE}.issue_metrics_view AS SELECT * FROM ${MIGRATION_DATABASE}.issue_metrics FINAL" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
  echo "CREATE VIEW IF NOT EXISTS ${MIGRATION_DATABASE}.issues_changelog_view AS SELECT * FROM ${MIGRATION_DATABASE}.issues_changelog FINAL" | \
  curl "http://${CLICKHOUSE_HOST}:${CLICKHOUSE_HTTP_PORT}/?user=${CLICKHOUSE_USER}" --data-binary @-
}


install_go_migrate
prepare_database
prepare_migration
run_migration
recreate_views
-------------------------------------------------------------------------------- /docker-compose.dev.yml: --------------------------------------------------------------------------------

version: '3.1'

services:
  tracker-exporter:
    container_name: tracker-exporter
    hostname: tracker-exporter
    restart: unless-stopped
    build: .
9 | volumes: 10 | - ./.env:/opt/exporter/.env:ro 11 | command: | 12 | tracker-exporter --env-file /opt/exporter/.env 13 | 14 | clickhouse: 15 | image: clickhouse/clickhouse-server:23.3 16 | container_name: clickhouse 17 | hostname: clickhouse 18 | restart: unless-stopped 19 | volumes: 20 | - "./clickhouse:/var/lib/clickhouse" 21 | ports: 22 | - "9000:9000" 23 | - "8123:8123" 24 | 25 | clickhouse-migrator: 26 | image: busybox 27 | container_name: migrator 28 | restart: no 29 | volumes: 30 | - ./data-migrate.sh:/opt/data-migrate.sh 31 | command: | 32 | /opt/data-migrate.sh 33 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | 3 | services: 4 | clickhouse: 5 | image: clickhouse/clickhouse-server:23.8 6 | container_name: clickhouse 7 | hostname: clickhouse 8 | restart: unless-stopped 9 | volumes: 10 | - "./clickhouse:/var/lib/clickhouse" 11 | ports: 12 | - "9000:9000" 13 | - "8123:8123" 14 | 15 | tracker-exporter: 16 | build: . 17 | container_name: tracker-exporter 18 | hostname: tracker-exporter 19 | restart: unless-stopped 20 | environment: 21 | EXPORTER_CHANGELOG_EXPORT_ENABLED: "false" 22 | EXPORTER_TRACKER__TOKEN: ${TRACKER_TOKEN} 23 | EXPORTER_TRACKER__CLOUD_ORG_ID: ${TRACKER_ORG_ID} 24 | EXPORTER_CLICKHOUSE__HOST: clickhouse 25 | EXPORTER_CLICKHOUSE__PORT: 8123 26 | EXPORTER_CLICKHOUSE__ENABLE_UPLOAD: "true" 27 | EXPORTER_STATEFUL: "true" 28 | EXPORTER_STATEFUL_INITIAL_RANGE: "1d" 29 | EXPORTER_STATE__STORAGE: jsonfile 30 | EXPORTER_STATE__JSONFILE_STRATEGY: local 31 | EXPORTER_STATE__JSONFILE_PATH: /opt/exporter/state.json 32 | volumes: 33 | - "./exporter:/opt/exporter:rw" 34 | depends_on: 35 | - clickhouse 36 | -------------------------------------------------------------------------------- /docs/diagrams/agile_metrics.drawio: -------------------------------------------------------------------------------- 1 | 
QK/CxChs8ehYdrwbBc9B03KRrOWyruw9zTSlcxL+bAfrL57TgVu2RFpPTcnOghZ1BpngNLQWjJdyP2RY8wBkP4wMdIT2LzQ4osHPdMAf9251JbYjIAz03PtVUyo+NW8WDBzy/TLf8cWs3cUprFiLNKD/IFczCBj7YNrP/QGI8rz07ymKqqJw+PGNeP+MTz1Ef+aTn9fHIwF3DJzvXz/A4n2bF+KRn/5t1MIep6PXAq1kcZB5rLZmbJZe5/FO5CXr43OVuAHLvY//1LNBdcKUvx77ee570u+gu1TN32d4yE26ztKz+SbnL39KferYLc0l/r5S9wOPxT5LpWOsPu2Xjfw== -------------------------------------------------------------------------------- /docs/images/agile_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/docs/images/agile_metrics.png -------------------------------------------------------------------------------- /docs/images/agile_metrics_cloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/docs/images/agile_metrics_cloud.png -------------------------------------------------------------------------------- /docs/images/datalens_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/docs/images/datalens_example.png -------------------------------------------------------------------------------- /docs/images/etl_metrics.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/docs/images/etl_metrics.jpeg -------------------------------------------------------------------------------- /docs/images/logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/docs/images/logs.png -------------------------------------------------------------------------------- /examples/extended_model/main.py: -------------------------------------------------------------------------------- 1 | from tracker_exporter.models.issue import TrackerIssue 2 | from tracker_exporter.utils.helpers import to_snake_case, validate_resource 3 | from tracker_exporter import configure_sentry, run_etl 4 | 5 | from yandex_tracker_client.collections import Issues 6 | 7 | 8 | class CustomIssueFieldsMixin: 9 | """ 10 | Additional custom fields for Yandex Tracker issue. 11 | Must be created in the Clickhouse issue table. 
12 | """ 13 | 14 | def __init__(self, issue: Issues) -> None: 15 | self.foo_custom_field = to_snake_case(validate_resource(issue, "fooCustomField")) 16 | self.bar_custom_field = validate_resource(issue, "barCustomField") 17 | self.baz = True if "baz" in issue.tags else False 18 | 19 | 20 | class ExtendedTrackerIssue(CustomIssueFieldsMixin, TrackerIssue): 21 | """Extended Yandex Tracker issue model with custom fields.""" 22 | 23 | def __init__(self, issue: Issues) -> None: 24 | super().__init__(issue) 25 | 26 | 27 | def main() -> None: 28 | """Entry point.""" 29 | run_etl(ignore_exceptions=False, issue_model=ExtendedTrackerIssue) 30 | 31 | 32 | if __name__ == "__main__": 33 | configure_sentry() 34 | main() 35 | -------------------------------------------------------------------------------- /examples/serverless/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from tracker_exporter import run_etl 3 | 4 | logging.getLogger().setLevel(logging.INFO) 5 | 6 | 7 | def handler(event, context): 8 | try: 9 | run_etl(ignore_exceptions=False) 10 | response = {"statusCode": 200, "message": "success"} 11 | except Exception as exc: 12 | response = {"statusCode": 500, "message": exc} 13 | finally: 14 | return response 15 | -------------------------------------------------------------------------------- /examples/serverless/requirements.txt: -------------------------------------------------------------------------------- 1 | tracker-exporter 2 | -------------------------------------------------------------------------------- /migrations/clickhouse/000001_create_table_issues.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS `issues`; 2 | -------------------------------------------------------------------------------- /migrations/clickhouse/000001_create_table_issues.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `issues` 2 | ( 3 | `version` DateTime64(3, 'UTC') DEFAULT now() COMMENT 'Row version', 4 | 5 | `queue` LowCardinality(String) COMMENT 'Queue key', 6 | `title` String DEFAULT '' COMMENT 'Issue summary', 7 | `issue_key` String COMMENT 'Unique issue key like TEST-1', 8 | `issue_type` LowCardinality(String) COMMENT 'Issue type', 9 | `priority` LowCardinality(String) COMMENT 'Issue priority', 10 | `status` LowCardinality(String) COMMENT 'Last issue status', 11 | `resolution` LowCardinality(String) DEFAULT '' COMMENT 'Issue resolution', 12 | 13 | `assignee` String DEFAULT '' COMMENT 'Issue assignee', 14 | `author` String DEFAULT '' COMMENT 'Issue creator', 15 | `qa_engineer` String DEFAULT '' COMMENT 'QA engineer who conducted the testing', 16 | 17 | `tags` Array(String) COMMENT 'Issue labels', 18 | `components` Array(String) COMMENT 'Issue components', 19 | `project` LowCardinality(String) DEFAULT '' COMMENT 'Related project', 20 | 21 | `created_at` DateTime64(3, 'UTC') COMMENT 'Issue creation date', 22 | `updated_at` DateTime64(3, 'UTC') COMMENT 'Date of the last update of the issue', 23 | `deadline` Date DEFAULT 0 COMMENT 'Deadline for completing the issue', 24 | `closed_at` DateTime64(3, 'UTC') DEFAULT 0 COMMENT 'Closing date of the issue without resolution, based on custom closing statuses', 25 | `resolved_at` DateTime64(3, 'UTC') DEFAULT 0 COMMENT 'Closing date of the issue with the resolution', 26 | `start_date` Date DEFAULT 0 COMMENT 'Start date (fact, manual field, gantt)', 27 | `end_date` Date 
DEFAULT 0 COMMENT 'End date (fact, manual field, gantt)', 28 | 29 | `is_subtask` UInt8 DEFAULT 0 COMMENT 'Subtask flag', 30 | `is_closed` UInt8 DEFAULT 0 COMMENT 'Issue completion flag (based on custom closing statuses)', 31 | `is_resolved` UInt8 DEFAULT 0 COMMENT 'Issue completion flag (with resolution)', 32 | 33 | `story_points` Float32 DEFAULT 0.0 COMMENT 'Estimating the cost of the issue', 34 | `sprints` Array(String) COMMENT 'Sprints in which the issue participated', 35 | `parent_issue_key` String DEFAULT '' COMMENT 'The key of the parent issue, like TEST-1', 36 | `epic_issue_key` String DEFAULT '' COMMENT 'Epic key, like GOAL-1', 37 | 38 | `aliases` Array(String) COMMENT 'All previous issue keys', 39 | `was_moved` UInt8 DEFAULT 0 COMMENT 'Has the task been moved from another queue', 40 | `moved_at` DateTime64(3, 'UTC') DEFAULT 0 COMMENT 'The date the queue was changed if the task was moved', 41 | `moved_by` String DEFAULT '' COMMENT 'The employee who moved the task' 42 | ) 43 | ENGINE = ReplacingMergeTree(version) 44 | PARTITION BY toYYYYMM(updated_at) 45 | ORDER BY issue_key 46 | -------------------------------------------------------------------------------- /migrations/clickhouse/000002_create_table_issue_metrics.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS `issue_metrics`; 2 | -------------------------------------------------------------------------------- /migrations/clickhouse/000002_create_table_issue_metrics.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `issue_metrics` 2 | ( 3 | `version` DateTime64(3, 'UTC') DEFAULT now(), 4 | `last_seen` DateTime64(3, 'UTC') COMMENT 'The date when the issue was last in this status', 5 | 6 | `issue_key` String COMMENT 'Issue key', 7 | `status_name` LowCardinality(String) COMMENT 'Status name', 8 | `status_transitions_count` UInt8 COMMENT 'The number of transitions to this status', 9 | 10 | `duration` UInt32 COMMENT 'Time spent in the status in seconds (for all time)', 11 | `human_readable_duration` String DEFAULT '' COMMENT 'Human - readable format for duration', 12 | `busdays_duration` UInt32 COMMENT 'Time spent in the status in seconds (busdays only)', 13 | `human_readable_busdays_duration` String DEFAULT '' COMMENT 'Human - readable format for busdays_duration' 14 | ) 15 | ENGINE = ReplacingMergeTree(version) 16 | PARTITION BY toYYYYMM(last_seen) 17 | ORDER BY (issue_key, status_name, last_seen) 18 | -------------------------------------------------------------------------------- /migrations/clickhouse/000003_create_table_issues_changelog.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS `issues_changelog`; 2 | -------------------------------------------------------------------------------- /migrations/clickhouse/000003_create_table_issues_changelog.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `issues_changelog` 2 | ( 3 | `version` DateTime64(3, 'UTC') DEFAULT now(), 4 | `event_time` DateTime64(3, 'UTC') COMMENT 'Changelog event time', 5 | 6 | `issue_key` String COMMENT 'Issue key', 7 | `queue` LowCardinality(String) COMMENT 'Queue', 8 | `event_type` LowCardinality(String) COMMENT 'Event type', 9 | `transport` LowCardinality(String) COMMENT 'Event source, i.e. api, front, etc', 10 | `actor` String DEFAULT '' COMMENT 'Event initiator, i.e. 
employee name, robot name, etc', 11 | 12 | `changed_field` String COMMENT 'The field that was changed', 13 | `changed_from` String DEFAULT '' COMMENT 'Previous field value', 14 | `changed_to` String COMMENT 'New field value' 15 | ) 16 | ENGINE = ReplacingMergeTree(version) 17 | PARTITION BY toYYYYMM(event_time) 18 | ORDER BY (issue_key, event_time, event_type, changed_field) 19 | -------------------------------------------------------------------------------- /migrations/clickhouse/000004_create_view_issues_view.down.sql: -------------------------------------------------------------------------------- 1 | DROP VIEW IF EXISTS `issues_view`; 2 | DROP VIEW IF EXISTS `issue_metrics_view`; 3 | DROP VIEW IF EXISTS `issues_changelog_view`; 4 | -------------------------------------------------------------------------------- /migrations/clickhouse/000004_create_view_issues_view.up.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW IF NOT EXISTS `issues_view` AS 2 | SELECT * 3 | FROM `issues` 4 | FINAL; 5 | 6 | CREATE VIEW IF NOT EXISTS `issue_metrics_view` AS 7 | SELECT * 8 | FROM `issue_metrics` 9 | FINAL; 10 | 11 | CREATE VIEW IF NOT EXISTS `issues_changelog_view` AS 12 | SELECT * 13 | FROM `issues_changelog` 14 | FINAL; 15 | -------------------------------------------------------------------------------- /migrations/v0.1.x/000001_create_table_issues.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS `issues`; 2 | -------------------------------------------------------------------------------- /migrations/v0.1.x/000001_create_table_issues.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `issues` 2 | ( 3 | `version` DateTime DEFAULT now(), 4 | 5 | `queue` LowCardinality(String) COMMENT 'Queue key', 6 | `title` String DEFAULT '' COMMENT 'Issue summary', 7 | `issue_key` String COMMENT 'Unique issue key like TEST-1', 8 | `issue_type` LowCardinality(String) COMMENT 'Issue type', 9 | `priority` LowCardinality(String) COMMENT 'Issue priority', 10 | `status` LowCardinality(String) COMMENT 'Last issue status', 11 | `resolution` LowCardinality(String) DEFAULT '' COMMENT 'Issue resolution', 12 | 13 | `assignee` String DEFAULT '' COMMENT 'Issue assignee', 14 | `author` String DEFAULT '' COMMENT 'Issue creator', 15 | `qa_engineer` String DEFAULT '' COMMENT 'QA engineer who conducted the testing', 16 | 17 | `tags` Array(String) COMMENT 'Issue labels', 18 | `components` Array(String) COMMENT 'Issue components', 19 | 20 | `created_at` Date COMMENT 'Issue creation date', 21 | `updated_at` Date COMMENT 'Date of the last update of the issue', 22 | `deadline` Date DEFAULT toDate('1970-01-01') COMMENT 'Deadline for completing the issue', 23 | `closed_at` Date DEFAULT toDate('1970-01-01') COMMENT 'Closing date of the issue without resolution, based on custom closing statuses', 24 | `resolved_at` Date DEFAULT toDate('1970-01-01') COMMENT 'Closing date of the issue with the resolution', 25 | `start_date` Date DEFAULT toDate('1970-01-01') COMMENT 'Start date (fact, manual field, gantt)', 26 | `end_date` Date DEFAULT toDate('1970-01-01') COMMENT 'End date (fact, manual field, gantt)', 27 | 28 | `is_subtask` UInt8 DEFAULT 0 COMMENT 'Subtask flag', 29 | `is_closed` UInt8 DEFAULT 0 COMMENT 'Issue completion flag (based on custom closing statuses)', 30 | `is_resolved` UInt8 DEFAULT 0 COMMENT 'Issue completion flag (with resolution)', 31 | 32 
| `story_points` Float32 DEFAULT 0.0 COMMENT 'Estimating the cost of the issue', 33 | `sprints` Array(String) COMMENT 'Sprints in which the issue participated', 34 | `parent_issue_key` String DEFAULT '' COMMENT 'The key of the parent issue, like TEST-1', 35 | `epic_issue_key` String DEFAULT '' COMMENT 'Epic key, like GOAL-1' 36 | ) 37 | ENGINE = ReplacingMergeTree(version) 38 | PARTITION BY toYYYYMM(updated_at) 39 | ORDER BY issue_key 40 | -------------------------------------------------------------------------------- /migrations/v0.1.x/000002_create_table_issue_metrics.down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS `issue_metrics`; 2 | -------------------------------------------------------------------------------- /migrations/v0.1.x/000002_create_table_issue_metrics.up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS `issue_metrics` 2 | ( 3 | `version` DateTime DEFAULT now(), 4 | `last_seen` DateTime COMMENT 'The date when the issue was last in this status', 5 | 6 | `issue_key` String COMMENT 'Issue key', 7 | `status_name` LowCardinality(String) COMMENT 'Status name', 8 | `status_transitions_count` UInt8 COMMENT 'The number of transitions to this status', 9 | 10 | `duration` UInt32 COMMENT 'Time spent in the status in seconds (for all time)', 11 | `human_readable_duration` String DEFAULT '' COMMENT 'Human - readable format for duration', 12 | `busdays_duration` UInt32 COMMENT 'Time spent in the status in seconds (busdays only)', 13 | `human_readable_busdays_duration` String DEFAULT '' COMMENT 'Human - readable format for busdays_duration' 14 | ) 15 | ENGINE = ReplacingMergeTree(version) 16 | PARTITION BY toYYYYMM(last_seen) 17 | ORDER BY (issue_key, status_name, last_seen) 18 | -------------------------------------------------------------------------------- /migrations/v0.1.x/000003_create_view_issues_view.down.sql: -------------------------------------------------------------------------------- 1 | DROP VIEW IF EXISTS `issues_view`; -------------------------------------------------------------------------------- /migrations/v0.1.x/000003_create_view_issues_view.up.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW IF NOT EXISTS `issues_view` AS 2 | SELECT * 3 | FROM `issues` 4 | FINAL 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 119 3 | target-version = ['py310'] 4 | include = '\.pyi?$' 5 | 6 | 7 | [tool.pytest.ini_options] 8 | pythonpath = [ 9 | ".", "tracker_exporter", 10 | ] -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | wheel 2 | twine 3 | pytest 4 | pytest-cov 5 | bandit 6 | settings_doc 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | yandex_tracker_client==2.* 2 | boto3==1.34.* 3 | redis==5.0.* 4 | datadog==0.47.* 5 | APScheduler==3.10.* 6 | requests==2.31.* 7 | numpy==1.26.0 8 | pandas==2.1.1 9 | businesstimedelta==1.0.1 10 | holidays==0.34 11 | sentry-sdk==1.32.* 12 | python-dotenv 13 | pydantic==2.4.* 14 | pydantic-settings==2.0.* 15 | 
psutil==5.9.* 16 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = tracker-exporter 3 | description-file = README 4 | 5 | [flake8] 6 | extend-ignore = E203 7 | ignore = 8 | D203, 9 | W503, 10 | E722, 11 | W605, 12 | E402 13 | exclude = 14 | ansible, 15 | scripts, 16 | docs, 17 | migrations, 18 | .git, 19 | .env, 20 | build, 21 | dist, 22 | venv, 23 | .eggs, 24 | tests, 25 | scripts 26 | setup.py, 27 | .example, 28 | .yaml, 29 | .vscode 30 | max-complexity = 15 31 | max-line-length = 120 32 | 33 | [pylint.message-control] 34 | disable = 35 | W0511, 36 | C0114, 37 | C0115, 38 | C0116, 39 | W1203, 40 | W0703, 41 | R0903, 42 | C0116, 43 | R0913, 44 | R0902, 45 | R1719 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from os import path, environ 4 | from setuptools import find_packages, setup 5 | 6 | 7 | def readme(): 8 | with open("README.md", "r") as fh: 9 | long_description = fh.read() 10 | 11 | return long_description 12 | 13 | cwd = path.abspath(path.dirname(__file__)) 14 | 15 | 16 | def metadata(): 17 | meta = {} 18 | with open(path.join(cwd, "tracker_exporter", "_meta.py"), "r") as fh: 19 | exec(fh.read(), meta) # nosec 20 | return meta 21 | 22 | 23 | def requirements(): 24 | requirements_list = [] 25 | 26 | with open("requirements.txt") as requirements: 27 | for install in requirements: 28 | requirements_list.append(install.strip()) 29 | 30 | return requirements_list 31 | 32 | metadata = metadata() 33 | readme = readme() 34 | packages = find_packages() 35 | requirements = requirements() 36 | 37 | if environ.get("PYPI_FROM_GITHUB", 0) == 1: 38 | version = "{{PKG_VERSION}}" 39 | else: 40 | version = metadata.get("version") 41 | 42 | 43 | def main(): 44 | setup( 45 | name="tracker-exporter", 46 | version=version, 47 | author=metadata.get("author"), 48 | author_email=metadata.get("author_email"), 49 | license=metadata.get("license"), 50 | description=metadata.get("description"), 51 | long_description=readme, 52 | long_description_content_type="text/markdown", 53 | url=metadata.get("url"), 54 | download_url=metadata.get("download_url"), 55 | keywords=["yandex tracker exporter", "yandex", "tracker", "etl", "agile", "cycle time"], 56 | platforms=["osx", "linux"], 57 | packages=packages, 58 | classifiers = [ 59 | "Programming Language :: Python :: 3.10", 60 | ], 61 | install_requires=requirements, 62 | include_package_data=True, 63 | python_requires=">=3.10", 64 | entry_points={ 65 | "console_scripts": [ 66 | "tracker-exporter=tracker_exporter.main:main" 67 | ] 68 | }, 69 | zip_safe=False 70 | ) 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | 4 | from tracker_exporter.config import Settings 5 | from tracker_exporter.etl import YandexTrackerETL 6 | from tracker_exporter.services.clickhouse import ClickhouseClient 7 | from tracker_exporter.services.tracker import YandexTrackerClient 8 | 9 | # Token & OrgID from Github 10 | os.environ["EXPORTER_TRACKER__SEARCH__QUEUES"] = "OSSPYTEST" 11 | os.environ["EXPORTER_CLICKHOUSE__ENABLE_UPLOAD"] = "false" 12 | 13 | 14 | 
@pytest.fixture(scope="function") 15 | def etl() -> YandexTrackerETL: 16 | """Returns YandexTrackerETL for tests.""" 17 | return YandexTrackerETL( 18 | tracker_client=YandexTrackerClient(), 19 | clickhouse_client=ClickhouseClient(), 20 | ) 21 | 22 | 23 | @pytest.fixture(scope="function") 24 | def config() -> Settings: 25 | return Settings() 26 | -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class test_valid_config(): 4 | pass 5 | 6 | 7 | 8 | class test_invalid_config(): 9 | pass 10 | -------------------------------------------------------------------------------- /tests/test_etl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import pytest 4 | from tracker_exporter.etl import YandexTrackerETL 5 | 6 | NEW_ISSUE = None 7 | 8 | 9 | @pytest.mark.skip("Later") 10 | def test_prepare_issue(etl: YandexTrackerETL): 11 | global NEW_ISSUE 12 | n = str(time.time()).split(".")[0] 13 | new_issue = etl.tracker.client.issues.create(queue="OSSPYTEST", summary=f"TEST-{n}") 14 | NEW_ISSUE = new_issue.key 15 | # os.environ["EXPORTER_TRACKER__SEARCH_QUERY"] = f"Issue: {new_issue.key}" 16 | # time.sleep(5) 17 | # etl.tracker.client.issues[new_issue.key] # todo status change to In Progress 18 | # time.sleep(5) 19 | 20 | 21 | def test_query_builder(etl: YandexTrackerETL): 22 | pass 23 | 24 | 25 | def test_issue_transform(etl: YandexTrackerETL): 26 | pass 27 | 28 | 29 | def test_export_and_transform(etl: YandexTrackerETL): 30 | pass 31 | 32 | 33 | def test_upload_to_storage(etl: YandexTrackerETL): 34 | pass 35 | 36 | 37 | def test_full_run(etl: YandexTrackerETL): 38 | pass 39 | -------------------------------------------------------------------------------- /tests/test_helpers.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tracker_exporter.utils.helpers as helpers 3 | 4 | from tracker_exporter.config import Settings 5 | from datetime import datetime 6 | from contextlib import nullcontext as does_not_raise 7 | 8 | 9 | class StringTestObject: 10 | def __init__(self): 11 | self.name = "stringTestObject" 12 | 13 | 14 | class IntTestObject: 15 | def __init__(self): 16 | self.name = 1 17 | 18 | 19 | @pytest.mark.parametrize( 20 | "end_time, start_time, unit, expected, expectation", 21 | [ 22 | ( 23 | datetime(2023, 1, 1, 10, 1, 0), 24 | datetime(2023, 1, 1, 10, 0, 0), 25 | helpers.TimeDeltaOut.SECONDS, 26 | 60, 27 | does_not_raise() 28 | ), 29 | ( 30 | datetime(2023, 1, 1, 10, 1, 0), 31 | datetime(2023, 1, 1, 10, 0, 0), 32 | helpers.TimeDeltaOut.MINUTES, 33 | 1, 34 | does_not_raise() 35 | ), 36 | ( 37 | "2023-01-01 10:01:00", 38 | "2023-01-01 10:00:00", 39 | helpers.TimeDeltaOut.MINUTES, 40 | 1, 41 | pytest.raises(AssertionError) 42 | ), 43 | ] 44 | ) 45 | def test_get_timedelta(end_time, start_time, unit, expected, expectation): 46 | with expectation: 47 | assert expected == helpers.get_timedelta(end_time, start_time, unit) 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "start_date, end_date, busdays_only, expected", 52 | [ 53 | ( 54 | datetime(2023, 1, 1, 10, 0, 0), 55 | datetime(2023, 1, 1, 10, 30, 0), 56 | True, 57 | 0 58 | ), 59 | ( 60 | datetime(2023, 1, 1, 10, 0, 0), 61 | datetime(2023, 1, 1, 10, 30, 0), 62 | False, 63 | 30 * 60 64 | ), 65 | ( 66 | "2023-01-01 10:00:00", 67 | "2023-01-01 10:30:00", 68 | True, 69 | 0 70 | ), 71 | ( 
72 | "2023-01-01 10:00:00", 73 | "2023-01-01 10:30:00", 74 | False, 75 | 30 * 60 76 | ), 77 | ( 78 | "2023-10-16 10:00:00", 79 | "2023-10-16 23:00:00", 80 | True, 81 | 12 * 60 * 60 82 | ), 83 | ] 84 | ) 85 | def test_calculate_time_spent(start_date, end_date, busdays_only, expected): 86 | assert expected == helpers.calculate_time_spent(start_date, end_date, busdays_only) 87 | 88 | 89 | def test_fix_null_dates(config: Settings): 90 | data = {"a": "b"} 91 | for i in range(0, len(config.not_nullable_fields)): 92 | data[config.not_nullable_fields[i]] = None 93 | assert data[config.not_nullable_fields[i]] is None 94 | 95 | cleaned_data = helpers.fix_null_dates(data) 96 | assert data == cleaned_data 97 | 98 | 99 | @pytest.mark.parametrize( 100 | "resource, attribute, low, expected", 101 | [ 102 | ( 103 | StringTestObject(), 104 | "name", 105 | True, 106 | "stringtestobject", 107 | ), 108 | ( 109 | StringTestObject(), 110 | "name", 111 | False, 112 | "stringTestObject", 113 | ), 114 | ( 115 | StringTestObject(), 116 | "age", 117 | False, 118 | None, 119 | ), 120 | ( 121 | IntTestObject(), 122 | "name", 123 | True, 124 | 1, 125 | ), 126 | ( 127 | IntTestObject(), 128 | "age", 129 | False, 130 | None, 131 | ), 132 | ] 133 | ) 134 | def test_validate_resource(resource, attribute, low, expected): 135 | assert expected == helpers.validate_resource(resource, attribute, low) 136 | 137 | 138 | @pytest.mark.parametrize( 139 | "text, expected", 140 | [ 141 | ("русскаястрока", "русскаястрока"), 142 | ("РусскийВерблюд", "русский_верблюд"), 143 | ("русскийВерблюд2", "русский_верблюд_2"), 144 | ("Русские пробелы", "русские_пробелы"), 145 | ("русский-кебаб", "русский_кебаб"), 146 | ("РУССКИЕ_БОЛЬШИЕ", "русские_большие"), 147 | ("русская_змея", "русская_змея"), 148 | ("РусскийДлинныйВерблюдПлюсЧисло1", "русский_длинный_верблюд_плюс_число_1"), 149 | ("singlestring", "singlestring"), 150 | ("camelCase", "camel_case"), 151 | ("longCamelCase", "long_camel_case"), 152 | ("longCamelCaseWithNumber1", "long_camel_case_with_number_1"), 153 | ("PascalCase", "pascal_case"), 154 | ("LongPascalCase", "long_pascal_case"), 155 | ("LongPascalCaseWithNumber1", "long_pascal_case_with_number_1"), 156 | ("snake_case", "snake_case"), 157 | ("kebab-case", "kebab_case"), 158 | ("CONSTANT_CASE", "constant_case"), 159 | ("camelCase-kebab_snakePascalCaseCONSTANT_case", "camel_case_kebab_snake_pascal_case_constant_case"), 160 | ("separated string case", "separated_string_case"), 161 | (None, None), 162 | (" ", ""), 163 | ] 164 | ) 165 | def test_to_snake_case(text, expected): 166 | assert expected == helpers.to_snake_case(text) 167 | 168 | 169 | @pytest.mark.parametrize( 170 | "dtime, date_only, timezone, expected", 171 | [ 172 | ( 173 | "2023-01-01T10:00:00.123+0000", 174 | True, 175 | "UTC", 176 | "2023-01-01" 177 | ), 178 | ( 179 | "2023-01-01T10:00:00.123+0300", 180 | False, 181 | "UTC", 182 | "2023-01-01T07:00:00.123" 183 | ), 184 | ( 185 | "2023-01-01T10:00:00.123+0000", 186 | False, 187 | "Europe/Moscow", 188 | "2023-01-01T13:00:00.123" 189 | ), 190 | ( 191 | None, 192 | False, 193 | "UTC", 194 | None 195 | ), 196 | ] 197 | ) 198 | def test_convert_datetime(dtime, date_only, timezone, expected): 199 | assert expected == helpers.convert_datetime(dtime, date_only=date_only, timezone=timezone) 200 | 201 | 202 | @pytest.mark.skip("Later") 203 | def test_backoff(exceptions, base_delay, expo_factor, max_tries, jitter): 204 | pass 205 | 206 | 207 | @pytest.mark.parametrize( 208 | "seconds, verbosity, expected", 209 | [ 210 | (60, 2, 
"1m"), 211 | (300, 2, "5m"), 212 | (320, 2, "5m 20s"), 213 | (86700, 2, "1d 5m"), 214 | (3200400, 3, "1mo 1w 1h") 215 | ] 216 | ) 217 | def test_to_human_time(seconds, verbosity, expected): 218 | assert expected == helpers.to_human_time(seconds, verbosity) 219 | 220 | 221 | @pytest.mark.parametrize( 222 | "timestr, expected", 223 | [ 224 | ("1m", 60), 225 | ("5m", 300), 226 | ("5m 20s", 320), 227 | ("1d 5m", 86700), 228 | ("1mo 1w 1h", 3200400), 229 | ] 230 | ) 231 | def test_from_human_time(timestr, expected): 232 | assert expected == helpers.from_human_time(timestr) 233 | 234 | 235 | @pytest.mark.parametrize( 236 | "text, expected", 237 | [ 238 | ("normalized", "normalized"), 239 | ("emoji😎", "emoji"), 240 | ] 241 | ) 242 | def test_string_normalize(text, expected): 243 | assert expected == helpers.string_normalize(text) 244 | 245 | -------------------------------------------------------------------------------- /tests/test_state.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class TestJSONFileStateStorage: 4 | pass 5 | 6 | 7 | class TestRedisStateStorage: 8 | pass 9 | -------------------------------------------------------------------------------- /tracker_exporter/__init__.py: -------------------------------------------------------------------------------- 1 | from tracker_exporter.main import ( 2 | run_etl, 3 | configure_sentry, 4 | configure_state_manager, 5 | ) 6 | from tracker_exporter.etl import YandexTrackerETL 7 | from tracker_exporter.services.clickhouse import ClickhouseClient 8 | from tracker_exporter.services.tracker import YandexTrackerClient 9 | 10 | __all__ = [ 11 | "ClickhouseClient", 12 | "YandexTrackerClient", 13 | "YandexTrackerETL", 14 | "run_etl", 15 | "configure_sentry", 16 | "configure_state_manager", 17 | ] 18 | -------------------------------------------------------------------------------- /tracker_exporter/_meta.py: -------------------------------------------------------------------------------- 1 | version = "2.0.0" 2 | url = "https://github.com/akimrx/yandex-tracker-exporter" 3 | download_url = "https://pypi.org/project/tracker-exporter/" 4 | appname = "yandex_tracker_exporter" 5 | description = "Yandex.Tracker issue metrics exporter" 6 | author = "Akim Faskhutdinov" 7 | author_email = "akimstrong@yandex.ru" 8 | license = "MIT" 9 | -------------------------------------------------------------------------------- /tracker_exporter/_typing.py: -------------------------------------------------------------------------------- 1 | from typing import TypeVar, Union, Sequence 2 | 3 | T = TypeVar("T") 4 | DateTimeISO8601Str = Union[str, int] 5 | DateStr = str 6 | _Sequence = Union[T, Sequence[T]] 7 | -------------------------------------------------------------------------------- /tracker_exporter/config.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | 4 | from functools import lru_cache 5 | from typing import Literal, Optional, Union 6 | from pydantic import validator, root_validator 7 | from pydantic_settings import BaseSettings 8 | 9 | from tracker_exporter.models.base import YandexTrackerLanguages, LogLevels 10 | from tracker_exporter.exceptions import ConfigurationError 11 | from tracker_exporter.services.monitoring import DogStatsdClient 12 | 13 | YANDEX_TRACKER_API_SEARCH_HARD_LIMIT = 10000 14 | YANDEX_TRACKER_HARD_LIMIT_ISSUE_URL = "https://github.com/yandex/yandex_tracker_client/issues/13" 15 | 16 | logger = 
logging.getLogger(__name__) 17 | 18 | 19 | class MonitoringSettings(BaseSettings): 20 | """Observability settings.""" 21 | 22 | metrics_enabled: Optional[bool] = False 23 | metrics_host: Optional[str] = "localhost" 24 | metrics_port: Optional[int] = 8125 25 | metrics_base_prefix: Optional[str] = "tracker_exporter" 26 | metrics_base_labels: Optional[list[str]] = [] 27 | sentry_enabled: Optional[bool] = False 28 | sentry_dsn: Optional[str] = None 29 | 30 | @validator("sentry_dsn", pre=True, always=True) 31 | def validate_sentry_dsn(cls, value: str | None, values: dict) -> str: 32 | sentry_enabled = values.get("sentry_enabled") 33 | if sentry_enabled and not value: 34 | raise ConfigurationError("Sentry DSN must not be empty when Sentry is enabled") 35 | return value 36 | 37 | class Config: 38 | extra = "ignore" 39 | 40 | 41 | class ClickhouseSettings(BaseSettings): 42 | """Settings for Clickhouse storage.""" 43 | 44 | enable_upload: Optional[bool] = True 45 | host: Optional[str] = "localhost" 46 | proto: Optional[str] = "http" 47 | port: Optional[int] = 8123 48 | cacert_path: Optional[str] = None 49 | serverless_proxy_id: str | None = None 50 | username: Optional[str] = "default" 51 | password: Optional[str] = None 52 | database: Optional[str] = "agile" 53 | issues_table: Optional[str] = "issues" 54 | issue_metrics_table: Optional[str] = "issue_metrics" 55 | issues_changelog_table: Optional[str] = "issues_changelog" 56 | auto_deduplicate: Optional[bool] = True 57 | backoff_base_delay: Optional[Union[int, float]] = 0.5 58 | backoff_expo_factor: Optional[Union[int, float]] = 2.5 59 | backoff_max_tries: Optional[int] = 3 60 | backoff_jitter: Optional[bool] = True 61 | 62 | @validator("serverless_proxy_id", pre=True, always=True) 63 | def validate_serverless_proxy_id(cls, value: str | None, values: dict) -> str: 64 | http = values.get("proto") == "http" 65 | if http and value is not None: 66 | raise ConfigurationError("Clickhouse proto must be HTTPS when serverless used") 67 | return value 68 | 69 | @validator("cacert_path", pre=True, always=True) 70 | def validate_cacert_path(cls, value: str | None, values: dict) -> str: 71 | https = values.get("proto") == "https" 72 | if https and not value: 73 | raise ConfigurationError("CA cert path must not be empty when Clickhouse proto is HTTPS") 74 | return value 75 | 76 | class Config: 77 | extra = "ignore" 78 | 79 | 80 | class IssuesSearchSettings(BaseSettings): 81 | """Settings for search & export.""" 82 | 83 | query: Optional[str] = None 84 | range: Optional[str] = "2h" 85 | queues: Optional[Union[str, list[str]]] = None 86 | per_page_limit: Optional[int] = 100 87 | 88 | @validator("queues", pre=True, always=True) 89 | def validate_queues(cls, value: str) -> list: 90 | if value is None: 91 | return None 92 | 93 | if not isinstance(value, (str, list)): 94 | raise ConfigurationError("Invalid QUEUES. Example: TEST,TRASH. 
Received: %s", value) 95 | 96 | queues = value.split(",") if isinstance(value, str) else value 97 | return ", ".join([f"{q.upper()}" for q in queues]) 98 | 99 | class Config: 100 | extra = "ignore" 101 | 102 | 103 | class TrackerSettings(BaseSettings): 104 | """Settings for Yandex.Tracker client.""" 105 | 106 | loglevel: Optional[LogLevels] = LogLevels.warning 107 | token: Optional[str] = None 108 | org_id: Optional[str] = None 109 | iam_token: Optional[str] = None 110 | cloud_org_id: Optional[str] = None 111 | timeout: Optional[int] = 10 112 | max_retries: Optional[int] = 10 113 | language: Optional[YandexTrackerLanguages] = YandexTrackerLanguages.en 114 | timezone: Optional[str] = "Europe/Moscow" 115 | search: IssuesSearchSettings = IssuesSearchSettings() 116 | 117 | @root_validator(pre=True) 118 | def validate_tokens_and_orgs(cls, values) -> str: 119 | token = values.get("token") 120 | iam_token = values.get("iam_token") 121 | org_id = values.get("org_id") 122 | cloud_org_id = values.get("cloud_org_id") 123 | 124 | if all((token, iam_token)): 125 | raise ConfigurationError("Two tokens passed. Please use one of: TOKEN or IAM_TOKEN") 126 | elif not any((token, iam_token)): 127 | raise ConfigurationError("Empty tokens. Please use one of: TOKEN or IAM_TOKEN") 128 | 129 | if all((cloud_org_id, org_id)): 130 | raise ConfigurationError("Two orgs id passed. Please use one of: ORG_ID or CLOUD_ORG_ID") 131 | elif not any((cloud_org_id, org_id)): 132 | raise ConfigurationError("Empty orgs id. Please use one of: ORG_ID or CLOUD_ORG_ID") 133 | 134 | return values 135 | 136 | class Config: 137 | extra = "ignore" 138 | 139 | 140 | class StateSettings(BaseSettings): 141 | """Settings for stateful mode.""" 142 | 143 | storage: Optional[Literal["redis", "jsonfile", "custom"]] = "jsonfile" 144 | redis_dsn: Optional[str] = "redis://localhost:6379" 145 | jsonfile_strategy: Optional[Literal["s3", "local"]] = "local" 146 | jsonfile_path: Optional[str] = "state.json" 147 | jsonfile_s3_bucket: Optional[str] = None 148 | jsonfile_s3_region: Optional[str] = "us-east-1" 149 | jsonfile_s3_endpoint: Optional[str] = None 150 | jsonfile_s3_access_key: Optional[str] = None 151 | jsonfile_s3_secret_key: Optional[str] = None 152 | custom_storage_params: Optional[dict] = {} 153 | 154 | @root_validator(pre=True) 155 | def validate_state(cls, values) -> str: 156 | jsonfile_strategy = values.get("jsonfile_strategy") 157 | jsonfile_s3_bucket = values.get("jsonfile_s3_bucket") 158 | jsonfile_s3_endpoint = values.get("jsonfile_s3_endpoint") 159 | jsonfile_s3_access_key = values.get("jsonfile_s3_access_key") 160 | jsonfile_s3_secret_key = values.get("jsonfile_s3_secret_key") 161 | s3_is_configured = all( 162 | ( 163 | jsonfile_s3_bucket, 164 | jsonfile_s3_endpoint, 165 | jsonfile_s3_access_key, 166 | jsonfile_s3_secret_key, 167 | ) 168 | ) 169 | 170 | if jsonfile_strategy == "s3" and not s3_is_configured: 171 | raise ConfigurationError("S3 must be configured for JSONFileStorage with S3 strategy.") 172 | 173 | return values 174 | 175 | class Config: 176 | extra = "ignore" 177 | 178 | 179 | class Settings(BaseSettings): 180 | """Global merged config.""" 181 | 182 | monitoring: MonitoringSettings = MonitoringSettings() 183 | clickhouse: ClickhouseSettings = ClickhouseSettings() 184 | tracker: TrackerSettings = TrackerSettings # TODO (akimrx): research, called class not see TOKEN's 185 | state: StateSettings = StateSettings() 186 | stateful: Optional[bool] = False 187 | stateful_initial_range: Optional[str] = "1w" 188 | 
changelog_export_enabled: Optional[bool] = False 189 | log_etl_stats: Optional[bool] = True 190 | log_etl_stats_each_n_iter: Optional[int] = 100 191 | 192 | loglevel: Optional[LogLevels] = LogLevels.info 193 | workdays: Optional[list[int]] = [0, 1, 2, 3, 4] 194 | business_hours_start: Optional[datetime.time] = datetime.time(9) 195 | business_hours_end: Optional[datetime.time] = datetime.time(22) 196 | datetime_response_format: Optional[str] = "%Y-%m-%dT%H:%M:%S.%f%z" 197 | datetime_query_format: Optional[str] = "%Y-%m-%d %H:%M:%S" 198 | datetime_clickhouse_format: Optional[str] = "%Y-%m-%dT%H:%M:%S.%f" 199 | 200 | etl_interval_minutes: Optional[int] = 30 201 | closed_issue_statuses: Optional[Union[str, list]] = "closed,rejected,resolved,cancelled,released" 202 | not_nullable_fields: Optional[Union[tuple, list, str]] = ( 203 | "created_at", 204 | "resolved_at", 205 | "closed_at", 206 | "updated_at", 207 | "released_at", 208 | "deadline", 209 | "start_date", 210 | "end_date", 211 | "start_time", 212 | "end_time", 213 | "moved_at", 214 | ) 215 | 216 | @validator("closed_issue_statuses", pre=True, always=True) 217 | def validate_closed_issue_statuses(cls, value: str) -> list: 218 | if not isinstance(value, (str, list)): 219 | raise ConfigurationError( 220 | "Invalid CLOSED_ISSUES_STATUSES. Example: closed,released,cancelled. Received: %s", 221 | value, 222 | ) 223 | 224 | if isinstance(value, str): 225 | return value.split(",") 226 | return value 227 | 228 | @validator("not_nullable_fields", pre=True, always=True) 229 | def validate_not_nullable_fields(cls, value: str) -> list: 230 | if not isinstance(value, (str, list, tuple)): 231 | raise ConfigurationError( 232 | "Invalid NOT_NULLABLE_FIELDS. Example: created_at,deadline,updated_at. Received: %s", 233 | value, 234 | ) 235 | 236 | if isinstance(value, str): 237 | return value.split(",") 238 | return value 239 | 240 | class Config: 241 | env_prefix = "EXPORTER_" 242 | case_sensitive = False 243 | env_nested_delimiter = "__" 244 | env_file = ".env" 245 | extra = "ignore" 246 | 247 | 248 | @lru_cache 249 | def _get_settings(): 250 | cfg = Settings() 251 | return cfg 252 | 253 | 254 | config = _get_settings() 255 | monitoring = DogStatsdClient( 256 | host=config.monitoring.metrics_host, 257 | port=config.monitoring.metrics_port, 258 | base_labels=config.monitoring.metrics_base_labels, 259 | metric_name_prefix=config.monitoring.metrics_base_prefix, 260 | use_ms=True, 261 | enabled=config.monitoring.metrics_enabled, 262 | ) 263 | -------------------------------------------------------------------------------- /tracker_exporter/etl.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | from datetime import datetime, timedelta 4 | from typing import Tuple, List, Optional 5 | from yandex_tracker_client.collections import Issues 6 | from yandex_tracker_client.objects import SeekablePaginatedList 7 | from yandex_tracker_client.exceptions import Forbidden 8 | 9 | from tracker_exporter.config import config, monitoring 10 | from tracker_exporter.models.issue import TrackerIssue 11 | from tracker_exporter.models.base import ClickhousePayload 12 | from tracker_exporter.state.managers import AbstractStateManager 13 | from tracker_exporter.services.tracker import YandexTrackerClient 14 | from tracker_exporter.services.clickhouse import ClickhouseClient 15 | from tracker_exporter.exceptions import ConfigurationError, UploadError, ExportOrTransformError 16 | from tracker_exporter.utils.helpers 
import ( 17 | fix_null_dates, 18 | from_human_time, 19 | convert_datetime, 20 | log_etl_stats, 21 | ) 22 | 23 | logger = logging.getLogger(__name__) 24 | 25 | 26 | class YandexTrackerETL: 27 | """Export, transform, load facade.""" 28 | 29 | def __init__( 30 | self, 31 | *, 32 | tracker_client: YandexTrackerClient, 33 | clickhouse_client: ClickhouseClient, 34 | state_manager: Optional[AbstractStateManager] = None, 35 | issue_model: TrackerIssue = TrackerIssue, 36 | database: str = config.clickhouse.database, 37 | issues_table: str = config.clickhouse.issues_table, 38 | metrics_table: str = config.clickhouse.issue_metrics_table, 39 | changelogs_table: str = config.clickhouse.issues_changelog_table, 40 | upload_to_storage: bool = config.clickhouse.enable_upload, 41 | state_key: str = "tracker_etl_default", 42 | ) -> None: 43 | self.tracker = tracker_client 44 | self.clickhouse = clickhouse_client 45 | self.state = state_manager 46 | self.issue_model = issue_model 47 | self.database = database 48 | self.issues_table = issues_table 49 | self.metrics_table = metrics_table 50 | self.changelogs_table = changelogs_table 51 | self.upload_to_storage = upload_to_storage 52 | self.state_key = state_key 53 | 54 | def _get_possible_new_state(self, issue: TrackerIssue | ClickhousePayload): 55 | try: 56 | last_state = issue.updated_at 57 | except AttributeError: 58 | last_state = issue.issue.updated_at 59 | return convert_datetime( 60 | last_state, 61 | source_dt_format=config.datetime_clickhouse_format, 62 | output_format=config.datetime_query_format, 63 | timezone=config.tracker.timezone, 64 | ) 65 | 66 | def _build_search_query( 67 | self, 68 | stateful: bool = False, 69 | queues: str | None = None, 70 | search_query: str | None = None, 71 | search_range: str | None = None, 72 | ) -> str | dict: 73 | """Prepare search query for Yandex.Tracker.""" 74 | default_order = ["updated"] 75 | sort_by_updated_asc = ' "Sort by": Updated ASC' 76 | 77 | def append_sort_by(query: str, sort_by: str) -> str: 78 | return f"{query} {sort_by}" if "ort by" not in query else query 79 | 80 | def build_stateful_query() -> str: 81 | if self.state is None: 82 | raise ConfigurationError("StateKeeper is not configured for stateful ETL mode.") 83 | queue_query = f"Queue: {queues} and " if queues else "" 84 | if (last_state := self.state.get(self.state_key)) is None: 85 | last_state = ( 86 | datetime.now() - timedelta(seconds=from_human_time(config.stateful_initial_range)) 87 | ).strftime(config.datetime_query_format) 88 | updated_query = f'Updated: >= "{last_state}"' 89 | return f"{queue_query} {updated_query} {sort_by_updated_asc}".strip() 90 | 91 | def build_query_from_filters() -> str: 92 | queue_query = f"Queue: {queues}" if queues else "" 93 | from_ = datetime.now() - timedelta(seconds=from_human_time(search_range)) 94 | updated_query = f'Updated: >= "{from_.strftime(config.datetime_query_format)}"' if search_range else "" 95 | and_ = " and" if all((queues, search_range)) else "" 96 | return f"{queue_query}{and_} {updated_query} {sort_by_updated_asc}".strip() 97 | 98 | params = {"query": None, "filter": {}, "order": default_order} 99 | if search_query: 100 | logger.info("Search query received, ignoring other filter params") 101 | params["query"] = append_sort_by(search_query, sort_by_updated_asc) 102 | elif stateful: 103 | params["query"] = build_stateful_query() 104 | elif queues or search_range: 105 | params["query"] = build_query_from_filters() 106 | else: 107 | raise ConfigurationError( 108 | "Pass one of param: 
search_query, queues, search_range. Or run ETL in stateful mode." 109 | ) 110 | logger.debug(f"Builded search query: {params}") 111 | return params 112 | 113 | @monitoring.send_time_metric("issue_transform_time_seconds") 114 | def _transform(self, issue: Issues) -> ClickhousePayload: 115 | """Transform issue to storage-compatible payload format.""" 116 | _issue = self.issue_model(issue) 117 | changelog = _issue._changelog_events 118 | metrics = _issue.metrics() 119 | 120 | return ClickhousePayload( 121 | issue=fix_null_dates(_issue.to_dict()), 122 | changelog=[c.model_dump() for c in changelog] if changelog else [], 123 | metrics=[m.to_dict() for m in metrics] if metrics else [], 124 | ) 125 | 126 | @monitoring.send_time_metric("export_and_transform_time_seconds") 127 | def _export_and_transform( 128 | self, 129 | query: str | None = None, 130 | filter: dict | list | None = None, 131 | order: dict | list | None = None, 132 | limit: int = 100, 133 | ) -> Tuple[List[dict], List[dict], List[dict], str | None]: 134 | """Collects and transforms metrics for found tasks.""" 135 | issues = [] 136 | metrics = [] 137 | changelog_events = [] 138 | issues_without_metrics = 0 139 | possible_new_state = None 140 | logger.info("Searching, exporting and transform issues...") 141 | 142 | found_issues = self.tracker.search_issues(query=query, filter=filter, order=order, limit=limit) 143 | if len(found_issues) == 0: 144 | logger.info("Nothing to export. Skipping ETL") 145 | return issues, changelog_events, metrics, possible_new_state 146 | 147 | if isinstance(found_issues, SeekablePaginatedList): 148 | pagination = True 149 | logger.info("Paginated list received, possible new state will be calculated later") 150 | else: 151 | pagination = False 152 | possible_new_state = self._get_possible_new_state(self.issue_model(found_issues[-1])) 153 | 154 | et_start_time = time.time() 155 | for i, tracker_issue in enumerate(found_issues): 156 | if config.log_etl_stats: 157 | if i == 0: 158 | pass 159 | elif i % config.log_etl_stats_each_n_iter == 0: 160 | elapsed_time = time.time() - et_start_time 161 | log_etl_stats(iteration=i, remaining=len(found_issues), elapsed=elapsed_time) 162 | 163 | try: 164 | issue, changelog, issue_metrics = self._transform(tracker_issue).model_dump().values() 165 | 166 | if pagination and i == len(found_issues) - 1: 167 | logger.info("Trying to get new state from last iteration") 168 | possible_new_state = self._get_possible_new_state(self.issue_model(tracker_issue)) 169 | 170 | issues.append(issue) 171 | changelog_events.extend(changelog) 172 | 173 | if not issue_metrics: 174 | logger.debug(f"Ignore {tracker_issue.key} because metrics is empty") 175 | issues_without_metrics += 1 176 | else: 177 | metrics.extend(issue_metrics) 178 | 179 | monitoring.send_count_metric("issues_total_processed_count", 1) 180 | except Forbidden as forbidden: 181 | logger.warning(f"Can't read {tracker_issue.key}, permission denied. 
Details: {forbidden}") 182 | except Exception as exc: 183 | logger.exception(f"Issue {tracker_issue.key} can't be transformed, details: {exc}") 184 | 185 | monitoring.send_gauge_metric("issues_without_metrics", value=issues_without_metrics) 186 | logger.info( 187 | f"Total issues: {len(issues)}, total metrics: {len(metrics)}, " 188 | f"total changelog events: {len(changelog_events)}, " 189 | f"ignored issues with empty metrics: {issues_without_metrics}" 190 | ) 191 | return issues, changelog_events, metrics, possible_new_state 192 | 193 | @monitoring.send_time_metric("upload_to_storage_time_seconds") 194 | def _load_to_storage(self, database: str, table: str, payload: list, deduplicate: bool = True) -> dict: 195 | """Load transformed payload to storage.""" 196 | logger.info(f"Inserting batch ({len(payload)}) to {database}.{table}...") 197 | self.clickhouse.insert_batch(database, table, payload) 198 | if deduplicate: 199 | logger.info(f"Optimizing {database}.{table} for deduplication...") 200 | self.clickhouse.deduplicate(database, table) 201 | 202 | @monitoring.send_time_metric("etl_duration_seconds") 203 | def run( 204 | self, 205 | *, 206 | stateful: bool = False, 207 | queues: str | None = None, 208 | search_query: str | None = None, 209 | search_range: str | None = None, 210 | limit: int = 100, 211 | ignore_exceptions: bool = True, 212 | auto_deduplicate: bool = True, 213 | ) -> None: 214 | """Runs main ETL process.""" 215 | query = self._build_search_query(stateful, queues, search_query, search_range) 216 | try: 217 | issues, changelogs, metrics, possible_new_state = self._export_and_transform(**query, limit=limit) 218 | if stateful and possible_new_state is not None: 219 | logger.info(f"Stateful mode enabled, fetching possible new state: {possible_new_state}") 220 | last_saved_state = self.state.get(self.state_key) 221 | if last_saved_state == possible_new_state and len(issues) <= 1 and len(metrics) <= 1: 222 | logger.info("Data already is up-to-date, skipping upload stage") 223 | return 224 | except Exception as exc: 225 | logger.error(f"An error occured in ETL while exporting and transform: {exc}") 226 | if not ignore_exceptions: 227 | raise ExportOrTransformError(str(exc)) 228 | 229 | if self.upload_to_storage and (issues or metrics or changelogs): 230 | try: 231 | if issues: 232 | self._load_to_storage(self.database, self.issues_table, issues, deduplicate=auto_deduplicate) 233 | if metrics: 234 | self._load_to_storage(self.database, self.metrics_table, metrics, deduplicate=auto_deduplicate) 235 | if changelogs: 236 | self._load_to_storage( 237 | self.database, 238 | self.changelogs_table, 239 | changelogs, 240 | deduplicate=auto_deduplicate, 241 | ) 242 | success = True 243 | except Exception as exc: 244 | logger.error(f"An exception occured in ETL while uploading: {exc}") 245 | success = False 246 | if not ignore_exceptions: 247 | raise UploadError(str(exc)) 248 | else: 249 | if all((stateful, self.state, possible_new_state)): 250 | logger.info(f"Saving last ETL timestamp {possible_new_state}") 251 | self.state.set(self.state_key, possible_new_state) 252 | else: 253 | logger.info( 254 | "The state snapshot will not be saved. 
Not all conditions are met " 255 | f"{stateful=} {self.state=} {possible_new_state=}" 256 | ) 257 | monitoring.send_gauge_metric("last_update_timestamp", value=int(time.time())) 258 | finally: 259 | monitoring.send_gauge_metric("etl_upload_status", value=1 if success else 2) 260 | else: 261 | logger.info("The state snapshot will not be saved because the upload to the storage is disabled.") 262 | print(issues if issues else "Empty issues") 263 | print(metrics if metrics else "Empty metrics") 264 | print(changelogs if changelogs else "Empty changelogs") 265 | -------------------------------------------------------------------------------- /tracker_exporter/exceptions.py: -------------------------------------------------------------------------------- 1 | class TrackerExporterError(Exception): 2 | pass 3 | 4 | 5 | class ClickhouseError(TrackerExporterError): 6 | pass 7 | 8 | 9 | class TrackerError(TrackerExporterError): 10 | pass 11 | 12 | 13 | class ExportOrTransformError(TrackerExporterError): 14 | pass 15 | 16 | 17 | class UploadError(TrackerExporterError): 18 | pass 19 | 20 | 21 | class ConfigurationError(Exception): 22 | pass 23 | 24 | 25 | class JsonFileNotFound(Exception): 26 | pass 27 | 28 | 29 | class InvalidJsonFormat(Exception): 30 | pass 31 | 32 | 33 | class SerializerError(Exception): 34 | pass 35 | -------------------------------------------------------------------------------- /tracker_exporter/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import signal 6 | import logging 7 | import warnings 8 | import argparse 9 | 10 | from datetime import datetime, timedelta 11 | from dotenv import load_dotenv, find_dotenv 12 | 13 | import sentry_sdk 14 | from apscheduler.schedulers.background import BackgroundScheduler 15 | 16 | parser = argparse.ArgumentParser("tracker-exporter") 17 | parser.add_argument( 18 | "-e", 19 | "--env-file", 20 | metavar="file", 21 | dest="env_file", 22 | type=str, 23 | required=False, 24 | help="Path to .env file", 25 | ) 26 | parser.add_argument("--run-once", dest="run_once", action="store_true", help="Run ETL once.") 27 | args, _ = parser.parse_known_args() 28 | warnings.filterwarnings("ignore") 29 | 30 | if args.env_file: 31 | load_dotenv(args.env_file) 32 | else: 33 | load_dotenv(find_dotenv()) 34 | 35 | # pylint: disable=C0413 36 | from tracker_exporter.services.monitoring import sentry_events_filter 37 | from tracker_exporter.state.managers import AbstractStateManager 38 | from tracker_exporter.state.factory import StateManagerFactory, IObjectStorageProps 39 | from tracker_exporter.models.issue import TrackerIssue 40 | from tracker_exporter.etl import YandexTrackerETL 41 | from tracker_exporter.services.tracker import YandexTrackerClient 42 | from tracker_exporter.services.clickhouse import ClickhouseClient 43 | from tracker_exporter._meta import appname, version 44 | from tracker_exporter.config import config 45 | 46 | logging.basicConfig( 47 | level=config.loglevel.upper(), 48 | datefmt="%Y-%m-%d %H:%M:%S", 49 | format="%(asctime)s.%(msecs)03d [%(levelname)s] [%(name)s.%(funcName)s] %(message)s", 50 | ) 51 | logging.getLogger("yandex_tracker_client").setLevel(config.tracker.loglevel.upper()) 52 | logger = logging.getLogger(__name__) 53 | logger.debug(f"Environment: {os.environ.items()}") 54 | logger.debug(f"Configuration dump: {config.model_dump()}") 55 | 56 | scheduler = BackgroundScheduler() 57 | 58 | 59 | def signal_handler(sig, frame) -> None: # 
pylint: disable=W0613 60 | """Graceful shutdown.""" 61 | if sig in ( 62 | signal.SIGINT, 63 | signal.SIGTERM, 64 | ): 65 | logger.warning(f"Received {signal.Signals(sig).name}, graceful shutdown...") 66 | scheduler.shutdown() 67 | sys.exit(0) 68 | 69 | 70 | def configure_sentry() -> None: 71 | """Configure Sentry client for send exception stacktraces.""" 72 | if config.monitoring.sentry_enabled: 73 | assert config.monitoring.sentry_dsn is not None 74 | sentry_sdk.init( 75 | dsn=config.monitoring.sentry_dsn, 76 | traces_sample_rate=1.0, 77 | release=f"{appname}@{version}", 78 | before_send=sentry_events_filter, 79 | ) 80 | logger.info(f"Sentry send traces is {'enabled' if config.monitoring.sentry_enabled else 'disabled'}") 81 | 82 | 83 | def configure_state_manager() -> AbstractStateManager | None: 84 | """Configure StateKeeper for ETL stateful mode.""" 85 | if not config.stateful: 86 | return 87 | 88 | match config.state.storage: 89 | case "jsonfile": 90 | s3_props: IObjectStorageProps = IObjectStorageProps( 91 | bucket_name=config.state.jsonfile_s3_bucket, 92 | access_key_id=config.state.jsonfile_s3_access_key, 93 | secret_key=config.state.jsonfile_s3_secret_key, 94 | endpoint_url=config.state.jsonfile_s3_endpoint, 95 | region=config.state.jsonfile_s3_region, 96 | ) 97 | return StateManagerFactory.create_file_state_manager( 98 | strategy=config.state.jsonfile_strategy, filename=config.state.jsonfile_path, **s3_props 99 | ) 100 | case "redis": 101 | return StateManagerFactory.create_redis_state_manager(config.state.redis_dsn) 102 | case "custom": 103 | raise NotImplementedError 104 | case _: 105 | raise ValueError 106 | 107 | 108 | def run_etl(ignore_exceptions: bool = False, issue_model: TrackerIssue = TrackerIssue) -> None: 109 | """Start ETL process.""" 110 | etl = YandexTrackerETL( 111 | tracker_client=YandexTrackerClient(), 112 | clickhouse_client=ClickhouseClient(), 113 | state_manager=configure_state_manager(), 114 | issue_model=issue_model, 115 | ) 116 | etl.run( 117 | stateful=config.stateful, 118 | queues=config.tracker.search.queues, 119 | search_query=config.tracker.search.query, 120 | search_range=config.tracker.search.range, 121 | limit=config.tracker.search.per_page_limit, 122 | ignore_exceptions=ignore_exceptions, 123 | auto_deduplicate=config.clickhouse.auto_deduplicate, 124 | ) 125 | 126 | 127 | def main() -> None: 128 | """Entry point for CLI command.""" 129 | configure_sentry() 130 | 131 | if args.run_once: 132 | logger.info("A one-time launch command is received, the scheduler setting will be skipped") 133 | run_etl() 134 | sys.exit(0) 135 | 136 | signal.signal(signal.SIGINT, signal_handler) 137 | signal.signal(signal.SIGTERM, signal_handler) 138 | scheduler.start() 139 | scheduler.add_job( 140 | run_etl, 141 | trigger="interval", 142 | name="tracker_etl_default", 143 | minutes=int(config.etl_interval_minutes), 144 | max_instances=1, 145 | next_run_time=datetime.now() + timedelta(seconds=5), 146 | ) 147 | signal.pause() 148 | 149 | 150 | if __name__ == "__main__": 151 | main() 152 | -------------------------------------------------------------------------------- /tracker_exporter/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/tracker_exporter/models/__init__.py -------------------------------------------------------------------------------- /tracker_exporter/models/base.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | from abc import ABCMeta 3 | from enum import Enum 4 | from typing import Any 5 | 6 | from pydantic import BaseModel 7 | 8 | 9 | class ClickhousePayload(BaseModel): 10 | issue: dict 11 | changelog: list 12 | metrics: list 13 | 14 | 15 | class LogLevels(str, Enum): 16 | debug = "debug" 17 | info = "info" 18 | warning = "warning" 19 | error = "error" 20 | critical = "critical" 21 | 22 | 23 | class TrackerChangelogEvents: 24 | ISSUE_WORKFLOW = "IssueWorkflow" 25 | ISSUE_MOVED = "IssueMoved" 26 | 27 | 28 | class TrackerWorkflowTypes: 29 | TRANSITION = "status" 30 | RESOLVE_ISSUE = "resolution" 31 | 32 | 33 | class YandexTrackerLanguages(str, Enum): 34 | ru = "ru" 35 | en = "en" 36 | 37 | 38 | class TimeDeltaOut: 39 | SECONDS = "seconds" 40 | MINUTES = "minutes" 41 | 42 | 43 | class ClickhouseProto: 44 | HTTPS = "https" 45 | HTTP = "http" 46 | 47 | 48 | class Base: 49 | """Base class for objects.""" 50 | 51 | __metaclass__ = ABCMeta 52 | 53 | def __str__(self) -> str: 54 | return str(self.to_dict()) 55 | 56 | def __repr__(self) -> str: 57 | return str(self) 58 | 59 | def __getitem__(self, item): 60 | return self.__dict__[item] 61 | 62 | @classmethod 63 | def de_json(cls, data) -> dict: 64 | """Deserialize object.""" 65 | if not data: 66 | return None 67 | 68 | data = data.copy() 69 | return data 70 | 71 | def to_json(self) -> dict: 72 | """Serialize object to json.""" 73 | return json.dumps(self.to_dict()) 74 | 75 | def to_dict(self) -> dict: 76 | """Recursive serialize object.""" 77 | 78 | def null_cleaner(value: Any): 79 | if value is None: 80 | return "" 81 | return value 82 | 83 | def parse(val): 84 | if isinstance(val, list): 85 | return [parse(it) for it in val] 86 | if isinstance(val, dict): 87 | return {key: null_cleaner(parse(value)) for key, value in val.items() if not key.startswith("_")} 88 | return val 89 | 90 | data = self.__dict__.copy() 91 | return parse(data) 92 | -------------------------------------------------------------------------------- /tracker_exporter/models/issue.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from pydantic import BaseModel 4 | from typing import List, Any 5 | from tracker_exporter._typing import DateTimeISO8601Str, DateStr 6 | 7 | from yandex_tracker_client.collections import Issues, IssueChangelog 8 | from yandex_tracker_client.exceptions import NotFound 9 | 10 | from tracker_exporter.models.base import Base 11 | from tracker_exporter.models.base import ( 12 | TrackerChangelogEvents, 13 | TrackerWorkflowTypes, 14 | ) 15 | from tracker_exporter.utils.helpers import ( 16 | calculate_time_spent, 17 | string_normalize, 18 | validate_resource, 19 | extract_changelog_field, 20 | convert_datetime, 21 | to_snake_case, 22 | to_human_time, 23 | ) 24 | from tracker_exporter.config import config 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | class TrackerIssueChangelog(BaseModel): 30 | """This object represents a issue changelog events.""" 31 | 32 | issue_key: str 33 | queue: str 34 | event_time: DateTimeISO8601Str 35 | event_type: str 36 | transport: str 37 | actor: str 38 | changed_field: Any 39 | changed_from: Any 40 | changed_to: Any 41 | 42 | 43 | class TrackerIssueMetric(Base): 44 | """This object represents a issue metrics.""" 45 | 46 | def __init__( 47 | self, 48 | issue_key: str, 49 | status_name: str, 50 | status_transitions_count: int, 51 | duration: int, 52 | 
busdays_duration: int, 53 | last_seen: str, 54 | ) -> None: 55 | self.issue_key = issue_key 56 | self.status_name = status_name 57 | self.status_transitions_count = status_transitions_count 58 | self.duration = duration 59 | self.human_readable_duration = to_human_time(self.duration) 60 | self.busdays_duration = busdays_duration 61 | self.human_readable_busdays_duration = to_human_time(self.busdays_duration) 62 | self.last_seen = last_seen 63 | 64 | 65 | class TrackerIssue(Base): 66 | """This object represents a issue from Yandex.Tracker.""" 67 | 68 | def __init__(self, issue: Issues) -> None: 69 | self._changelog_events: List[TrackerIssueChangelog] = [] 70 | self._issue: Issues = issue 71 | self._metrics: dict = {} 72 | self._transform(self._issue) 73 | 74 | def _transform(self, issue: Issues) -> None: 75 | """Transformation of a issue into useful data.""" 76 | logger.debug(f"Transforming issue {issue.key}...") 77 | 78 | self.queue: str = issue.queue.key 79 | self.issue_key: str = issue.key 80 | self.title: str = string_normalize(issue.summary) 81 | self.issue_type: str = to_snake_case(validate_resource(issue.type, "name")) 82 | self.priority: str = validate_resource(issue.priority, "name") 83 | self.assignee: str = validate_resource(issue.assignee, "email") 84 | self.author: str = validate_resource(issue.createdBy, "email") 85 | self.status: str = to_snake_case(validate_resource(issue.status, "name")) 86 | self.resolution: str = to_snake_case(validate_resource(issue.resolution, "name")) 87 | self.tags: list = issue.tags or [] 88 | self.components: list = [c.name for c in issue.components if issue.components] 89 | self.is_resolved: bool = True if self.resolution is not None else False 90 | self.is_closed: bool = True if self.status in config.closed_issue_statuses or self.is_resolved else False 91 | self.created_at: DateTimeISO8601Str = convert_datetime(issue.createdAt) 92 | self.updated_at: DateTimeISO8601Str = convert_datetime(issue.updatedAt) 93 | self.resolved_at: DateTimeISO8601Str = convert_datetime(issue.resolvedAt) 94 | self.closed_at: DateTimeISO8601Str = self.resolved_at if self.is_resolved else None 95 | self.start_date: DateStr = validate_resource(issue, "start") 96 | self.end_date: DateStr = validate_resource(issue, "end") 97 | self.deadline: DateStr = validate_resource(issue, "deadline") 98 | self.story_points: int = validate_resource(issue, "storyPoints") or 0 99 | self.parent_issue_key: str = validate_resource(issue.parent, "key", low=False) 100 | self.epic_issue_key: str = validate_resource(issue.epic, "key", low=False) 101 | self.is_subtask: bool = True if any((self.parent_issue_key,)) else False 102 | self.qa_engineer: str = validate_resource(issue.qaEngineer, "email") 103 | self.aliases: list = validate_resource(issue, "aliases") or [] 104 | self.was_moved: bool = False 105 | self.moved_at: DateTimeISO8601Str = None 106 | self.moved_by: str = None 107 | self._handle_strange_tracker_artifacts(self._issue) 108 | 109 | def _handle_strange_tracker_artifacts(self, issue: Issues): 110 | """ 111 | Handling strange artifacts in the Yandex.Tracker. 112 | For some reason, the tracker can't find the project or sprint specified in the issue, 113 | like yandex_tracker_client.exceptions.NotFound: Sprint does not exist. 114 | """ 115 | try: 116 | self.project = validate_resource(issue.project, "name") 117 | except NotFound as exc: 118 | logger.warning(f"Can't get info about specified project for issue {self.issue_key}. 
Details: {exc}") 119 | self.project = "" 120 | try: 121 | self.sprints: list = [s.name for s in issue.sprint if issue.sprint] 122 | except NotFound as exc: 123 | logger.warning(f"Can't get info about specified sprint for issue {self.issue_key}. Details: {exc}") 124 | self.sprints = [] 125 | 126 | def _convert_and_save_changelog(self, event: IssueChangelog) -> None: 127 | """Convert issue changelog events to compatible format.""" 128 | metadata = { 129 | "issue_key": event.issue.key, 130 | "queue": event.issue.queue.key, 131 | "event_time": convert_datetime(event.updatedAt), 132 | "event_type": event.type, 133 | "transport": event.transport, 134 | "actor": validate_resource(event.updatedBy, "email") or validate_resource(event.updatedBy, "name") or "", 135 | } 136 | 137 | for change in event.fields: 138 | try: # Ah shit, here we go again 139 | changed_field = extract_changelog_field(change.get("field")) 140 | changed_from = extract_changelog_field(change.get("from")) 141 | changed_to = extract_changelog_field(change.get("to")) 142 | except NotFound as exc: 143 | logger.warning( 144 | f"Tracker BUG, can't get info about '{changed_field}' in " 145 | f"{self.issue_key}, the entity may have been deleted. Details: {exc}" 146 | ) 147 | continue 148 | 149 | if changed_field is None or not any((changed_from, changed_to)): 150 | logger.debug(f"Skipping bad changelog event for {self.issue_key} ({changed_field}): {change}") 151 | continue 152 | 153 | self._changelog_events.append( 154 | TrackerIssueChangelog( 155 | **metadata, 156 | changed_field=changed_field, 157 | changed_from=changed_from, 158 | changed_to=changed_to, 159 | ) 160 | ) 161 | 162 | def _on_changelog_issue_moved(self, event: IssueChangelog) -> None: 163 | """Actions whe 'issue moved' event triggered.""" 164 | logger.debug(f"Moved issue found: {self.issue_key}") 165 | self.was_moved = True 166 | self.moved_by = validate_resource(event.updatedBy, "email") 167 | self.moved_at = convert_datetime(event.updatedAt) 168 | 169 | def _on_changelog_issue_workflow(self, event: IssueChangelog) -> None: 170 | """Actions whe 'issue wofklow' event triggered.""" 171 | logger.debug(f"Issue workflow fields found: {event.fields}") 172 | 173 | if len(event.fields) < 2: 174 | logger.debug(f"Not interesting event, skipping: {event.fields}") 175 | return 176 | 177 | # Keep only status transition events 178 | worklow_type = event.fields[0].get("field").id 179 | if worklow_type != TrackerWorkflowTypes.TRANSITION: 180 | logger.debug(f"Skipping {event.fields[0].get('field').id} for {self.issue_key}") 181 | return 182 | 183 | # Find datetimes between transition from status A to status B 184 | status = to_snake_case(event.fields[0].get("from").name.lower()) 185 | event_start_time = event.fields[1].get("from") or self._issue.createdAt # transition from the initial status 186 | event_end_time = event.fields[1].get("to") 187 | 188 | if event_start_time is None or event_end_time is None: 189 | logger.warning( 190 | f"Found corrupted changelog event with bad datetime range. " 191 | f"Perhaps this field is not a status. See details: " 192 | f"{self.issue_key}: {event.fields[1]}. All fields: {event.fields}" 193 | ) 194 | return 195 | 196 | # Calculation of the time spent in the status 197 | start_time = convert_datetime(event_start_time) 198 | end_time = convert_datetime(event_end_time) 199 | total_status_time = calculate_time_spent(start_time, end_time) 200 | # TODO (akimrx): get workhours from queue settings? 
201 | busdays_status_time = calculate_time_spent(start_time, end_time, busdays_only=True) 202 |  203 | # Custom logic for calculating the finish date of the issue, 204 | # because not everyone uses resolutions, sadly 205 | # Also, resolved issues are flagged as is_closed with closed_at equal to the resolution time 206 | transition_status = to_snake_case(event.fields[0].get("to").name.lower()) 207 | if self.is_resolved and self.resolved_at: 208 | self.closed_at = self.resolved_at 209 | elif transition_status in config.closed_issue_statuses and self.status in config.closed_issue_statuses: 210 | self.closed_at = convert_datetime(event_end_time) 211 |  212 | try: 213 | self._metrics[status]["duration"] += total_status_time 214 | self._metrics[status]["busdays_duration"] += busdays_status_time 215 | self._metrics[status]["status_transitions_count"] += 1 216 | except (KeyError, AttributeError): 217 | self._metrics[status] = { 218 | "issue_key": self.issue_key, 219 | "status_name": status, 220 | "status_transitions_count": 1, 221 | "duration": total_status_time, 222 | "busdays_duration": busdays_status_time, 223 | "last_seen": convert_datetime(event_end_time), 224 | } 225 |  226 | def metrics(self) -> List[TrackerIssueMetric]: 227 | """ 228 | All metrics are based on status change events in the issue history. 229 |  230 | Time spent in a status is recorded only after 231 | the issue has left that status. 232 |  233 | For example, when an issue moves from the status "Open" 234 | to the status "In progress", only the metric 235 | for "Open" is recorded. 236 | As soon as "In progress" changes to any other status, 237 | the time spent in "In progress" is recorded as well. 238 |  239 | In other words, time spent in the issue's current status is not 240 | counted. 
241 | """ 242 | for event in self._issue.changelog: 243 | if config.changelog_export_enabled: 244 | self._convert_and_save_changelog(event) 245 | match event.type: 246 | case TrackerChangelogEvents.ISSUE_MOVED: 247 | self._on_changelog_issue_moved(event) 248 | 249 | case TrackerChangelogEvents.ISSUE_WORKFLOW: 250 | self._on_changelog_issue_workflow(event) 251 | 252 | case _: # not interesting event 253 | pass 254 | 255 | logger.debug(f"Metrics for {self.issue_key}: {self._metrics}") 256 | metrics = [TrackerIssueMetric(**metric) for _, metric in self._metrics.items()] 257 | 258 | return metrics 259 | -------------------------------------------------------------------------------- /tracker_exporter/services/__init__.py: -------------------------------------------------------------------------------- 1 | from tracker_exporter.services.clickhouse import ClickhouseClient 2 | from tracker_exporter.services.monitoring import DogStatsdClient 3 | from tracker_exporter.services.tracker import YandexTrackerClient 4 | 5 | __all__ = [ 6 | "ClickhouseClient", 7 | "DogStatsdClient", 8 | "YandexTrackerClient", 9 | ] 10 | -------------------------------------------------------------------------------- /tracker_exporter/services/clickhouse.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | 4 | from typing import List, Dict 5 | 6 | import requests 7 | from requests import Response, ConnectionError, Timeout 8 | 9 | from tracker_exporter.exceptions import ClickhouseError 10 | from tracker_exporter.utils.helpers import backoff 11 | from tracker_exporter.models.base import ClickhouseProto 12 | from tracker_exporter.config import config, monitoring 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class ClickhouseClient: 18 | """This class provide simple facade interface for Clickhouse.""" 19 | 20 | def __init__( 21 | self, 22 | host: str = config.clickhouse.host, 23 | port: int = config.clickhouse.port, 24 | username: str = config.clickhouse.username, 25 | password: str = config.clickhouse.password, 26 | proto: ClickhouseProto = config.clickhouse.proto, 27 | cacert: str = config.clickhouse.cacert_path, 28 | serverless_proxy_id: str = config.clickhouse.serverless_proxy_id, 29 | params: dict = {}, 30 | http_timeout: int = 10, 31 | ) -> None: 32 | self.host = host 33 | self.port = port 34 | self.username = username 35 | self.password = password 36 | self.proto = proto 37 | self.cacert = cacert 38 | self.serverless_proxy_id = serverless_proxy_id 39 | self.params = params 40 | self.timeout = int(http_timeout) 41 | self.headers = {} 42 | 43 | self._prepare_headers() 44 | if self.proto == ClickhouseProto.HTTPS: 45 | assert self.cacert is not None 46 | 47 | def _prepare_headers(self): 48 | # fmt: off 49 | self.headers = { 50 | "Content-Type": "application/json", 51 | "X-Clickhouse-User": self.username 52 | } # fmt: on 53 | if self.password is not None: 54 | self.headers["X-Clickhouse-Key"] = self.password 55 | 56 | def _prepare_query_params(self): 57 | params = self.params.copy() 58 | 59 | if params.get("user") is not None: 60 | logger.warning("Removed 'user' key:value from params, please pass 'user' via arg") 61 | del params["user"] 62 | 63 | if params.get("password") is not None: 64 | logger.warning("Removed 'password' key:value from params, please pass 'password' via arg") 65 | del params["password"] 66 | 67 | if self.serverless_proxy_id: 68 | self.params["database"] = self.serverless_proxy_id 69 | 70 | return params 71 | 72 | 
@backoff( 73 | exceptions=(ConnectionError, Timeout), 74 | base_delay=config.clickhouse.backoff_base_delay, 75 | expo_factor=config.clickhouse.backoff_expo_factor, 76 | max_tries=config.clickhouse.backoff_max_tries, 77 | jitter=config.clickhouse.backoff_jitter, 78 | ) 79 | def execute(self, query: str) -> Response | None: 80 | url = f"{self.proto}://{self.host}:{self.port}" 81 | params = self._prepare_query_params() 82 | 83 | try: 84 | if self.proto == ClickhouseProto.HTTPS: 85 | response = requests.post( 86 | url=url, 87 | headers=self.headers, 88 | params=params, 89 | data=query, 90 | timeout=self.timeout, 91 | verify=self.cacert, 92 | ) 93 | else: 94 | response = requests.post( 95 | url=url, headers=self.headers, params=params, data=query, timeout=self.timeout 96 | ) 97 | except (Timeout, ConnectionError): 98 | raise 99 | except Exception as exc: 100 | logger.exception(f"Could not execute query in Clickhouse: {exc}") 101 | raise ClickhouseError(exc) from exc 102 | else: 103 | if not response.ok: 104 | msg = f"Could not execute query in Clickhouse. Status: {response.status_code}. {response.text}" 105 | logger.error(msg) 106 | raise ClickhouseError(msg) 107 | return response 108 | 109 | # TODO (akimrx): add sort by partition key (i.e. `updated_at`)? for best insert perfomance 110 | def insert_batch(self, database: str, table: str, payload: List[Dict]) -> Response | None: 111 | if not isinstance(payload, list): 112 | raise ClickhouseError("Payload must be list") 113 | 114 | tags = [f"database:{database}", f"table:{table}"] 115 | batch_size = len(payload) 116 | data = " ".join([json.dumps(row) for row in payload]) 117 | logger.debug(f"Inserting batch ({batch_size}): {data}") 118 | 119 | with monitoring.send_time_metric("clickhouse_insert_time_seconds", tags): 120 | query_result = self.execute(f"INSERT INTO {database}.{table} FORMAT JSONEachRow {data}") 121 | 122 | monitoring.send_gauge_metric("clickhouse_inserted_rows", batch_size, tags) 123 | return query_result 124 | 125 | def deduplicate(self, database: str, table: str) -> None: 126 | tags = [f"database:{database}", f"table:{table}"] 127 | with monitoring.send_time_metric("clickhouse_deduplicate_time_seconds", tags): 128 | self.execute(f"OPTIMIZE TABLE {database}.{table} FINAL") 129 | -------------------------------------------------------------------------------- /tracker_exporter/services/monitoring.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=W0102 2 | import logging 3 | 4 | from contextlib import contextmanager 5 | from typing import Callable, ContextManager 6 | from functools import wraps 7 | from datadog import DogStatsd 8 | 9 | from yandex_tracker_client.exceptions import ( 10 | TrackerError, 11 | TrackerServerError, 12 | TrackerRequestError, 13 | TrackerClientError, 14 | ) 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class DogStatsdClient: 20 | """This class represents interface for DataDog statsd UDP client.""" 21 | 22 | def __init__( 23 | self, 24 | host: str, 25 | port: int, 26 | base_labels: list = [], # pylint: disable=W0102 27 | metric_name_prefix: str = "tracker_exporter", 28 | use_ms: bool = True, 29 | enabled: bool = True, 30 | ) -> None: 31 | self.host = host 32 | self.port = port 33 | self.base_labels = base_labels 34 | self.prefix = metric_name_prefix 35 | self._enabled = enabled 36 | self._use_ms = use_ms 37 | 38 | if self._enabled: 39 | assert self.host is not None 40 | assert self.port is not None 41 | 42 | self.client = 
DogStatsd(host=self.host, port=self.port, use_ms=self._use_ms, constant_tags=self.base_labels) 43 | 44 | def send_count_metric(self, name: str, value: int, tags: list = []) -> Callable: 45 | metric = f"{self.prefix}_{name}" 46 | 47 | def metric_wrapper(func): 48 | @wraps(func) 49 | def wrapper(*args, **kwargs): 50 | if not self._enabled: 51 | return func(*args, **kwargs) 52 | 53 | self.client.increment(metric, value, tags=tags) 54 | logger.debug(f"Success sent count metric: {metric}") 55 | return func(*args, **kwargs) 56 | 57 | return wrapper 58 | 59 | return metric_wrapper 60 | 61 | def send_gauge_metric(self, name: str, value: int, tags: list = []) -> None: 62 | if not self._enabled: 63 | return 64 | 65 | metric = f"{self.prefix}_{name}" 66 | self.client.gauge(metric, value, tags=tags) 67 | logger.debug(f"Success sent gauge metric: {metric}") 68 | 69 | @contextmanager 70 | def _dummy_send_time_metric(self): 71 | yield 72 | 73 | def send_time_metric(self, name: str, tags: list = [], **kwargs) -> Callable | ContextManager: 74 | metric = f"{self.prefix}_{name}" 75 | if self._enabled: 76 | return self.client.timed(metric, tags=tags, **kwargs) 77 | return self._dummy_send_time_metric() 78 | 79 | 80 | def sentry_events_filter(event, hint): # pylint: disable=R1710 81 | # Drop all events without exception trace 82 | if "exc_info" not in hint: 83 | return 84 | 85 | exception = hint["exc_info"][1] 86 | if isinstance(exception, (TrackerError, TrackerClientError, TrackerRequestError, TrackerServerError)): 87 | event["fingerprint"] = ["tracker-error"] 88 | 89 | return event 90 | -------------------------------------------------------------------------------- /tracker_exporter/services/tracker.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import List 4 | from yandex_tracker_client import TrackerClient 5 | from yandex_tracker_client.collections import Issues, IssueComments 6 | 7 | from tracker_exporter.models.base import YandexTrackerLanguages 8 | from tracker_exporter.config import ( 9 | config, 10 | monitoring, 11 | YANDEX_TRACKER_API_SEARCH_HARD_LIMIT, 12 | YANDEX_TRACKER_HARD_LIMIT_ISSUE_URL, 13 | ) 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class YandexTrackerClient: 19 | """This class provide simple wrapper over default Yandex.Tracker client.""" 20 | 21 | def __init__( 22 | self, 23 | *, 24 | token: str | None = config.tracker.token, 25 | iam_token: str | None = config.tracker.iam_token, 26 | org_id: str | None = config.tracker.org_id, 27 | cloud_org_id: str | None = config.tracker.cloud_org_id, 28 | timeout: int = config.tracker.timeout, 29 | retries: int = config.tracker.max_retries, 30 | lang: YandexTrackerLanguages = config.tracker.language, 31 | ) -> None: 32 | self.client = TrackerClient( 33 | token=token, 34 | iam_token=iam_token, 35 | org_id=org_id, 36 | cloud_org_id=cloud_org_id, 37 | timeout=timeout, 38 | retries=retries, 39 | headers={"Accept-Language": lang}, 40 | ) 41 | 42 | @monitoring.send_time_metric("issue_prefetch_seconds") 43 | def get_issue(self, issue_key: str) -> Issues: 44 | return self.client.issues[issue_key] 45 | 46 | @monitoring.send_time_metric("comments_fetch_seconds") 47 | def get_comments(self, issue_key: str) -> IssueComments: 48 | return self.client.issues[issue_key].comments.get_all() 49 | 50 | @monitoring.send_time_metric("issues_search_time_seconds") 51 | def search_issues( 52 | self, 53 | query: str | None = None, 54 | filter: dict | list | None = None, 55 | order: 
dict | list | None = None, 56 | limit: int = 100, 57 | ) -> List[Issues]: 58 | # https://github.com/yandex/yandex_tracker_client/issues/13 59 | issues_count = self.client.issues.find(query=query, filter=filter, order=order, count_only=True) 60 | if issues_count > YANDEX_TRACKER_API_SEARCH_HARD_LIMIT: 61 | logger.warning( 62 | f"The number of issues found ({issues_count}) exceeds the hard limit " 63 | f"({YANDEX_TRACKER_API_SEARCH_HARD_LIMIT}) of the Yandex.Tracker API. " 64 | f"Issue on GitHub - {YANDEX_TRACKER_HARD_LIMIT_ISSUE_URL}" 65 | ) 66 | logger.info(f"Found {issues_count} issues by query: {query} | filter: {filter} | order: {order}") 67 | return self.client.issues.find(query=query, filter=filter, order=order, per_page=limit) 68 | -------------------------------------------------------------------------------- /tracker_exporter/state/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/tracker_exporter/state/__init__.py -------------------------------------------------------------------------------- /tracker_exporter/state/backends.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 |  4 | from abc import ABC, abstractmethod 5 | from typing import Any, ContextManager 6 |  7 | import boto3 8 |  9 | from tracker_exporter.state.serializers import AbstractSerializer, JsonSerializer 10 |  11 | logger = logging.getLogger(__name__) 12 |  13 |  14 | class AbstractFileStorageBackend(ABC): 15 | """ 16 | An abstract base class for file storage systems, enforcing a common interface for file operations. 17 |  18 | :param serializer: The serializer instance used for serializing and deserializing data. 19 | :param raise_if_not_exists: Raise :exc:`FileNotFoundError` if the file does not exist. Defaults to ``True``. 20 | :param auto_sub_ext_by_serializer: Automatically substitute the file extension based on the serializer. Defaults to ``False``. 
21 | 22 | """ 23 | 24 | def __init__( 25 | self, 26 | serializer: AbstractSerializer, 27 | raise_if_not_exists: bool = True, 28 | auto_sub_ext_by_serializer: bool = False, 29 | ) -> None: 30 | self.serializer = serializer if hasattr(serializer, "is_initialized") else serializer() 31 | self.raise_if_not_exists = raise_if_not_exists 32 | self.auto_sub_ext_by_serializer = auto_sub_ext_by_serializer 33 | 34 | def path_with_ext(self, path: str) -> str: 35 | """Appends the file extension from the serializer if not present in the path.""" 36 | if not path.endswith(f".{self.serializer.ext}"): 37 | return f"{path}.{self.serializer.ext}" 38 | return path 39 | 40 | @abstractmethod 41 | def read(self, path: str, deserialize: bool = False) -> Any: 42 | """Abstract method for reading data from a given file path.""" 43 | 44 | @abstractmethod 45 | def write(self, path: str, data: Any) -> None: 46 | """Abstract method for writing data to a given file path.""" 47 | 48 | 49 | class AbstractKeyValueStorageBackend(ABC): 50 | """An abstract base class for key value storage backends like Redis, Consul, etc.""" 51 | 52 | @abstractmethod 53 | def client(self, *args, **kwargs) -> ContextManager: 54 | """An abstract method that returns client context manager.""" 55 | 56 | @abstractmethod 57 | def get(self, key: str | list, *args, **kwargs) -> Any: 58 | """An abstract method for get value(s) by key from storage.""" 59 | 60 | @abstractmethod 61 | def set(self, key: str, value: Any, *args, **kwargs) -> None: 62 | """An abstract method for save key:value pair to storage.""" 63 | 64 | @abstractmethod 65 | def delete(self, key: str | list, *args, **kwargs) -> None: 66 | """An abstract method for deletes key(s) from storage.""" 67 | 68 | 69 | class LocalFileStorageBackend(AbstractFileStorageBackend): 70 | """ 71 | A concrete synchronous implementation of AbstractFileStorage for local file storage operations. 72 | Overrides the read and write asynchronous methods for file operations using the aiofiles package. 73 | 74 | :param serializer: The serializer instance used for serializing and deserializing data. 75 | :param raise_if_not_exists: Raise :exc:`FileNotFound` if file not exists. Defaults to True. 76 | :param auto_sub_ext_by_serializer: Automatically substitute the file extension based on the serializer. Defaults is ``False``. 77 | 78 | Default serializer: :class:`JsonSerializer` 79 | 80 | Usage:: 81 | 82 | storage = LocalFileStorage() 83 | 84 | storage.write("myfile.json", data={"foo": "bar"}) 85 | r = storage.read("myfile.json", deserialize=True) 86 | 87 | print(r) # {"foo": "bar"} 88 | 89 | """ 90 | 91 | def __init__( 92 | self, 93 | serializer: AbstractSerializer | None = None, 94 | raise_if_not_exists: bool = True, 95 | auto_sub_ext_by_serializer: bool = False, 96 | ) -> None: 97 | super().__init__( 98 | serializer or JsonSerializer, 99 | raise_if_not_exists=raise_if_not_exists, 100 | auto_sub_ext_by_serializer=auto_sub_ext_by_serializer, 101 | ) 102 | 103 | def read(self, path: str, deserialize: bool = False) -> Any: 104 | """ 105 | Reads data from a local file, deserializes it using the provided serializer, 106 | and returns the deserialized data. 107 | 108 | :param path: A local file path for read content from. 109 | :param deserialize: Deserialize readed file content via serializer. 
110 |  111 | """ 112 | if self.auto_sub_ext_by_serializer: 113 | path = self.path_with_ext(path) 114 |  115 | if not os.path.isfile(path) and not os.path.exists(path): 116 | if self.raise_if_not_exists: 117 | raise FileNotFoundError(f"File with name {path} not found") 118 | logger.debug(f"File with name '{path}' not found") 119 | return {} 120 |  121 | with open(path, "r") as file: 122 | data = file.read() 123 |  124 | if deserialize: 125 | return self.serializer.deserialize(data) 126 | return data 127 |  128 | def write(self, path: str, data: Any) -> None: 129 | """ 130 | Serializes the given data using the provided serializer and writes it to a local file. 131 |  132 | :param path: A local path to write content to. 133 | :param data: Content that will be written to the file. 134 |  135 | """ 136 |  137 | if self.auto_sub_ext_by_serializer: 138 | path = self.path_with_ext(path) 139 |  140 | with open(path, "w") as file: 141 | file.write(self.serializer.serialize(data)) 142 |  143 |  144 | class S3FileStorageBackend(AbstractFileStorageBackend): 145 | """ 146 | A concrete synchronous implementation of AbstractFileStorageBackend for S3 object storage operations. 147 | Initializes a boto3 session and provides read and write operations for files stored in an S3 bucket. 148 |  149 | Default serializer: :class:`JsonSerializer` 150 |  151 | :param bucket_name: The name of the S3 bucket. 152 | :param access_key_id: Service account key ID; if empty, the ``AWS_ACCESS_KEY_ID`` environment variable is used. 153 | :param secret_key: Secret key for the service account; if empty, the ``AWS_SECRET_ACCESS_KEY`` environment variable is used. 154 | :param endpoint_url: S3 endpoint for use with Yandex.Cloud, Minio and other providers. 155 | :param region: S3 region. Default: ``us-east1`` 156 | :param serializer: The serializer instance used for serializing and deserializing data. 157 | :param raise_if_not_exists: Raise ``FileNotFoundError`` if the file does not exist. Defaults to ``True``. 158 | :param auto_sub_ext_by_serializer: Automatically substitute the file extension based on the serializer. Defaults to ``False``. 
159 | 160 | Usage:: 161 | 162 | storage = S3FileStorage( 163 | bucket_name="my-bucket", 164 | access_key_id="XXXX", 165 | secret_key="XXXX", 166 | endpoint_url="https://storage.yandexcloud.net", 167 | region="ru-central1" 168 | ) 169 | 170 | storage.write("myfile.json", data={"foo": "bar"}) 171 | r = storage.read("myfile.json", deserialize=True) 172 | 173 | print(r) # {"foo": "bar"} 174 | 175 | """ 176 | 177 | def __init__( 178 | self, 179 | bucket_name: str, 180 | serializer: AbstractSerializer | None = None, 181 | raise_if_not_exists: bool = True, 182 | auto_sub_ext_by_serializer: bool = False, 183 | access_key_id: str | None = None, 184 | secret_key: str | None = None, 185 | region: str | None = None, 186 | endpoint_url: str | None = None, 187 | **kwargs, 188 | ) -> None: 189 | super().__init__( 190 | serializer or JsonSerializer, 191 | raise_if_not_exists=raise_if_not_exists, 192 | auto_sub_ext_by_serializer=auto_sub_ext_by_serializer, 193 | ) 194 | self.bucket_name = bucket_name 195 | self.endpoint_url = endpoint_url 196 | self.session = boto3.Session( 197 | aws_access_key_id=access_key_id, 198 | aws_secret_access_key=secret_key, 199 | region_name=region or "us-east1", 200 | **kwargs, 201 | ) 202 | 203 | @property 204 | def client(self): 205 | """Returns a resource client for S3 operations.""" 206 | return self.session.client("s3", endpoint_url=self.endpoint_url) 207 | 208 | def read(self, path: str, deserialize: bool = False) -> Any: 209 | """ 210 | Reads data from an S3 object, deserializes it using the provided serializer, 211 | and returns the deserialized data. 212 | 213 | :param path: A local file path for read content from. 214 | :param deserialize: Deserialize readed file content via serializer. 215 | 216 | """ 217 | if self.auto_sub_ext_by_serializer: 218 | path = self.path_with_ext(path) 219 | 220 | try: 221 | response = self.client.get_object(Bucket=self.bucket_name, Key=path) 222 | except Exception as exc: 223 | error_msg = f"Exception while reading file '{path}'. Possible file not exists. Error: {exc}" 224 | 225 | if self.raise_if_not_exists: 226 | raise FileNotFoundError(error_msg) from exc 227 | 228 | logger.debug(error_msg) 229 | return {} 230 | 231 | with response["Body"] as stream: 232 | data = stream.read() 233 | 234 | if deserialize: 235 | return self.serializer.deserialize(data.decode()) 236 | return data.decode() 237 | 238 | def write(self, path: str, data: Any) -> None: 239 | """ 240 | Serializes the given data using the provided serializer and writes it to an S3 object. 241 | 242 | :param path: An local path for write content to. 243 | :param data: Content that will be written to file. 
244 | 245 | """ 246 | if self.auto_sub_ext_by_serializer: 247 | path = self.path_with_ext(path) 248 | 249 | self.client.put_object(Bucket=self.bucket_name, Key=path, Body=self.serializer.serialize(data).encode()) 250 | 251 | 252 | __all__ = [ 253 | "AbstractFileStorageBackend", 254 | "LocalFileStorageBackend", 255 | "S3FileStorageBackend", 256 | ] 257 | -------------------------------------------------------------------------------- /tracker_exporter/state/factory.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Type, TypedDict, Optional 2 | 3 | from redis import Redis 4 | 5 | from tracker_exporter.state.serializers import AbstractSerializer, JsonSerializer 6 | from tracker_exporter.state.backends import S3FileStorageBackend, LocalFileStorageBackend 7 | from tracker_exporter.state.managers import FileStateManager, RedisStateManager 8 | 9 | 10 | class IObjectStorageProps(TypedDict): 11 | bucket_name: str 12 | access_key_id: str 13 | secret_key: str 14 | region: Optional[str] 15 | endpoint_url: Optional[str] 16 | 17 | 18 | class StateManagerFactory: 19 | """Factory for easy way to create StateManager.""" 20 | 21 | @staticmethod 22 | def create_file_state_manager( 23 | strategy: Literal["local", "s3"], 24 | filename: str = "state.json", 25 | serializer: Type[AbstractSerializer] = JsonSerializer, 26 | **s3_props: Optional[IObjectStorageProps], 27 | ) -> FileStateManager: 28 | match strategy: 29 | case "local": 30 | backend = LocalFileStorageBackend(serializer=serializer, raise_if_not_exists=False) 31 | case "s3": 32 | bucket_name = s3_props["bucket_name"] 33 | del s3_props["bucket_name"] 34 | 35 | backend = S3FileStorageBackend( 36 | bucket_name, serializer=serializer, raise_if_not_exists=False, **s3_props 37 | ) 38 | case _: 39 | raise ValueError("Invalid jsonfile strategy, allowed: s3, local") 40 | 41 | return FileStateManager(backend, state_file_name=filename) 42 | 43 | @staticmethod 44 | def create_redis_state_manager( 45 | url: str, 46 | namespace: str = "tracker_exporter_default", 47 | serializer: Type[AbstractSerializer] = JsonSerializer, 48 | ) -> RedisStateManager: 49 | backend = Redis.from_url(url, decode_responses=True) 50 | return RedisStateManager(backend, namespace=namespace, serializer=serializer) 51 | -------------------------------------------------------------------------------- /tracker_exporter/state/managers.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from contextlib import suppress 3 | from typing import Any, Type 4 | 5 | from tracker_exporter.state.backends import AbstractFileStorageBackend, AbstractKeyValueStorageBackend 6 | from tracker_exporter.state.serializers import AbstractSerializer, JsonSerializer 7 | from tracker_exporter.exceptions import SerializerError 8 | 9 | 10 | class AbstractStateManager(ABC): 11 | """ 12 | Abstract class for state storage. 13 | 14 | Allows user to async save, receive, delete and flush the state. 
15 | """ 16 | 17 | @abstractmethod 18 | async def set(self, key: str, value: Any) -> None: 19 | """Abstract method for save key:value pair to storage.""" 20 | 21 | @abstractmethod 22 | async def get(self, key: str, default: Any = None) -> Any: 23 | """Abstract method for get value by key from storage.""" 24 | 25 | @abstractmethod 26 | async def delete(self, key: str) -> None: 27 | """Abstract method for delete value by key from storage.""" 28 | 29 | @abstractmethod 30 | async def flush(self) -> None: 31 | """Abstract method for flush (drop) state from storage.""" 32 | 33 | 34 | class FileStateManager(AbstractStateManager): 35 | """ 36 | A state manager for handling state persistence in file storage (local, s3 or other). 37 | 38 | This class provides an abstraction for managing application state data stored within a file. 39 | It supports basic CRUD operations such as setting, getting, and deleting state information, 40 | utilizing an abstract file storage mechanism. 41 | 42 | :param storage: The file storage provider for persisting state data. 43 | :param state_file_name: The name of the file where state data is stored. Defaults to ``state``. 44 | 45 | Usage:: 46 | 47 | from datetime import datetime 48 | 49 | storage_backend = LocalFileStorage() # also, you can use S3FileStorage 50 | state = FileStateManager(storage_backend, state_file_name="my_state") 51 | 52 | 53 | def my_function() -> None: 54 | ... 55 | last_state = state.get("my_function", default={}) 56 | 57 | if last_state.get("last_run") is None: 58 | new_state = {"last_run": datetime.now().strftime("%Y-%M-%d %H:%M:%S")} 59 | state.set("myfunction", new_state) 60 | 61 | ... 62 | 63 | .. note:: 64 | The state data is managed as a dictionary (JSON-compatible), allowing for key-value pair manipulation. 65 | Other data formats is NOT SUPPORTED. 66 | 67 | """ 68 | 69 | def __init__(self, storage: AbstractFileStorageBackend, state_file_name: str = "state") -> None: 70 | self.storage = storage 71 | self.state_file_name = state_file_name 72 | self.state = {} 73 | 74 | self.storage.auto_sub_ext_by_serializer = True 75 | self.storage.raise_if_not_exists = False 76 | 77 | def get(self, key: str, default: Any = None) -> Any: 78 | """ 79 | Get state value by key. 80 | 81 | :param key: State key. 82 | :param default: Default value if specified key not found. 83 | 84 | """ 85 | self.state = self.storage.read(self.state_file_name, deserialize=True) 86 | return self.state.get(key, default) 87 | 88 | def set(self, key: str, value: str) -> None: 89 | """ 90 | Set state an value for the key. 91 | 92 | :param key: State key. 93 | :param value: Value to be saved assotiated with key. 94 | 95 | """ 96 | self.state = self.storage.read(self.state_file_name, deserialize=True) 97 | self.state[key] = value 98 | self.storage.write(self.state_file_name, self.state) 99 | 100 | def delete(self, key: str) -> None: 101 | """ 102 | 103 | Deletes state (value) by key. 104 | 105 | :param key: State key to be deleted. 106 | """ 107 | self.state = self.storage.read(self.state_file_name, deserialize=True) 108 | if self.state.get(key) is not None: 109 | del self.state[key] 110 | self.storage.write(self.state_file_name, self.state) 111 | 112 | def flush(self): 113 | """Drop all data from state.""" 114 | self.state = {} 115 | self.storage.write(self.state_file_name, self.state) 116 | 117 | 118 | class RedisStateManager(AbstractStateManager): 119 | """ 120 | A state manager for handling state persistence in the Redis storage. 
121 | 122 | This class provides an abstraction layer over a Redis storage mechanism, allowing 123 | for easy setting, getting, and deletion of state information with optional serialization 124 | support. It uses an underlying key-value storage provider and supports namespacing to 125 | segregate different state data. 126 | 127 | It is recommended to use a JSON-compatible state format, such as a dict, to maintain portability 128 | between other state managers. 129 | 130 | :param storage: The storage provider for persisting state data. 131 | :param serializer: An optional serializer for converting 132 | data to and from the storage format. Defaults to JsonSerializer if not provided. 133 | :param namespace: A namespace prefix for all keys managed by this instance. 134 | Helps in avoiding key collisions. Defaults to ``tracker_exporter_default``. 135 | 136 | Usage:: 137 | 138 | from datetime import datetime 139 | from redis import Redis 140 | 141 | redis = Redis.from_url("redis://localhost:6379", decode_responses=True) 142 | state = RedisStateManager(redis, namespace="my_namespace") 143 | 144 | 145 | def my_function() -> None: 146 | ... 147 | last_state = state.get("my_function", default={}) 148 | 149 | if last_state.get("last_run") is None: 150 | new_state = {"last_run": datetime.now().strftime("%Y-%M-%d %H:%M:%S")} 151 | state.set("myfunction", new_state) 152 | 153 | ... 154 | 155 | """ 156 | 157 | def __init__( 158 | self, 159 | storage: AbstractKeyValueStorageBackend, 160 | serializer: Type[AbstractSerializer] | None = None, 161 | namespace: str = "tracker_exporter_default", 162 | ) -> None: 163 | self.storage = storage 164 | self.serializer = serializer() or JsonSerializer() 165 | self.namespace = namespace 166 | 167 | def _rkey(self, key: str) -> str: 168 | """Resolve full key path with namespace.""" 169 | return f"{self.namespace}:{key}" 170 | 171 | def set(self, key: str, value: Any) -> None: 172 | """ 173 | Set an value for the state key. 174 | 175 | :param key: State key. 176 | :param value: Value to be saved assotiated with key. 177 | 178 | """ 179 | if isinstance(value, dict): 180 | value = self.serializer.serialize(value) 181 | 182 | with self.storage.client() as session: 183 | session.set(self._rkey(key), value) 184 | 185 | def get(self, key: str) -> Any: 186 | """ 187 | Get state value by key from Redis. 188 | 189 | :param key: Key state. 190 | :param default: Default value if specified key not found. 191 | 192 | """ 193 | with self.storage.client() as session: 194 | value = session.get(self._rkey(key)) 195 | 196 | with suppress(SerializerError): 197 | value = self.serializer.deserialize(value) 198 | return value 199 | 200 | def delete(self, key: str) -> None: 201 | """ 202 | Deletes state (value) by key if exists. 203 | 204 | :param key: State key to be deleted. 205 | """ 206 | with self.storage.client() as session: 207 | session.delete(self._rkey(key)) 208 | 209 | def flush(self) -> None: 210 | """Flush all data in the namespace.""" 211 | raise NotImplementedError 212 | 213 | def execute(self, cmd: str, *args, **kwargs) -> Any: 214 | """ 215 | Common method for execute any Redis supported command. 216 | 217 | :param cmd: Redis command to execute. 
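Usage (illustrative example; the Redis command, key and TTL value here are arbitrary)::

    state.execute("EXPIRE", "tracker_exporter_default:my_function", 3600)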
218 | """ 219 | with self.storage.client() as session: 220 | return session.execute_command(cmd, *args, **kwargs) 221 | 222 | 223 | __all__ = ["AbstractStateManager", "FileStateManager", "RedisStateManager"] 224 | -------------------------------------------------------------------------------- /tracker_exporter/state/serializers.py: -------------------------------------------------------------------------------- 1 | """This module contains content serializers.""" 2 | 3 | import json 4 | import yaml 5 | 6 | from abc import ABC, abstractmethod 7 | from typing import Any 8 | 9 | from tracker_exporter.exceptions import SerializerError 10 | 11 | 12 | class AbstractSerializer(ABC): 13 | """ 14 | An abstract serializer like JSON, YAML, etc. 15 | 16 | All (de)serialize errors must be raise `SerializerError`. 17 | """ 18 | 19 | def __init__(self) -> None: 20 | self.is_initialized = True 21 | 22 | @property 23 | @abstractmethod 24 | def ext(self) -> str: 25 | """Abstract property for returns serializer file extension.""" 26 | 27 | @abstractmethod 28 | def serialize(self, data: Any, *args, **kwargs) -> str: 29 | """Abstract method for serialize data.""" 30 | 31 | @abstractmethod 32 | def deserialize(self, data: str, **kwargs) -> Any: 33 | """Abstract method for deserialize data.""" 34 | 35 | 36 | class JsonSerializer(AbstractSerializer): 37 | """ 38 | Serializer for converting between JSON and Python objects. 39 | 40 | This serializer handles serialization (Python object to JSON format) 41 | and deserialization (JSON format to Python object) processes, 42 | ensuring that data is correctly transformed for JSON storage or 43 | retrieval while maintaining the Python object's structure. 44 | 45 | :raises SerializerError: If an error occurs during the JSON (de)serialization process. 46 | """ 47 | 48 | @property 49 | def ext(self) -> str: 50 | return "json" 51 | 52 | def serialize(self, data: Any, ensure_ascii: bool = False, indent: int = 2, **kwargs) -> str: 53 | """ 54 | Serialize data to JSON format (str). 55 | 56 | :param data: Data that will be serialized to JSON. 57 | :param ensure_ascii: If ``False``, then the return value can contain non-ASCII characters if they appear in strings contained in obj. 58 | Otherwise, all such characters are escaped in JSON strings. 59 | :param indent: Spaces indent. Defaults: ``2``. 60 | 61 | :raises SerializerError: If an error occurs during the JSON serialization process. 62 | """ 63 | try: 64 | return json.dumps(data, ensure_ascii=ensure_ascii, indent=indent, **kwargs) 65 | except (json.JSONDecodeError, TypeError) as exc: 66 | raise SerializerError(exc) from exc 67 | 68 | def deserialize(self, data: str, **kwargs) -> Any: 69 | """ 70 | Derialize JSON data to Python object format. 71 | 72 | :param data: Data that will be deserialized from JSON. 73 | 74 | :raises SerializerError: If an error occurs during the JSON deserialization process. 
75 | """ 76 | try: 77 | return json.loads(data, **kwargs) 78 | except (json.JSONDecodeError, TypeError) as exc: 79 | raise SerializerError(exc) from exc 80 | 81 | 82 | __all__ = ["AbstractSerializer", "JsonSerializer"] 83 | -------------------------------------------------------------------------------- /tracker_exporter/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akimrx/yandex-tracker-exporter/f3e277d4e53eadb99cef855e8eb284c1068b5637/tracker_exporter/utils/__init__.py -------------------------------------------------------------------------------- /tracker_exporter/utils/helpers.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import time 4 | import json 5 | import logging 6 | import random 7 | import pytz 8 | import psutil 9 | 10 | from functools import wraps 11 | from typing import Union, Tuple, Type, Callable, Any 12 | from datetime import datetime, timezone as dt_timezone 13 | 14 | import holidays 15 | import pandas as pd 16 | import businesstimedelta 17 | 18 | 19 | from yandex_tracker_client.objects import Reference 20 | from tracker_exporter._typing import DateTimeISO8601Str, DateStr, _Sequence 21 | from tracker_exporter.models.base import TimeDeltaOut 22 | from tracker_exporter.config import config 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | def get_timedelta(end_time: datetime, start_time: datetime, out: TimeDeltaOut = TimeDeltaOut.SECONDS) -> int: 28 | """Simple timedelta between dates.""" 29 | assert isinstance(start_time, datetime) 30 | assert isinstance(end_time, datetime) 31 | 32 | delta = int((end_time - start_time).total_seconds()) 33 | if out == TimeDeltaOut.MINUTES: 34 | return delta // 60 35 | if out == TimeDeltaOut.SECONDS: 36 | return delta 37 | return delta 38 | 39 | 40 | def calculate_time_spent( 41 | start_date: datetime, 42 | end_date: datetime, 43 | busdays_only: bool = False, 44 | workdays: list = config.workdays, 45 | business_hours: Tuple = ( 46 | config.business_hours_start, 47 | config.business_hours_end, 48 | ), 49 | ) -> int: 50 | """ 51 | Calculate timedelta between dates with business days support. 52 | Weekdays: Monday is 0, Sunday is 6, so weekends (5, 6) mean (Sat, Sun). 53 | Returns: seconds 54 | """ 55 | if not isinstance(start_date, datetime): 56 | start_date = pd.to_datetime(start_date) 57 | if not isinstance(end_date, datetime): 58 | end_date = pd.to_datetime(end_date) 59 | 60 | holiday_rules = businesstimedelta.HolidayRule(holidays.RU()) 61 | workday_rules = businesstimedelta.WorkDayRule( 62 | start_time=business_hours[0], end_time=business_hours[1], working_days=workdays 63 | ) 64 | 65 | if busdays_only: 66 | logger.debug(f"Calculating workhours. Business hours: {business_hours}. 
{start_date}, {end_date}") 67 | bt = businesstimedelta.Rules([workday_rules, holiday_rules]) 68 | result = bt.difference(start_date, end_date).timedelta.total_seconds() 69 | else: 70 | logger.debug("Calculating regular hours") 71 | result = (end_date - start_date).total_seconds() 72 | 73 | return abs(int(result)) 74 | 75 | 76 | def fix_null_dates(data: dict) -> dict: 77 | """Clean keys with None values from dict.""" 78 | to_remove = [] 79 | 80 | for key, value in data.items(): 81 | if key in config.not_nullable_fields and (value is None or value == ""): 82 | to_remove.append(key) 83 | 84 | for key in to_remove: 85 | del data[key] 86 | 87 | return data 88 | 89 | 90 | # pylint: disable=R1710 91 | def validate_resource(resource: object, attribute: str, low: bool = True) -> Any | None: 92 | """Validate Yandex.Tracker object attribute and return it if exists.""" 93 | if hasattr(resource, attribute): 94 | _attr = getattr(resource, attribute) 95 | if isinstance(_attr, str): 96 | if low: 97 | return _attr.lower() 98 | return _attr 99 | return _attr 100 | 101 | 102 | def to_snake_case(text: str) -> str: 103 | """Convert any string to `snake_case` format.""" 104 | if text is None: 105 | return None 106 | if not isinstance(text, str): 107 | raise ValueError(f"Expected string, received: {type(text)}") 108 | if text.strip() == "": 109 | return text.strip() 110 | 111 | text = re.sub(r"(?<=[a-zа-яё])(?=[A-ZА-ЯЁ])", "_", text) 112 | text = re.sub(r"(?<=[a-zа-яё])(?=\d)", "_", text) 113 | text = re.sub(r"(?<=\d)(?=[a-zа-яё])", "_", text) 114 | text = re.sub(r"[^a-zA-Zа-яёА-ЯЁ0-9_]", "_", text) 115 | 116 | 117 | return text.lower() 118 | 119 | 120 | def convert_datetime( 121 | dtime: str, 122 | source_dt_format: str = config.datetime_response_format, 123 | output_format: str = config.datetime_clickhouse_format, 124 | date_only: bool = False, 125 | timezone: str = "UTC", 126 | ) -> DateTimeISO8601Str | DateStr: 127 | """ 128 | Returns ISO8601 datetime (UTC). 129 | Or date format `YYYY-MM-DD` from original datetime when date_only passed. 130 | """ 131 | logger.debug(f"Timezone set to {timezone}") 132 | if dtime is None: 133 | return None 134 | 135 | dt = datetime.strptime(dtime, source_dt_format) 136 | if dt.tzinfo is None: 137 | logger.debug("Replacing datetime tzinfo to UTC") 138 | dt = dt.replace(tzinfo=dt_timezone.utc) 139 | 140 | output_datetime = dt.astimezone(pytz.timezone(timezone)) 141 | if date_only: 142 | return output_datetime.date().strftime("%Y-%d-%m") 143 | 144 | if output_format.endswith("%f"): 145 | return output_datetime.strftime(output_format)[:-3] 146 | return output_datetime.strftime(output_format) 147 | 148 | 149 | def backoff( 150 | exceptions: _Sequence[Type[Exception]], 151 | base_delay: int | float = 0.5, 152 | expo_factor: int | float = 2.5, 153 | max_tries: int = 3, 154 | jitter: bool = False, 155 | ) -> Callable: 156 | """Decorator for backoff retry function/method calls.""" 157 | 158 | def retry_decorator(func: Callable): 159 | @wraps(func) 160 | def func_retry(*args, **kwargs): 161 | logger.debug(f"Start func {func.__qualname__} with {max_tries} tries") 162 | tries, delay = max_tries, base_delay 163 | counter = 0 164 | while tries > 0: 165 | try: 166 | counter += 1 167 | return func(*args, **kwargs) 168 | except exceptions as err: 169 | tries -= 1 170 | if tries == 0: 171 | logger.error(f"{func.__qualname__} has failed {counter} times") 172 | raise 173 | logger.warning( 174 | f"Error in func {func.__qualname__}, cause: {err}. 
" 175 | f"Retrying ({counter}/{max_tries - 1}) in {delay:.2f}s..." 176 | ) 177 | if jitter: 178 | delay = random.uniform(delay / 2, delay * expo_factor) # nosec 179 | time.sleep(delay) 180 | else: 181 | time.sleep(delay) 182 | delay *= expo_factor 183 | 184 | return func_retry 185 | 186 | return retry_decorator 187 | 188 | 189 | def to_human_time(seconds: Union[int, float], verbosity: int = 2) -> str: 190 | """Convert seconds to human readable timedelta like a `2w 3d 1h 20m`.""" 191 | seconds = int(seconds) 192 | if seconds == 0: 193 | return "0s" 194 | 195 | negative = False 196 | if seconds < 0: 197 | negative = True 198 | seconds = abs(seconds) 199 | 200 | result = [] 201 | intervals = ( 202 | ("y", 31104000), 203 | ("mo", 2592000), 204 | ("w", 604800), 205 | ("d", 86400), 206 | ("h", 3600), 207 | ("m", 60), 208 | ("s", 1), 209 | ) 210 | for name, count in intervals: 211 | value = seconds // count 212 | if value: 213 | seconds -= value * count 214 | result.append(f"{value}{name}") 215 | delta = " ".join(result[:verbosity]) 216 | return f"-{delta}" if negative else delta 217 | 218 | 219 | def from_human_time(timestr: str) -> int: 220 | """Convert a duration string like `2w 3d 1h 20m` to seconds.""" 221 | 222 | logger.debug(f"Received human time: {timestr}") 223 | total_seconds = 0 224 | patterns = [ 225 | (r"(\d+)y", 365 * 24 * 60 * 60), # years 226 | (r"(\d+)mo", 30 * 24 * 60 * 60), # months 227 | (r"(\d+)w", 7 * 24 * 60 * 60), # weeks 228 | (r"(\d+)d", 24 * 60 * 60), # days 229 | (r"(\d+)h", 60 * 60), # hours 230 | (r"(\d+)m", 60), # minutes 231 | (r"(\d+)s", 1), # seconds 232 | ] 233 | 234 | for pattern, multiplier in patterns: 235 | matches = re.search(pattern, timestr) 236 | if matches: 237 | total_seconds += int(matches.group(1)) * multiplier 238 | timestr = re.sub(pattern, "", timestr) 239 | 240 | timestr = timestr.strip() 241 | if timestr: 242 | raise ValueError(f"Invalid format detected in the string: '{timestr}'") 243 | 244 | return total_seconds 245 | 246 | 247 | def string_normalize(text: str) -> str: 248 | """Remove all incompatible symbols.""" 249 | emoji_pattern = re.compile( 250 | "[" 251 | "\U0001F600-\U0001F64F" # emoticons 252 | "\U0001F300-\U0001F5FF" # symbols & pictographs 253 | "\U0001F680-\U0001F6FF" # transport & map symbols 254 | "\U0001F1E0-\U0001F1FF" # flags (iOS) 255 | "]+", 256 | flags=re.UNICODE, 257 | ) 258 | return emoji_pattern.sub(r"", text) 259 | 260 | 261 | def extract_changelog_field(value: Any) -> Any: 262 | """Extractor for Yandex.Tracker issue changelog.""" 263 | match value: 264 | case list(): 265 | logger.debug(f"Changelog field is list: {value}") 266 | return ", ".join(extract_changelog_field(i) for i in value) 267 | case str(): 268 | logger.debug(f"Changelog field is string: {value}") 269 | try: 270 | dtime = convert_datetime(value) 271 | except Exception: 272 | if len(value) > 100: 273 | return "text too long, see history in UI" 274 | return value 275 | else: 276 | return dtime 277 | case dict(): 278 | logger.debug(f"Changelog field is dict, dumping: {value}") 279 | return json.dumps(value, ensure_ascii=False) 280 | case None: 281 | logger.debug(f"Changelog field is None, fixing: {value}") 282 | return "" 283 | case int(): 284 | logger.debug(f"Changelog field is integer: {value}") 285 | return str(value) 286 | case float(): 287 | logger.debug(f"Changelog field is float: {value}") 288 | return str(value) 289 | case Reference(): 290 | logger.debug(f"Changelog field is Reference to object: {value}. 
Extracting...") 291 | return ( 292 | validate_resource(value, "key", low=False) 293 | or validate_resource(value, "email") 294 | or validate_resource(value, "name", low=False) 295 | or validate_resource(value, "id", low=False) 296 | ) 297 | case _: 298 | logger.warning(f"Unknown type of changelog field received: {type(value)}: {value}") 299 | 300 | 301 | def bytes_to_human(data: int, granularity=2): 302 | """Convert bytes to human format with binary prefix.""" 303 | _bytes = int(data) 304 | result = [] 305 | sizes = ( # fmt: off 306 | ("TB", 1024**4), 307 | ("GB", 1024**3), 308 | ("MB", 1024**2), 309 | ("KB", 1024), 310 | ("B", 1), 311 | ) # fmt: on 312 | if _bytes == 0: 313 | return 0 314 | else: 315 | for name, count in sizes: 316 | value = _bytes // count 317 | if value: 318 | _bytes -= value * count 319 | result.append(f"{value}{name}") 320 | return ", ".join(result[:granularity]) 321 | 322 | 323 | def log_etl_stats(iteration: int, remaining: int, elapsed: float, entity: str = "issues"): # pragma: no cover 324 | """Logging resources usage.""" 325 | process = psutil.Process(os.getpid()) 326 | memory = process.memory_info() 327 | memory_rss_usage = bytes_to_human(memory.rss, granularity=1) 328 | elapsed_time = to_human_time(elapsed) 329 | 330 | try: 331 | avg_time = elapsed / iteration 332 | avg_task_transform = f"{avg_time:.2f}ms" if avg_time < 1 else to_human_time(avg_time) 333 | except ZeroDivisionError: 334 | avg_task_transform = "calculating.." 335 | 336 | logger.info( 337 | f"Processed {iteration} of ~{remaining} {entity}. Avg time per issue: {avg_task_transform}. " 338 | f"Elapsed time: {elapsed_time}. MEM_RSS_USED: {memory_rss_usage}" 339 | ) 340 | --------------------------------------------------------------------------------