├── .coveragerc
├── .dockerignore
├── .envrc
├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── ci.yml
    │   ├── helm-release.yml
    │   ├── helm-test.yml
    │   └── push-image-ghcr.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .pylintrc
├── .python-version
├── Dockerfile
├── Dockerfile.pyinstaller
├── LICENSE.md
├── README.md
├── Taskfile.yml
├── buildInstaller.sh
├── celery-mixin
    ├── .gitignore
    ├── Makefile
    ├── README.md
    ├── alerts.jsonnet
    ├── alerts
    │   └── alerts.libsonnet
    ├── config.libsonnet
    ├── dashboards.jsonnet
    ├── dashboards
    │   ├── celery-tasks-by-task.libsonnet
    │   ├── celery-tasks-overview.libsonnet
    │   └── dashboards.libsonnet
    ├── dashboards_out
    │   ├── celery-tasks-by-task.json
    │   ├── celery-tasks-overview.json
    │   └── celery-tasks.json
    ├── jsonnetfile.json
    ├── mixin.libsonnet
    ├── prometheus-alerts.yaml
    └── tests.yaml
├── charts
    └── celery-exporter
    │   ├── .helmignore
    │   ├── Chart.yaml
    │   ├── README.md
    │   ├── ci
    │       └── test-values.yaml
    │   ├── templates
    │       ├── NOTES.txt
    │       ├── _helpers.tpl
    │       ├── deployment.yaml
    │       ├── ingress.yaml
    │       ├── service.yaml
    │       ├── serviceaccount.yaml
    │       ├── servicemonitor.yaml
    │       └── tests
    │       │   └── test-connection.yaml
    │   └── values.yaml
├── cli.py
├── conftest.py
├── docker-compose.yml
├── images
    ├── celery-tasks-by-task.png
    └── celery-tasks-overview.png
├── jsonnetfile.json
├── jsonnetfile.lock.json
├── poetry.lock
├── pyproject.toml
├── pytest.ini
├── src
    ├── __init__.py
    ├── cli.py
    ├── exporter.py
    ├── help.py
    ├── http_server.py
    ├── test_cli.py
    ├── test_exporter.py
    ├── test_http_server.py
    └── test_metrics.py
└── vendor
    ├── github.com
        └── honeylogic-io
        │   └── utils-libsonnet
        │       └── lib
        │           ├── celery.libsonnet
        │           ├── django.libsonnet
        │           ├── drone.libsonnet
        │           └── ingress.libsonnet
    └── lib


/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | source = .
 3 | 
 4 | omit =
 5 |     .venv/*
 6 |     .virtualenv/*
 7 | 
 8 | [report]
 9 | fail_under = 80
10 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | .virtualenv
 2 | .venv
 3 | .mypy_cache
 4 | .pytest_cache
 5 | .git
 6 | build
 7 | dist
 8 | images
 9 | __pycache__
10 | vendor
11 | 


--------------------------------------------------------------------------------
/.envrc:
--------------------------------------------------------------------------------
1 | layout pyenv $(cat .python-version)
2 | layout python
3 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 2
3 | updates:
4 |   - package-ecosystem: "pip"
5 |     directory: "/"
6 |     schedule:
7 |       interval: "monthly"
8 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
  1 | name: CI
  2 | on:
  3 |   push:
  4 |     branches:
  5 |       - master
  6 |   pull_request:
  7 |     branches:
  8 |       - master
  9 | 
 10 | jobs:
 11 |   lint:
 12 |     name: Lint
 13 |     runs-on: ubuntu-latest
 14 |     steps:
 15 |       - uses: actions/checkout@v4
 16 | 
 17 |       - uses: actions/setup-python@v5
 18 |         id: setup-python
 19 |         with:
 20 |           python-version: 3.13
 21 | 
 22 |       - name: Install Poetry
 23 |         uses: snok/install-poetry@v1
 24 |         with:
 25 |           virtualenvs-create: true
 26 |           virtualenvs-in-project: true
 27 |           installer-parallel: true
 28 | 
 29 |       - name: Load cached venv
 30 |         id: cached-poetry-dependencies
 31 |         uses: actions/cache@v4
 32 |         with:
 33 |           path: .venv
 34 |           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
 35 | 
 36 |       - name: Install dependencies
 37 |         if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
 38 |         run: |
 39 |           poetry install --no-interaction --no-root
 40 | 
 41 |       - name: Format
 42 |         run: |
 43 |           source .venv/bin/activate
 44 |           black . --check
 45 | 
 46 |       - name: Type Check
 47 |         run: |
 48 |           source .venv/bin/activate
 49 |           mypy .
 50 | 
 51 |       - name: Lint
 52 |         run: |
 53 |           source .venv/bin/activate
 54 |           pylint $(git ls-files -- '*.py' ':!:**/migrations/*.py')
 55 | 
 56 |   test:
 57 |     name: Test
 58 |     runs-on: ubuntu-latest
 59 |     services:
 60 |       redis:
 61 |         image: redis:6
 62 |         ports: ['6379:6379']
 63 |       rabbitmq:
 64 |         image: rabbitmq:3
 65 |         ports: ['5672:5672']
 66 |     strategy:
 67 |       matrix:
 68 |         broker: [memory, redis, rabbitmq]
 69 |     steps:
 70 |       - uses: actions/checkout@v4
 71 | 
 72 |       - uses: actions/setup-python@v5
 73 |         id: setup-python
 74 |         with:
 75 |           python-version: 3.13
 76 | 
 77 |       - name: Install Poetry
 78 |         uses: snok/install-poetry@v1
 79 |         with:
 80 |           virtualenvs-create: true
 81 |           virtualenvs-in-project: true
 82 |           installer-parallel: true
 83 | 
 84 |       - name: Load cached venv
 85 |         id: cached-poetry-dependencies
 86 |         uses: actions/cache@v4
 87 |         with:
 88 |           path: .venv
 89 |           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
 90 | 
 91 |       - name: Install dependencies
 92 |         if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
 93 |         run: |
 94 |           poetry install --no-interaction --no-root
 95 |           source .venv/bin/activate
 96 | 
 97 |       - name: Test
 98 |         run: |
 99 |           source .venv/bin/activate
100 |           pytest --broker=${{ matrix.broker }} --ignore .poetry --cov
101 | 


--------------------------------------------------------------------------------
/.github/workflows/helm-release.yml:
--------------------------------------------------------------------------------
 1 | name: Release Charts
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 | 
 8 | jobs:
 9 |   release:
10 |     # depending on default permission settings for your org (contents being read-only or read-write for workloads), you will have to add permissions
11 |     # see: https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
12 |     permissions:
13 |       contents: write
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |       - name: Checkout
17 |         uses: actions/checkout@v4
18 |         with:
19 |           fetch-depth: 0
20 | 
21 |       - name: Configure Git
22 |         run: |
23 |           git config user.name "$GITHUB_ACTOR"
24 |           git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
25 | 
26 |       - name: Set up Helm
27 |         uses: azure/setup-helm@v4.2.0
28 |         with:
29 |           version: v3.14.4
30 | 
31 |       - name: Run chart-releaser
32 |         uses: helm/chart-releaser-action@v1.6.0
33 |         env:
34 |           CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
35 |           CR_RELEASE_NAME_TEMPLATE: "{{ .Name }}-chart-{{ .Version }}"
36 | 


--------------------------------------------------------------------------------
/.github/workflows/helm-test.yml:
--------------------------------------------------------------------------------
 1 | name: Lint and Test Charts
 2 | 
 3 | on: pull_request
 4 | 
 5 | jobs:
 6 |   lint-test:
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |       - name: Checkout
10 |         uses: actions/checkout@v4
11 |         with:
12 |           fetch-depth: 0
13 | 
14 |       - name: Set up Helm
15 |         uses: azure/setup-helm@v4.2.0
16 |         with:
17 |           version: v3.14.4
18 | 
19 |       - uses: actions/setup-python@v5
20 |         with:
21 |           python-version: "3.x"
22 |           check-latest: true
23 | 
24 |       - name: Set up chart-testing
25 |         uses: helm/chart-testing-action@v2.6.1
26 | 
27 |       - name: Run chart-testing (list-changed)
28 |         id: list-changed
29 |         run: |
30 |           changed=$(ct list-changed --target-branch ${{ github.event.repository.default_branch }})
31 |           if [[ -n "$changed" ]]; then
32 |             echo "changed=true" >> "$GITHUB_OUTPUT"
33 |           fi
34 | 
35 |       - name: Run chart-testing (lint)
36 |         if: steps.list-changed.outputs.changed == 'true'
37 |         run: ct lint --target-branch ${{ github.event.repository.default_branch }}
38 | 
39 |       - name: Create kind cluster
40 |         if: steps.list-changed.outputs.changed == 'true'
41 |         uses: helm/kind-action@v1.10.0
42 | 
43 |       - name: Run chart-testing (install)
44 |         if: steps.list-changed.outputs.changed == 'true'
45 |         run: ct install --target-branch ${{ github.event.repository.default_branch }}
46 | 


--------------------------------------------------------------------------------
/.github/workflows/push-image-ghcr.yml:
--------------------------------------------------------------------------------
 1 | name: Create and publish a Docker image to ghcr.io
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - "v[0-9]+.[0-9]+.[0-9]+"
 7 | 
 8 | env:
 9 |   REGISTRY: ghcr.io
10 |   IMAGE_NAME: ${{ github.repository }}
11 | 
12 | jobs:
13 |   build-and-push-image:
14 |     runs-on: ubuntu-latest
15 |     permissions:
16 |       contents: write
17 |       packages: write
18 | 
19 |     steps:
20 |       - name: Checkout repository
21 |         uses: actions/checkout@v4
22 | 
23 |       - name: Set up QEMU
24 |         uses: docker/setup-qemu-action@v3
25 | 
26 |       - name: Set up Docker Buildx
27 |         uses: docker/setup-buildx-action@v3
28 | 
29 |       - name: Log in to the Container registry
30 |         uses: docker/login-action@v3
31 |         with:
32 |           registry: ${{ env.REGISTRY }}
33 |           username: ${{ github.actor }}
34 |           password: ${{ secrets.GITHUB_TOKEN }}
35 | 
36 |       - name: Extract metadata (tags, labels) for Docker
37 |         id: meta
38 |         uses: docker/metadata-action@v5
39 |         with:
40 |           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
41 |           tags: |
42 |             type=sha,enable=true,priority=100,prefix=,suffix=,format=short
43 |             type=semver,pattern={{version}},value=${{ github.ref_name }}
44 | 
45 |       - name: Build and push
46 |         uses: docker/build-push-action@v6
47 |         with:
48 |           context: .
49 |           platforms: linux/amd64,linux/arm64
50 |           provenance: false
51 |           push: true
52 |           tags: ${{ steps.meta.outputs.tags }}
53 |           cache-from: type=gha
54 |           cache-to: type=gha,mode=max
55 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.toptal.com/developers/gitignore/api/python
  3 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
  4 | 
  5 | ### Python ###
  6 | # Byte-compiled / optimized / DLL files
  7 | __pycache__/
  8 | *.py[cod]
  9 | *$py.class
 10 | 
 11 | # C extensions
 12 | *.so
 13 | 
 14 | # Distribution / packaging
 15 | .Python
 16 | build/
 17 | develop-eggs/
 18 | dist/
 19 | downloads/
 20 | eggs/
 21 | .eggs/
 22 | lib/
 23 | lib64/
 24 | parts/
 25 | sdist/
 26 | var/
 27 | wheels/
 28 | pip-wheel-metadata/
 29 | share/python-wheels/
 30 | *.egg-info/
 31 | .installed.cfg
 32 | *.egg
 33 | MANIFEST
 34 | 
 35 | # PyInstaller
 36 | #  Usually these files are written by a python script from a template
 37 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 38 | *.manifest
 39 | *.spec
 40 | 
 41 | # Installer logs
 42 | pip-log.txt
 43 | pip-delete-this-directory.txt
 44 | 
 45 | # Unit test / coverage reports
 46 | htmlcov/
 47 | .tox/
 48 | .nox/
 49 | .coverage
 50 | .coverage.*
 51 | .cache
 52 | nosetests.xml
 53 | coverage.xml
 54 | *.cover
 55 | *.py,cover
 56 | .hypothesis/
 57 | .pytest_cache/
 58 | pytestdebug.log
 59 | 
 60 | # Translations
 61 | *.mo
 62 | *.pot
 63 | 
 64 | # Django stuff:
 65 | *.log
 66 | local_settings.py
 67 | db.sqlite3
 68 | db.sqlite3-journal
 69 | 
 70 | # Flask stuff:
 71 | instance/
 72 | .webassets-cache
 73 | 
 74 | # Scrapy stuff:
 75 | .scrapy
 76 | 
 77 | # Sphinx documentation
 78 | docs/_build/
 79 | doc/_build/
 80 | 
 81 | # PyBuilder
 82 | target/
 83 | 
 84 | # Jupyter Notebook
 85 | .ipynb_checkpoints
 86 | 
 87 | # IPython
 88 | profile_default/
 89 | ipython_config.py
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 99 | __pypackages__/
100 | 
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 | 
105 | # SageMath parsed files
106 | *.sage.py
107 | 
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 | pythonenv*
117 | 
118 | # Spyder project settings
119 | .spyderproject
120 | .spyproject
121 | 
122 | # Rope project settings
123 | .ropeproject
124 | 
125 | # mkdocs documentation
126 | /site
127 | 
128 | # mypy
129 | .mypy_cache/
130 | .dmypy.json
131 | dmypy.json
132 | 
133 | # Pyre type checker
134 | .pyre/
135 | 
136 | # pytype static type analyzer
137 | .pytype/
138 | 
139 | # profiling data
140 | .prof
141 | 
142 | # End of https://www.toptal.com/developers/gitignore/api/python
143 | 
144 | .virtualenv
145 | .direnv
146 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 |   - repo: local
 3 |     hooks:
 4 |       - id: black
 5 |         name: black
 6 |         entry: poetry
 7 |         language: system
 8 |         types: [python]
 9 |         args:
10 |           - run
11 |           - black
12 | 
13 |   - repo: local
14 |     hooks:
15 |       - id: mypy
16 |         name: mypy
17 |         entry: poetry
18 |         language: system
19 |         pass_filenames: false
20 |         args:
21 |           - run
22 |           - mypy
23 |           - .
24 | 
25 |   - repo: local
26 |     hooks:
27 |       - id: pylint
28 |         name: pylint
29 |         entry: poetry
30 |         language: system
31 |         types: [python]
32 |         args:
33 |           - run
34 |           - pylint
35 |           - "-rn" # Only display messages
36 |           - "-sn" # Don't display the score
37 | 


--------------------------------------------------------------------------------
/.pylintrc:
--------------------------------------------------------------------------------
  1 | [MASTER]
  2 | 
  3 | # A comma-separated list of package or module names from where C extensions may
  4 | # be loaded. Extensions are loading into the active Python interpreter and may
  5 | # run arbitrary code.
  6 | extension-pkg-whitelist=
  7 | 
  8 | # Specify a score threshold to be exceeded before program exits with error.
  9 | fail-under=10.0
 10 | 
 11 | # Add files or directories to the blacklist. They should be base names, not
 12 | # paths.
 13 | ignore=CVS
 14 | 
 15 | # Add files or directories matching the regex patterns to the blacklist. The
 16 | # regex matches against base names, not paths.
 17 | ignore-patterns=
 18 | 
 19 | # Python code to execute, usually for sys.path manipulation such as
 20 | # pygtk.require().
 21 | #init-hook=
 22 | 
 23 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
 24 | # number of processors available to use.
 25 | jobs=1
 26 | 
 27 | # Control the amount of potential inferred values when inferring a single
 28 | # object. This can help the performance when dealing with large functions or
 29 | # complex, nested conditions.
 30 | limit-inference-results=100
 31 | 
 32 | # List of plugins (as comma separated values of python module names) to load,
 33 | # usually to register additional checkers.
 34 | load-plugins=
 35 | 
 36 | # Pickle collected data for later comparisons.
 37 | persistent=yes
 38 | 
 39 | # When enabled, pylint would attempt to guess common misconfiguration and emit
 40 | # user-friendly hints instead of false-positive error messages.
 41 | suggestion-mode=yes
 42 | 
 43 | # Allow loading of arbitrary C extensions. Extensions are imported into the
 44 | # active Python interpreter and may run arbitrary code.
 45 | unsafe-load-any-extension=no
 46 | 
 47 | 
 48 | [MESSAGES CONTROL]
 49 | 
 50 | # Only show warnings with the listed confidence levels. Leave empty to show
 51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
 52 | confidence=
 53 | 
 54 | # Disable the message, report, category or checker with the given id(s). You
 55 | # can either give multiple identifiers separated by comma (,) or put this
 56 | # option multiple times (only on the command line, not in the configuration
 57 | # file where it should appear only once). You can also use "--disable=all" to
 58 | # disable everything first and then reenable specific checks. For example, if
 59 | # you want to run only the similarities checker, you can use "--disable=all
 60 | # --enable=similarities". If you want to run only the classes checker, but have
 61 | # no Warning level messages displayed, use "--disable=all --enable=classes
 62 | # --disable=W".
 63 | disable=
 64 |         missing-function-docstring,
 65 |         missing-module-docstring,
 66 |         invalid-name,
 67 |         redefined-outer-name,
 68 |         missing-class-docstring,
 69 |         fixme,
 70 |         unnecessary-lambda-assignment,
 71 |         use-dict-literal
 72 | 
 73 | # Enable the message, report, category or checker with the given id(s). You can
 74 | # either give multiple identifier separated by comma (,) or put this option
 75 | # multiple time (only on the command line, not in the configuration file where
 76 | # it should appear only once). See also the "--disable" option for examples.
 77 | enable=c-extension-no-member
 78 | 
 79 | 
 80 | [REPORTS]
 81 | 
 82 | # Python expression which should return a score less than or equal to 10. You
 83 | # have access to the variables 'error', 'warning', 'refactor', and 'convention'
 84 | # which contain the number of messages in each category, as well as 'statement'
 85 | # which is the total number of statements analyzed. This score is used by the
 86 | # global evaluation report (RP0004).
 87 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
 88 | 
 89 | # Template used to display messages. This is a python new-style format string
 90 | # used to format the message information. See doc for all details.
 91 | #msg-template=
 92 | 
 93 | # Set the output format. Available formats are text, parseable, colorized, json
 94 | # and msvs (visual studio). You can also give a reporter class, e.g.
 95 | # mypackage.mymodule.MyReporterClass.
 96 | output-format=text
 97 | 
 98 | # Tells whether to display a full report or only the messages.
 99 | reports=no
100 | 
101 | # Activate the evaluation score.
102 | score=yes
103 | 
104 | 
105 | [REFACTORING]
106 | 
107 | # Maximum number of nested blocks for function / method body
108 | max-nested-blocks=5
109 | 
110 | # Complete name of functions that never returns. When checking for
111 | # inconsistent-return-statements if a never returning function is called then
112 | # it will be considered as an explicit return statement and no message will be
113 | # printed.
114 | never-returning-functions=sys.exit
115 | 
116 | 
117 | [TYPECHECK]
118 | 
119 | # List of decorators that produce context managers, such as
120 | # contextlib.contextmanager. Add to this list to register other decorators that
121 | # produce valid context managers.
122 | contextmanager-decorators=contextlib.contextmanager
123 | 
124 | # List of members which are set dynamically and missed by pylint inference
125 | # system, and so shouldn't trigger E1101 when accessed. Python regular
126 | # expressions are accepted.
127 | generated-members=
128 | 
129 | # Tells whether missing members accessed in mixin class should be ignored. A
130 | # mixin class is detected if its name ends with "mixin" (case insensitive).
131 | ignore-mixin-members=yes
132 | 
133 | # Tells whether to warn about missing members when the owner of the attribute
134 | # is inferred to be None.
135 | ignore-none=yes
136 | 
137 | # This flag controls whether pylint should warn about no-member and similar
138 | # checks whenever an opaque object is returned when inferring. The inference
139 | # can return multiple potential results while evaluating a Python object, but
140 | # some branches might not be evaluated, which results in partial inference. In
141 | # that case, it might be useful to still emit no-member and other checks for
142 | # the rest of the inferred objects.
143 | ignore-on-opaque-inference=yes
144 | 
145 | # List of class names for which member attributes should not be checked (useful
146 | # for classes with dynamically set attributes). This supports the use of
147 | # qualified names.
148 | ignored-classes=optparse.Values,thread._local,_thread._local
149 | 
150 | # List of module names for which member attributes should not be checked
151 | # (useful for modules/projects where namespaces are manipulated during runtime
152 | # and thus existing member attributes cannot be deduced by static analysis). It
153 | # supports qualified module names, as well as Unix pattern matching.
154 | ignored-modules=
155 | 
156 | # Show a hint with possible names when a member name was not found. The aspect
157 | # of finding the hint is based on edit distance.
158 | missing-member-hint=yes
159 | 
160 | # The minimum edit distance a name should have in order to be considered a
161 | # similar match for a missing member name.
162 | missing-member-hint-distance=1
163 | 
164 | # The total number of similar names that should be taken in consideration when
165 | # showing a hint for a missing member.
166 | missing-member-max-choices=1
167 | 
168 | # List of decorators that change the signature of a decorated function.
169 | signature-mutators=
170 | 
171 | 
172 | [VARIABLES]
173 | 
174 | # List of additional names supposed to be defined in builtins. Remember that
175 | # you should avoid defining new builtins when possible.
176 | additional-builtins=
177 | 
178 | # Tells whether unused global variables should be treated as a violation.
179 | allow-global-unused-variables=yes
180 | 
181 | # List of strings which can identify a callback function by name. A callback
182 | # name must start or end with one of those strings.
183 | callbacks=cb_,
184 |           _cb
185 | 
186 | # A regular expression matching the name of dummy variables (i.e. expected to
187 | # not be used).
188 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
189 | 
190 | # Argument names that match this expression will be ignored. Default to name
191 | # with leading underscore.
192 | ignored-argument-names=_.*|^ignored_|^unused_
193 | 
194 | # Tells whether we should check for unused import in __init__ files.
195 | init-import=no
196 | 
197 | # List of qualified module names which can have objects that can redefine
198 | # builtins.
199 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
200 | 
201 | 
202 | [BASIC]
203 | 
204 | # Naming style matching correct argument names.
205 | argument-naming-style=snake_case
206 | 
207 | # Regular expression matching correct argument names. Overrides argument-
208 | # naming-style.
209 | #argument-rgx=
210 | 
211 | # Naming style matching correct attribute names.
212 | attr-naming-style=snake_case
213 | 
214 | # Regular expression matching correct attribute names. Overrides attr-naming-
215 | # style.
216 | #attr-rgx=
217 | 
218 | # Bad variable names which should always be refused, separated by a comma.
219 | bad-names=foo,
220 |           bar,
221 |           baz,
222 |           toto,
223 |           tutu,
224 |           tata
225 | 
226 | # Bad variable names regexes, separated by a comma. If names match any regex,
227 | # they will always be refused
228 | bad-names-rgxs=
229 | 
230 | # Naming style matching correct class attribute names.
231 | class-attribute-naming-style=any
232 | 
233 | # Regular expression matching correct class attribute names. Overrides class-
234 | # attribute-naming-style.
235 | #class-attribute-rgx=
236 | 
237 | # Naming style matching correct class names.
238 | class-naming-style=PascalCase
239 | 
240 | # Regular expression matching correct class names. Overrides class-naming-
241 | # style.
242 | #class-rgx=
243 | 
244 | # Naming style matching correct constant names.
245 | const-naming-style=UPPER_CASE
246 | 
247 | # Regular expression matching correct constant names. Overrides const-naming-
248 | # style.
249 | #const-rgx=
250 | 
251 | # Minimum line length for functions/classes that require docstrings, shorter
252 | # ones are exempt.
253 | docstring-min-length=-1
254 | 
255 | # Naming style matching correct function names.
256 | function-naming-style=snake_case
257 | 
258 | # Regular expression matching correct function names. Overrides function-
259 | # naming-style.
260 | #function-rgx=
261 | 
262 | # Good variable names which should always be accepted, separated by a comma.
263 | good-names=i,
264 |            j,
265 |            k,
266 |            ex,
267 |            Run,
268 |            _
269 | 
270 | # Good variable names regexes, separated by a comma. If names match any regex,
271 | # they will always be accepted
272 | good-names-rgxs=
273 | 
274 | # Include a hint for the correct naming format with invalid-name.
275 | include-naming-hint=no
276 | 
277 | # Naming style matching correct inline iteration names.
278 | inlinevar-naming-style=any
279 | 
280 | # Regular expression matching correct inline iteration names. Overrides
281 | # inlinevar-naming-style.
282 | #inlinevar-rgx=
283 | 
284 | # Naming style matching correct method names.
285 | method-naming-style=snake_case
286 | 
287 | # Regular expression matching correct method names. Overrides method-naming-
288 | # style.
289 | #method-rgx=
290 | 
291 | # Naming style matching correct module names.
292 | module-naming-style=snake_case
293 | 
294 | # Regular expression matching correct module names. Overrides module-naming-
295 | # style.
296 | #module-rgx=
297 | 
298 | # Colon-delimited sets of names that determine each other's naming style when
299 | # the name regexes allow several styles.
300 | name-group=
301 | 
302 | # Regular expression which should only match function or class names that do
303 | # not require a docstring.
304 | no-docstring-rgx=^_
305 | 
306 | # List of decorators that produce properties, such as abc.abstractproperty. Add
307 | # to this list to register other decorators that produce valid properties.
308 | # These decorators are taken in consideration only for invalid-name.
309 | property-classes=abc.abstractproperty
310 | 
311 | # Naming style matching correct variable names.
312 | variable-naming-style=snake_case
313 | 
314 | # Regular expression matching correct variable names. Overrides variable-
315 | # naming-style.
316 | #variable-rgx=
317 | 
318 | 
319 | [SPELLING]
320 | 
321 | # Limits count of emitted suggestions for spelling mistakes.
322 | max-spelling-suggestions=4
323 | 
324 | # Spelling dictionary name. Available dictionaries: none. To make it work,
325 | # install the python-enchant package.
326 | spelling-dict=
327 | 
328 | # List of comma separated words that should not be checked.
329 | spelling-ignore-words=
330 | 
331 | # A path to a file that contains the private dictionary; one word per line.
332 | spelling-private-dict-file=
333 | 
334 | # Tells whether to store unknown words to the private dictionary (see the
335 | # --spelling-private-dict-file option) instead of raising a message.
336 | spelling-store-unknown-words=no
337 | 
338 | 
339 | [MISCELLANEOUS]
340 | 
341 | # List of note tags to take in consideration, separated by a comma.
342 | notes=FIXME,
343 |       XXX,
344 |       TODO
345 | 
346 | # Regular expression of note tags to take in consideration.
347 | #notes-rgx=
348 | 
349 | 
350 | [STRING]
351 | 
352 | # This flag controls whether inconsistent-quotes generates a warning when the
353 | # character used as a quote delimiter is used inconsistently within a module.
354 | check-quote-consistency=no
355 | 
356 | # This flag controls whether the implicit-str-concat should generate a warning
357 | # on implicit string concatenation in sequences defined over several lines.
358 | check-str-concat-over-line-jumps=no
359 | 
360 | 
361 | [LOGGING]
362 | 
363 | # The type of string formatting that logging methods do. `old` means using %
364 | # formatting, `new` is for `{}` formatting.
365 | logging-format-style=old
366 | 
367 | # Logging modules to check that the string format arguments are in logging
368 | # function parameter format.
369 | logging-modules=logging
370 | 
371 | 
372 | [FORMAT]
373 | 
374 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
375 | expected-line-ending-format=
376 | 
377 | # Regexp for a line that is allowed to be longer than the limit.
378 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$
379 | 
380 | # Number of spaces of indent required inside a hanging or continued line.
381 | indent-after-paren=4
382 | 
383 | # String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
384 | # tab).
385 | indent-string='    '
386 | 
387 | # Maximum number of characters on a single line.
388 | max-line-length=100
389 | 
390 | # Maximum number of lines in a module.
391 | max-module-lines=1000
392 | 
393 | # Allow the body of a class to be on the same line as the declaration if body
394 | # contains single statement.
395 | single-line-class-stmt=no
396 | 
397 | # Allow the body of an if to be on the same line as the test if there is no
398 | # else.
399 | single-line-if-stmt=no
400 | 
401 | 
402 | [SIMILARITIES]
403 | 
404 | # Ignore comments when computing similarities.
405 | ignore-comments=yes
406 | 
407 | # Ignore docstrings when computing similarities.
408 | ignore-docstrings=yes
409 | 
410 | # Ignore imports when computing similarities.
411 | ignore-imports=no
412 | 
413 | # Minimum lines number of a similarity.
414 | min-similarity-lines=5
415 | 
416 | 
417 | [DESIGN]
418 | 
419 | # Maximum number of arguments for function / method.
420 | max-args=5
421 | 
422 | # Maximum number of attributes for a class (see R0902).
423 | max-attributes=7
424 | 
425 | # Maximum number of boolean expressions in an if statement (see R0916).
426 | max-bool-expr=5
427 | 
428 | # Maximum number of branch for function / method body.
429 | max-branches=12
430 | 
431 | # Maximum number of locals for function / method body.
432 | max-locals=15
433 | 
434 | # Maximum number of parents for a class (see R0901).
435 | max-parents=7
436 | 
437 | # Maximum number of public methods for a class (see R0904).
438 | max-public-methods=20
439 | 
440 | # Maximum number of return / yield for function / method body.
441 | max-returns=6
442 | 
443 | # Maximum number of statements in function / method body.
444 | max-statements=50
445 | 
446 | # Minimum number of public methods for a class (see R0903).
447 | min-public-methods=2
448 | 
449 | 
450 | [IMPORTS]
451 | 
452 | # List of modules that can be imported at any level, not just the top level
453 | # one.
454 | allow-any-import-level=
455 | 
456 | # Allow wildcard imports from modules that define __all__.
457 | allow-wildcard-with-all=no
458 | 
459 | # Analyse import fallback blocks. This can be used to support both Python 2 and
460 | # 3 compatible code, which means that the block might have code that exists
461 | # only in one or another interpreter, leading to false positives when analysed.
462 | analyse-fallback-blocks=no
463 | 
464 | # Deprecated modules which should not be used, separated by a comma.
465 | deprecated-modules=optparse,tkinter.tix
466 | 
467 | # Create a graph of external dependencies in the given file (report RP0402 must
468 | # not be disabled).
469 | ext-import-graph=
470 | 
471 | # Create a graph of every (i.e. internal and external) dependencies in the
472 | # given file (report RP0402 must not be disabled).
473 | import-graph=
474 | 
475 | # Create a graph of internal dependencies in the given file (report RP0402 must
476 | # not be disabled).
477 | int-import-graph=
478 | 
479 | # Force import order to recognize a module as part of the standard
480 | # compatibility libraries.
481 | known-standard-library=
482 | 
483 | # Force import order to recognize a module as part of a third party library.
484 | known-third-party=enchant
485 | 
486 | # Couples of modules and preferred modules, separated by a comma.
487 | preferred-modules=
488 | 
489 | 
490 | [CLASSES]
491 | 
492 | # List of method names used to declare (i.e. assign) instance attributes.
493 | defining-attr-methods=__init__,
494 |                       __new__,
495 |                       setUp,
496 |                       __post_init__
497 | 
498 | # List of member names, which should be excluded from the protected access
499 | # warning.
500 | exclude-protected=_asdict,
501 |                   _fields,
502 |                   _replace,
503 |                   _source,
504 |                   _make
505 | 
506 | # List of valid names for the first argument in a class method.
507 | valid-classmethod-first-arg=cls
508 | 
509 | # List of valid names for the first argument in a metaclass class method.
510 | valid-metaclass-classmethod-first-arg=cls
511 | 
512 | 
513 | [EXCEPTIONS]
514 | 
515 | # Exceptions that will emit a warning when being caught. Defaults to
516 | # "BaseException, Exception".
517 | overgeneral-exceptions=builtins.BaseException,
518 |                        builtins.Exception
519 | 


--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.13.3
2 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Stage 1: Build
 2 | FROM python:3.13-slim-bookworm as builder
 3 | 
 4 | ENV PYTHONUNBUFFERED=1 \
 5 |     POETRY_NO_INTERACTION=1 \
 6 |     POETRY_VIRTUALENVS_IN_PROJECT=1 \
 7 |     POETRY_VIRTUALENVS_CREATE=1 \
 8 |     POETRY_CACHE_DIR=/tmp/poetry_cache
 9 | 
10 | WORKDIR /app/
11 | COPY pyproject.toml poetry.lock /app/
12 | RUN apt-get update && \
13 |     apt-get -y dist-upgrade && \
14 |     apt install -y locales libcurl4-openssl-dev libssl-dev build-essential && \
15 |     apt-get clean && \
16 |     rm -rf /var/lib/apt/lists/* && \
17 |     pip install -U pip poetry && \
18 |     poetry install --without dev --no-root && \
19 |     rm -rf $POETRY_CACHE_DIR
20 | 
21 | # Stage 2: Runtime environment
22 | FROM python:3.13-slim-bookworm
23 | 
24 | ENV PYTHONUNBUFFERED=1 \
25 |     VIRTUAL_ENV=/app/.venv \
26 |     PATH="/app/.venv/bin:$PATH"
27 | 
28 | COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
29 | COPY . /app/
30 | 
31 | EXPOSE 9808
32 | 
33 | RUN adduser --disabled-login exporter
34 | 
35 | USER exporter
36 | 
37 | ENTRYPOINT ["python", "/app/cli.py"]
38 | 


--------------------------------------------------------------------------------
/Dockerfile.pyinstaller:
--------------------------------------------------------------------------------
 1 | FROM danihodovic/pyinstaller-builder:latest
 2 | 
 3 | ARG PYTHON_VERSION=3.12
 4 | 
 5 | RUN pyenv install $PYTHON_VERSION && pyenv global $PYTHON_VERSION
 6 | 
 7 | RUN pip install poetry
 8 | 
 9 | WORKDIR /app/
10 | 
11 | COPY ./pyproject.toml ./poetry.lock /app/
12 | 
13 | RUN poetry install
14 | 
15 | COPY . /app/
16 | 
17 | RUN eval "$(pyenv init -)" && pyinstaller cli.py -y --onefile --name celery-exporter \
18 |         --hidden-import=celery.fixups \
19 |         --hidden-import=celery.fixups.django \
20 |         --hidden-import=celery.app.events \
21 |         --hidden-import=celery.loaders.app \
22 |         --hidden-import=celery.app.amqp \
23 |         --hidden-import=celery.app.control \
24 |         --hidden-import=kombu.transport.redis \
25 |         --hidden-import=kombu.transport.pyamqp
26 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Dani Hodovic
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # celery-exporter ![Build Status](https://github.com/danihodovic/celery-exporter/actions/workflows/.github/workflows/ci.yml/badge.svg) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
  2 | 
  3 | ![celery-tasks-by-task](images/celery-tasks-by-task.png)
  4 | 
  5 | ##### Table of Contents
  6 | 
  7 | * [Why another exporter?](#why-another-exporter)
  8 |   * [Features](#features)
  9 | * [Usage](#usage)
 10 |   * [Enable events using the CLI](#enable-events-using-the-cli)
 11 |   * [Running the exporter](#running-the-exporter)
 12 | * [Metrics](#metrics)
 13 | * [Development](#development)
 14 | * [Contributors](#contributors)
 15 | 
 16 | ### Why another exporter?
 17 | 
 18 | While I was adding Celery monitoring to a client site I realized that the
 19 | existing brokers either didn't work, exposed incorrect metric values or didn't
 20 | expose the metrics I needed. So I wrote this exporter which essentially wraps
 21 | the built-in Celery monitoring API and exposes all of the event metrics to
 22 | Prometheus in real-time.
 23 | 
 24 | ## Features
 25 | 
 26 | - Tested for both Redis and RabbitMQ
 27 | - Uses the built in [real-time monitoring component in Celery](https://docs.celeryproject.org/en/latest/userguide/monitoring.html#real-time-processing) to expose Prometheus metrics
 28 | - Tracks task status (task-started, task-succeeded, task-failed etc)
 29 | - Tracks which workers are running and the number of active tasks
 30 | - Follows the Prometheus exporter [best practises](https://prometheus.io/docs/instrumenting/writing_exporters/)
 31 | - Deployed as a Docker image or Python single-file binary (via PyInstaller)
 32 | - Exposes a health check endpoint at /health
 33 | - Grafana dashboards provided by the Celery-mixin
 34 | - Prometheus alerts provided by the Celery-mixin
 35 | 
 36 | ## Dashboards and alerts
 37 | 
 38 | Alerting rules can be found [here](./celery-mixin/prometheus-alerts.yaml). By
 39 | default we alert if:
 40 | 
 41 | - A task failed in the last 10 minutes.
 42 | - No Celery workers are online.
 43 | 
 44 | Tweak these to suit your use-case.
 45 | 
 46 | The Grafana dashboard (seen in the image above) is
 47 | [here](https://grafana.com/grafana/dashboards/17508). You can import it
 48 | directly into your Grafana instance.
 49 | 
 50 | There's another Grafana dashboards that shows an overview of Celery tasks. An image can be found in `./images/celery-tasks-overview.png`. It can also be found
 51 | [here](https://grafana.com/grafana/dashboards/17509).
 52 | 
 53 | ## Usage
 54 | 
 55 | Celery needs to be configured to send events to the broker which the exporter
 56 | will collect. You can either enable this via Celery configuration or via the
 57 | Celery CLI.
 58 | 
 59 | ##### Enable events using the CLI
 60 | 
 61 | To enable events in the CLI run the below command. Note that by default it
 62 | doesn't send the `task-sent` event which needs to be [configured](https://docs.celeryproject.org/en/latest/userguide/configuration.html#std-setting-task_send_sent_event) in the
 63 | configuration. The other events work out of the box.
 64 | 
 65 | ```sh
 66 | $ celery -A <myproject> control enable_events
 67 | ```
 68 | 
 69 | **Enable events using the configuration:**
 70 | 
 71 | ```python
 72 | # In celeryconfig.py
 73 | worker_send_task_events = True
 74 | task_send_sent_event = True
 75 | ```
 76 | 
 77 | **Configuration in Django:**
 78 | ```python
 79 | # In settings.py
 80 | CELERY_WORKER_SEND_TASK_EVENTS = True
 81 | CELERY_TASK_SEND_SENT_EVENT = True
 82 | ```
 83 | 
 84 | ##### Running the exporter
 85 | 
 86 | Using Docker:
 87 | 
 88 | ```sh
 89 | docker run -p 9808:9808 danihodovic/celery-exporter --broker-url=redis://redis.service.consul/1
 90 | ```
 91 | 
 92 | Using the Python binary (for-non Docker environments):
 93 | ```sh
 94 | curl -L https://github.com/danihodovic/celery-exporter/releases/download/latest/celery-exporter -o ./celery-exporter
 95 | chmod +x ./celery-exporter
 96 | ./celery-exporter --broker-url=redis://redis.service.consul/1
 97 | ```
 98 | 
 99 | ###### Kubernetes
100 | 
101 | There's a Helm in the directory `charts/celery-exporter` for deploying the Celery-exporter to Kubernetes using Helm.
102 | 
103 | ###### Environment variables
104 | 
105 | All arguments can be specified using environment variables with a `CE_` prefix:
106 | 
107 | ```sh
108 | docker run -p 9808:9808 -e CE_BROKER_URL=redis://redis danihodovic/celery-exporter
109 | ```
110 | 
111 | ###### Specifying optional broker transport options
112 | 
113 | While the default options may be fine for most cases,
114 | there may be a need to specify optional broker transport options. This can be done by specifying
115 | one or more --broker-transport-option parameters as follows:
116 | 
117 | ```sh
118 | docker run -p 9808:9808 danihodovic/celery-exporter --broker-url=redis://redis.service.consul/1 \
119 |   --broker-transport-option global_keyprefix=danihodovic \
120 |   --broker-transport-option visibility_timeout=7200
121 | ```
122 | 
123 | In case of extended transport options, such as `sentinel_kwargs` you can pass JSON string:,
124 | for example:
125 | 
126 | ```sh
127 | docker run -p 9808:9808 danihodovic/celery-exporter --broker-url=sentinel://sentinel.service.consul/1 \
128 |   --broker-transport-option master_name=my_master \
129 |   --broker-transport-option sentinel_kwargs="{\"password\": \"sentinelpass\"}"
130 | ```
131 | 
132 | The list of available broker transport options can be found here:
133 | https://docs.celeryq.dev/projects/kombu/en/stable/reference/kombu.transport.redis.html
134 | 
135 | ###### Specifying an optional retry interval
136 | 
137 | By default, celery-exporter will raise an exception and exit if there
138 | are any errors communicating with the broker. If preferred, one can
139 | have the celery-exporter retry connecting to the broker after a certain
140 | period of time in seconds via the `--retry-interval` parameter as follows:
141 | 
142 | ```sh
143 | docker run -p 9808:9808 danihodovic/celery-exporter --broker-url=redis://redis.service.consul/1 \
144 |   --retry-interval=5
145 | ```
146 | 
147 | ##### Test for prometheus scrape target
148 | ```sh
149 | curl 127.0.0.1:9808/metrics
150 | ```
151 | 
152 | ##### Grafana Dashboards & Prometheus Alerts
153 | 
154 | Head over to the [Celery-mixin in this subdirectory](https://github.com/danihodovic/celery-exporter/tree/master/celery-mixin) to generate rules and dashboards suited to your Prometheus setup.
155 | 
156 | ### Metrics
157 | Name     | Description | Type
158 | ---------|-------------|----
159 | celery_task_sent_total | Sent when a task message is published. | Counter
160 | celery_task_received_total | Sent when the worker receives a task. | Counter
161 | celery_task_started_total | Sent just before the worker executes the task. | Counter
162 | celery_task_succeeded_total | Sent if the task executed successfully. | Counter
163 | celery_task_failed_total | Sent if the execution of the task failed. | Counter
164 | celery_task_rejected_total | The task was rejected by the worker, possibly to be re-queued or moved to a dead letter queue. | Counter
165 | celery_task_revoked_total | Sent if the task has been revoked. | Counter
166 | celery_task_retried_total | Sent if the task failed, but will be retried in the future. | Counter
167 | celery_worker_up | Indicates if a worker has recently sent a heartbeat. | Gauge
168 | celery_worker_tasks_active | The number of tasks the worker is currently processing | Gauge
169 | celery_task_runtime_bucket | Histogram of runtime measurements for each task | Histogram
170 | celery_queue_length | The number of message in broker queue | Gauge
171 | celery_active_consumer_count | The number of active consumer in broker queue **(Only work for [RabbitMQ and Qpid](https://qpid.apache.org/) broker, more details at [here](https://github.com/danihodovic/celery-exporter/pull/118#issuecomment-1169870481))** | Gauge
172 | celery_active_worker_count | The number of active workers in broker queue | Gauge
173 | celery_active_process_count | The number of active process in broker queue. Each worker may have more than one process. | Gauge
174 | 
175 | Used in production at [https://findwork.dev](https://findwork.dev) and [https://django.wtf](https://django.wtf).
176 | 
177 | 
178 | ## Development
179 | Pull requests are welcome here!
180 | 
181 | To start developing run commands below to prepare your environment after the `git clone` command:
182 | ```shell
183 | # Install dependencies and pre-commit hooks
184 | poetry install
185 | pre-commit install
186 | 
187 | # Test everything works fine
188 | pre-commit run --all-files
189 | docker-compose up -d
190 | pytest --broker=memory      --log-level=DEBUG
191 | pytest --broker=redis       --log-level=DEBUG
192 | pytest --broker=rabbitmq    --log-level=DEBUG
193 | ```
194 | 
195 | ## Contributors
196 | 
197 | <a href="https://github.com/danihodovic/celery-exporter/graphs/contributors">
198 |   <img src="https://contrib.rocks/image?repo=danihodovic/celery-exporter" />
199 | </a>
200 | 
201 | Made with [contrib.rocks](https://contrib.rocks).
202 | 


--------------------------------------------------------------------------------
/Taskfile.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | # yamllint disable rule:line-length
 3 | version: '3'
 4 | 
 5 | tasks:
 6 |   build-image:
 7 |     desc: Builds a docker image
 8 |     cmds:
 9 |       - docker build . -t danihodovic/celery-exporter
10 | 
11 |   trivy-scan:
12 |     desc: Scans the docker image for vulnerabilities
13 |     cmds:
14 |       - trivy image --severity CRITICAL,HIGH --ignore-unfixed danihodovic/celery-exporter:latest
15 | 
16 |   build-binary:
17 |     desc: Creates a binary
18 |     cmds:
19 |       - docker build . -t celery-exporter-builder -f Dockerfile.pyinstaller --build-arg PYTHON_VERSION=$(cat .python-version)
20 |       - >
21 |         container=$(docker run --rm -d celery-exporter-builder sleep 5) &&
22 |         docker cp $container:/app/dist/celery-exporter celery-exporter
23 | 
24 |   release:
25 |     desc: Creates a Github release
26 |     deps: [build-binary]
27 |     cmds:
28 |       - git tag --delete latest
29 |       - git tag -a latest -m 'Latest build'
30 |       - >
31 |         github-release delete
32 |         --user danihodovic
33 |         --repo celery-exporter
34 |         --tag latest
35 |       - >
36 |         github-release release
37 |         --user danihodovic
38 |         --repo celery-exporter
39 |         --tag latest
40 |         --name celery-exporter
41 |         --description "Celery exporter for Prometheus"
42 |       - >
43 |         github-release upload
44 |         --user danihodovic
45 |         --repo celery-exporter
46 |         --tag latest
47 |         --name celery-exporter
48 |         --file ./celery-exporter
49 | 


--------------------------------------------------------------------------------
/buildInstaller.sh:
--------------------------------------------------------------------------------
1 | docker build -f Dockerfile.pyinstaller . -t cel-ex-builder
2 | docker rm celex -f
3 | docker run --name celex -d cel-ex-builder
4 | rm celery-exporter
5 | docker cp celex:/app/dist/celery-exporter .


--------------------------------------------------------------------------------
/celery-mixin/.gitignore:
--------------------------------------------------------------------------------
1 | vendor
2 | jsonnetfile.lock.json
3 | 


--------------------------------------------------------------------------------
/celery-mixin/Makefile:
--------------------------------------------------------------------------------
 1 | JSONNET_FMT := jsonnetfmt -n 2 --max-blank-lines 2 --string-style s --comment-style s
 2 | 
 3 | all: fmt prometheus-alerts.yaml dashboards_out lint
 4 | 
 5 | fmt:
 6 | 	find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
 7 | 		xargs -n 1 -- $(JSONNET_FMT) -i
 8 | 
 9 | prometheus-alerts.yaml: mixin.libsonnet config.libsonnet $(wildcard alerts/*)
10 | 	jsonnet -S alerts.jsonnet > $@
11 | 
12 | dashboards_out: mixin.libsonnet config.libsonnet $(wildcard dashboards/*)
13 | 	@mkdir -p dashboards_out
14 | 	jsonnet -J vendor -m dashboards_out dashboards.jsonnet
15 | 
16 | lint: prometheus-alerts.yaml
17 | 	find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \
18 | 		while read f; do \
19 | 			$(JSONNET_FMT) "$$f" | diff -u "$$f" -; \
20 | 		done
21 | 
22 | 	promtool check rules prometheus-alerts.yaml
23 | 
24 | test: prometheus-alerts.yaml
25 | 	promtool test rules tests.yaml
26 | 
27 | clean:
28 | 	rm -rf dashboards_out prometheus-alerts.yaml
29 | 


--------------------------------------------------------------------------------
/celery-mixin/README.md:
--------------------------------------------------------------------------------
 1 | # Prometheus Monitoring Mixin for Celery
 2 | 
 3 | A set of Grafana dashboards and Prometheus alerts for Celery.
 4 | 
 5 | ## How to use
 6 | 
 7 | This mixin is designed to be vendored into the repo with your infrastructure config.
 8 | To do this, use [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler):
 9 | 
10 | You then have three options for deploying your dashboards
11 | 
12 | 1. Generate the config files and deploy them yourself
13 | 2. Use jsonnet to deploy this mixin along with Prometheus and Grafana
14 | 3. Use prometheus-operator to deploy this mixin
15 | 
16 | ## Generate config files
17 | 
18 | You can manually generate the alerts, dashboards and rules files, but first you
19 | must install some tools:
20 | 
21 | ```sh
22 | go get github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb
23 | brew install jsonnet
24 | ```
25 | 
26 | Then, grab the mixin and its dependencies:
27 | 
28 | ```sh
29 | git clone https://github.com/danihodovic/celery-exporter
30 | cd celery-exporter/celery-mixin
31 | jb install
32 | ```
33 | 
34 | Finally, build the mixin:
35 | 
36 | ```sh
37 | make prometheus-alerts.yaml
38 | make dashboards_out
39 | ```
40 | 
41 | The `prometheus-alerts.yaml` file then need to passed
42 | to your Prometheus server, and the files in `dashboards_out` need to be imported
43 | into you Grafana server.  The exact details will depending on how you deploy your
44 | monitoring stack.
45 | 
46 | ## Alerts
47 | 
48 | The mixin follows the [monitoring-mixins guidelines](https://github.com/monitoring-mixins/docs#guidelines-for-alert-names-labels-and-annotations) for alerts.
49 | 


--------------------------------------------------------------------------------
/celery-mixin/alerts.jsonnet:
--------------------------------------------------------------------------------
1 | std.manifestYamlDoc((import 'mixin.libsonnet').prometheusAlerts)
2 | 


--------------------------------------------------------------------------------
/celery-mixin/alerts/alerts.libsonnet:
--------------------------------------------------------------------------------
 1 | {
 2 |   prometheusAlerts+:: {
 3 |     groups+: [
 4 |       {
 5 |         name: 'celery',
 6 |         rules: std.prune([
 7 |           {
 8 |             alert: 'CeleryTaskHighFailRate',
 9 |             expr: |||
10 |               sum(
11 |                 increase(
12 |                   celery_task_failed_total{
13 |                     %(celerySelector)s,
14 |                     queue_name!~"%(celeryIgnoredQueues)s",
15 |                     name!~"%(celeryIgnoredTasks)s"
16 |                   }[%(celeryTaskFailedInterval)s]
17 |                 )
18 |               )  by (job, namespace, queue_name, name)
19 |               /
20 |               (
21 |                 sum(
22 |                   increase(
23 |                     celery_task_failed_total{
24 |                       %(celerySelector)s,
25 |                       queue_name!~"%(celeryIgnoredQueues)s",
26 |                       name!~"%(celeryIgnoredTasks)s"
27 |                     }[%(celeryTaskFailedInterval)s]
28 |                   )
29 |                 )  by (job, namespace, queue_name, name)
30 |                 +
31 |                 sum(
32 |                   increase(
33 |                     celery_task_succeeded_total{
34 |                       %(celerySelector)s,
35 |                       queue_name!~"%(celeryIgnoredQueues)s",
36 |                       name!~"%(celeryIgnoredTasks)s"
37 |                     }[%(celeryTaskFailedInterval)s]
38 |                   )
39 |                 )  by (job, namespace, queue_name, name)
40 |               )
41 |               * 100 > %(celeryTaskFailedThreshold)s
42 |             ||| % $._config,
43 |             annotations: {
44 |               summary: 'Celery high task fail rate.',
45 |               description: 'More than %(celeryTaskFailedThreshold)s%% tasks failed for the task {{ $labels.job }}/{{ $labels.queue_name }}/{{ $labels.name }} the past %(celeryTaskFailedInterval)s.' % $._config,
46 |               dashboard_url: $._config.celeryTasksByTaskUrl + '?var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}&var-task={{ $labels.name }}',
47 |             },
48 |             'for': '1m',
49 |             labels: {
50 |               severity: 'warning',
51 |             },
52 |           },
53 |           if $._config.celeryCeleryHighQueueLengthAlertEnabled then {
54 |             alert: 'CeleryHighQueueLength',
55 |             expr: |||
56 |               sum(
57 |                 celery_queue_length{
58 |                   %(celerySelector)s,
59 |                   queue_name!~"%(celeryIgnoredQueues)s"
60 |                 }
61 |               )  by (job, namespace, queue_name)
62 |               > %(celeryHighQueueLengthThreshold)s
63 |             ||| % $._config,
64 |             'for': $._config.celeryHighQueueLengthInterval,
65 |             labels: {
66 |               severity: 'warning',
67 |             },
68 |             annotations: {
69 |               summary: 'Celery high queue length.',
70 |               description: 'More than %(celeryHighQueueLengthThreshold)s tasks in the queue {{ $labels.job }}/{{ $labels.queue_name }} the past %(celeryHighQueueLengthInterval)s.' % $._config,
71 |               dashboard_url: $._config.celeryTasksOverviewUrl + '?&var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}',
72 |             },
73 |           },
74 |           if $._config.celeryWorkerDownAlertEnabled then {
75 |             alert: 'CeleryWorkerDown',
76 |             expr: |||
77 |               celery_worker_up{%(celerySelector)s} == 0
78 |             ||| % $._config,
79 |             'for': $._config.celeryWorkerDownInterval,
80 |             labels: {
81 |               severity: 'warning',
82 |             },
83 |             annotations: {
84 |               summary: 'A Celery worker is offline.',
85 |               description: 'The Celery worker {{ $labels.job }}/{{ $labels.hostname }} is offline.',
86 |               dashboard_url: $._config.celeryTasksOverviewUrl + '?&var-job={{ $labels.job }}',
87 |             },
88 |           },
89 |         ]),
90 |       },
91 |     ],
92 |   },
93 | }
94 | 


--------------------------------------------------------------------------------
/celery-mixin/config.libsonnet:
--------------------------------------------------------------------------------
 1 | local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
 2 | local annotation = g.dashboard.annotation;
 3 | 
 4 | {
 5 |   _config+:: {
 6 |     // Selectors are inserted between {} in Prometheus queries.
 7 |     celerySelector: 'job=~".*celery.*"',
 8 | 
 9 |     grafanaUrl: 'https://grafana.com',
10 | 
11 |     celeryIgnoredTasks: 'None',
12 |     celeryIgnoredQueues: 'None',
13 | 
14 |     celeryTasksOverviewUid: 'celery-tasks-overview-32s3',
15 |     celeryTasksByTaskUid: 'celery-tasks-by-task-32s3',
16 | 
17 |     celeryTasksOverviewUrl: '%s/d/%s/celery-tasks-overview' % [self.grafanaUrl, self.celeryTasksOverviewUid],
18 |     celeryTasksByTaskUrl: '%s/d/%s/celery-tasks-by-task' % [self.grafanaUrl, self.celeryTasksByTaskUid],
19 | 
20 |     tags: ['celery', 'celery-mixin'],
21 | 
22 |     // If you have autoscaling workers then you maybe do not want to alert on workers that are down.
23 |     celeryWorkerDownAlertEnabled: true,
24 |     celeryCeleryHighQueueLengthAlertEnabled: true,
25 |     // The task interval is used as the interval for Prometheus alerts of failed tasks.
26 |     celeryTaskFailedInterval: '10m',
27 |     celeryTaskFailedThreshold: '5',  // percent
28 |     celeryHighQueueLengthInterval: '20m',
29 |     celeryHighQueueLengthThreshold: '100',
30 |     celeryWorkerDownInterval: '15m',
31 | 
32 |     // Custom annotations to display in graphs
33 |     annotation: {
34 |       enabled: false,
35 |       name: 'Custom Annotation',
36 |       datasource: '-- Grafana --',
37 |       iconColor: 'green',
38 |       tags: [],
39 |     },
40 | 
41 |     customAnnotation:: if $._config.annotation.enabled then
42 |       annotation.withName($._config.annotation.name) +
43 |       annotation.withIconColor($._config.annotation.iconColor) +
44 |       annotation.withHide(false) +
45 |       annotation.datasource.withUid($._config.annotation.datasource) +
46 |       annotation.target.withMatchAny(true) +
47 |       annotation.target.withTags($._config.annotation.tags) +
48 |       annotation.target.withType('tags')
49 |     else {},
50 |   },
51 | }
52 | 


--------------------------------------------------------------------------------
/celery-mixin/dashboards.jsonnet:
--------------------------------------------------------------------------------
1 | local dashboards = (import 'mixin.libsonnet').grafanaDashboards;
2 | 
3 | {
4 |   [name]: dashboards[name]
5 |   for name in std.objectFields(dashboards)
6 | }
7 | 


--------------------------------------------------------------------------------
/celery-mixin/dashboards/celery-tasks-by-task.libsonnet:
--------------------------------------------------------------------------------
  1 | local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
  2 | 
  3 | local dashboard = g.dashboard;
  4 | local row = g.panel.row;
  5 | local grid = g.util.grid;
  6 | 
  7 | local variable = dashboard.variable;
  8 | local datasource = variable.datasource;
  9 | local query = variable.query;
 10 | local prometheus = g.query.prometheus;
 11 | 
 12 | local timeSeriesPanel = g.panel.timeSeries;
 13 | local tablePanel = g.panel.table;
 14 | 
 15 | // Timeseries
 16 | local tsOptions = timeSeriesPanel.options;
 17 | local tsStandardOptions = timeSeriesPanel.standardOptions;
 18 | local tsQueryOptions = timeSeriesPanel.queryOptions;
 19 | local tsFieldConfig = timeSeriesPanel.fieldConfig;
 20 | local tsCustom = tsFieldConfig.defaults.custom;
 21 | local tsLegend = tsOptions.legend;
 22 | local tsOverride = tsStandardOptions.override;
 23 | 
 24 | // Table
 25 | local tbOptions = tablePanel.options;
 26 | local tbStandardOptions = tablePanel.standardOptions;
 27 | local tbQueryOptions = tablePanel.queryOptions;
 28 | local tbOverride = tbStandardOptions.override;
 29 | 
 30 | {
 31 |   grafanaDashboards+:: {
 32 | 
 33 |     local datasourceVariable =
 34 |       datasource.new(
 35 |         'datasource',
 36 |         'prometheus',
 37 |       ) +
 38 |       datasource.generalOptions.withLabel('Data source'),
 39 | 
 40 |     local namespaceVariable =
 41 |       query.new(
 42 |         'namespace',
 43 |         'label_values(celery_worker_up{}, namespace)'
 44 |       ) +
 45 |       query.withDatasourceFromVariable(datasourceVariable) +
 46 |       query.withSort(1) +
 47 |       query.generalOptions.withLabel('Namespace') +
 48 |       query.selectionOptions.withMulti(false) +
 49 |       query.selectionOptions.withIncludeAll(false) +
 50 |       query.refresh.onLoad() +
 51 |       query.refresh.onTime(),
 52 | 
 53 | 
 54 |     local jobVariable =
 55 |       query.new(
 56 |         'job',
 57 |         'label_values(celery_worker_up{namespace="$namespace"}, job)'
 58 |       ) +
 59 |       query.withDatasourceFromVariable(datasourceVariable) +
 60 |       query.withSort(1) +
 61 |       query.generalOptions.withLabel('Job') +
 62 |       query.selectionOptions.withMulti(false) +
 63 |       query.selectionOptions.withIncludeAll(false) +
 64 |       query.refresh.onLoad() +
 65 |       query.refresh.onTime(),
 66 | 
 67 |     local queueNameVariable =
 68 |       query.new(
 69 |         'queue_name',
 70 |         'label_values(celery_task_received_total{namespace="$namespace", job="$job", name!~"%(celeryIgnoredQueues)s"}, queue_name)' % $._config
 71 |       ) +
 72 |       query.withDatasourceFromVariable(datasourceVariable) +
 73 |       query.withSort(1) +
 74 |       query.generalOptions.withLabel('Queue Name') +
 75 |       query.selectionOptions.withMulti(false) +
 76 |       query.selectionOptions.withIncludeAll(false) +
 77 |       query.refresh.onLoad() +
 78 |       query.refresh.onTime(),
 79 | 
 80 |     local taskVariable =
 81 |       query.new(
 82 |         'task',
 83 |         'label_values(celery_task_received_total{namespace="$namespace", job="$job", queue_name=~"$queue_name", name!~"%(celeryIgnoredTasks)s"}, name)' % $._config
 84 |       ) +
 85 |       query.withDatasourceFromVariable(datasourceVariable) +
 86 |       query.withSort(1) +
 87 |       query.generalOptions.withLabel('Task') +
 88 |       query.selectionOptions.withMulti(true) +
 89 |       query.selectionOptions.withIncludeAll(false) +
 90 |       query.refresh.onLoad() +
 91 |       query.refresh.onTime(),
 92 | 
 93 |     local variables = [
 94 |       datasourceVariable,
 95 |       namespaceVariable,
 96 |       jobVariable,
 97 |       queueNameVariable,
 98 |       taskVariable,
 99 |     ],
100 | 
101 |     local taskExceptionsQuery = |||
102 |       round(
103 |         sum (
104 |           increase(
105 |             celery_task_failed_total{
106 |               job="$job",
107 |               name=~"$task",
108 |               queue_name=~"$queue_name"
109 |             }[$__range]
110 |           )
111 |         ) by (name, exception) > 0
112 |       )
113 |     |||,
114 |     local taskExceptionsTable =
115 |       tablePanel.new(
116 |         'Task Exceptions',
117 |       ) +
118 |       tbStandardOptions.withUnit('short') +
119 |       tbOptions.withSortBy(
120 |         tbOptions.sortBy.withDisplayName('Value') +
121 |         tbOptions.sortBy.withDesc(true)
122 |       ) +
123 |       tbOptions.footer.withEnablePagination(true) +
124 |       tbQueryOptions.withTargets(
125 |         prometheus.new(
126 |           '$datasource',
127 |           taskExceptionsQuery,
128 |         ) +
129 |         prometheus.withFormat('table') +
130 |         prometheus.withInstant(true)
131 |       ) +
132 |       tbQueryOptions.withTransformations([
133 |         tbQueryOptions.transformation.withId(
134 |           'organize'
135 |         ) +
136 |         tbQueryOptions.transformation.withOptions(
137 |           {
138 |             renameByName: {
139 |               name: 'Task',
140 |               exception: 'Exception',
141 |             },
142 |             indexByName: {
143 |               name: 0,
144 |               exception: 1,
145 |               Value: 2,
146 |             },
147 |             excludeByName: {
148 |               Time: true,
149 |               job: true,
150 |             },
151 |           }
152 |         ),
153 |       ]),
154 | 
155 |     local taskFailedQuery = |||
156 |       sum (
157 |         round(
158 |           increase(
159 |             celery_task_failed_total{
160 |               job="$job",
161 |               name=~"$task",
162 |               queue_name=~"$queue_name"
163 |             }[$__range]
164 |           )
165 |         )
166 |       ) by (name) > 0
167 |     |||,
168 |     local taskSucceededQuery = std.strReplace(taskFailedQuery, 'failed', 'succeeded'),
169 |     local taskSentQuery = std.strReplace(taskFailedQuery, 'failed', 'sent'),
170 |     local taskReceivedQuery = std.strReplace(taskFailedQuery, 'failed', 'received'),
171 |     local taskRetriedQuery = std.strReplace(taskFailedQuery, 'failed', 'retried'),
172 |     local taskRevokedQuery = std.strReplace(taskFailedQuery, 'failed', 'revoked'),
173 |     local taskRejectedQuery = std.strReplace(taskFailedQuery, 'failed', 'rejected'),
174 |     local taskSuccessRateQuery = |||
175 |       %s/(%s+%s) > -1
176 |     ||| % [
177 |       // Strip out > 0 from the end of the success query
178 |       std.strReplace(taskSucceededQuery, ' > 0', ''),
179 |       std.strReplace(taskSucceededQuery, ' > 0', ''),
180 |       std.strReplace(taskFailedQuery, ' > 0', ''),
181 |     ],  // Add > -1 to remove NaN results
182 | 
183 |     local tasksStatsTable =
184 |       tablePanel.new(
185 |         'Task Stats',
186 |       ) +
187 |       tbStandardOptions.withUnit('short') +
188 |       tbStandardOptions.withNoValue(0) +
189 |       tbOptions.withSortBy(
190 |         tbOptions.sortBy.withDisplayName('Succeeded') +
191 |         tbOptions.sortBy.withDesc(true)
192 |       ) +
193 |       tbOptions.footer.withEnablePagination(true) +
194 |       tbQueryOptions.withTargets(
195 |         [
196 |           prometheus.new(
197 |             '$datasource',
198 |             taskSuccessRateQuery,
199 |           ) +
200 |           prometheus.withFormat('table') +
201 |           prometheus.withInstant(true),
202 |           prometheus.new(
203 |             '$datasource',
204 |             taskSucceededQuery,
205 |           ) +
206 |           prometheus.withFormat('table') +
207 |           prometheus.withInstant(true),
208 |           prometheus.new(
209 |             '$datasource',
210 |             taskFailedQuery,
211 |           ) +
212 |           prometheus.withFormat('table') +
213 |           prometheus.withInstant(true),
214 |           prometheus.new(
215 |             '$datasource',
216 |             taskSentQuery,
217 |           ) +
218 |           prometheus.withFormat('table') +
219 |           prometheus.withInstant(true),
220 |           prometheus.new(
221 |             '$datasource',
222 |             taskReceivedQuery,
223 |           ) +
224 |           prometheus.withFormat('table') +
225 |           prometheus.withInstant(true),
226 |           prometheus.new(
227 |             '$datasource',
228 |             taskRejectedQuery,
229 |           ) +
230 |           prometheus.withFormat('table') +
231 |           prometheus.withInstant(true),
232 |           prometheus.new(
233 |             '$datasource',
234 |             taskRetriedQuery,
235 |           ) +
236 |           prometheus.withFormat('table') +
237 |           prometheus.withInstant(true),
238 |           prometheus.new(
239 |             '$datasource',
240 |             taskRevokedQuery,
241 |           ) +
242 |           prometheus.withFormat('table') +
243 |           prometheus.withInstant(true),
244 |         ]
245 |       ) +
246 |       tbQueryOptions.withTransformations([
247 |         tbQueryOptions.transformation.withId(
248 |           'merge'
249 |         ),
250 |         tbQueryOptions.transformation.withId(
251 |           'organize'
252 |         ) +
253 |         tbQueryOptions.transformation.withOptions(
254 |           {
255 |             renameByName: {
256 |               name: 'Name',
257 |               'Value #A': 'Success Rate',
258 |               'Value #B': 'Succeeded',
259 |               'Value #C': 'Failed',
260 |               'Value #D': 'Sent',
261 |               'Value #E': 'Received',
262 |               'Value #F': 'Rejected',
263 |               'Value #G': 'Retried',
264 |               'Value #H': 'Revoked',
265 |             },
266 |             indexByName: {
267 |               name: 0,
268 |               'Value #A': 1,
269 |               'Value #B': 2,
270 |               'Value #C': 3,
271 |               'Value #D': 4,
272 |               'Value #E': 5,
273 |               'Value #F': 6,
274 |               'Value #G': 7,
275 |               'Value #H': 8,
276 |             },
277 |             excludeByName: {
278 |               Time: true,
279 |             },
280 |           }
281 |         ),
282 |       ]) +
283 |       tbStandardOptions.withOverrides([
284 |         tbOverride.byName.new('Success Rate') +
285 |         tbOverride.byName.withPropertiesFromOptions(
286 |           tbStandardOptions.withUnit('percentunit')
287 |         ),
288 |       ]),
289 | 
290 |     local taskFailedByExceptionIntervalQuery = |||
291 |       sum (
292 |         round(
293 |           increase(
294 |             celery_task_failed_total{
295 |               job="$job",
296 |               name=~"$task",
297 |               queue_name=~"$queue_name"
298 |             }[$__rate_interval]
299 |           )
300 |         )
301 |       ) by (name, exception) > 0
302 |     |||,
303 | 
304 |     local tasksFailedByExceptionTimeSeriesPanel =
305 |       timeSeriesPanel.new(
306 |         'Task Exceptions',
307 |       ) +
308 |       tsQueryOptions.withTargets(
309 |         [
310 |           prometheus.new(
311 |             '$datasource',
312 |             taskFailedByExceptionIntervalQuery,
313 |           ) +
314 |           prometheus.withLegendFormat(
315 |             '{{ name }}/{{ exception }}'
316 |           ),
317 |         ]
318 |       ) +
319 |       tsStandardOptions.withUnit('short') +
320 |       tsOptions.tooltip.withMode('multi') +
321 |       tsOptions.tooltip.withSort('desc') +
322 |       tsLegend.withShowLegend(true) +
323 |       tsLegend.withDisplayMode('table') +
324 |       tsLegend.withPlacement('right') +
325 |       tsLegend.withCalcs(['mean', 'max']) +
326 |       tsLegend.withSortBy('Mean') +
327 |       tsLegend.withSortDesc(true) +
328 |       tsCustom.withSpanNulls(false),
329 | 
330 |     local taskFailedIntervalQuery = |||
331 |       sum (
332 |         round(
333 |           increase(
334 |             celery_task_failed_total{
335 |               job="$job",
336 |               name=~"$task",
337 |               queue_name=~"$queue_name"
338 |             }[$__rate_interval]
339 |           )
340 |         )
341 |       ) by (name) > 0
342 |     |||,
343 |     local taskSucceededIntervalQuery = std.strReplace(taskFailedIntervalQuery, 'failed', 'succeeded'),
344 |     local taskSentIntervalQuery = std.strReplace(taskFailedIntervalQuery, 'failed', 'sent'),
345 |     local taskReceivedIntervalQuery = std.strReplace(taskFailedIntervalQuery, 'failed', 'received'),
346 |     local taskRetriedIntervalQuery = std.strReplace(taskFailedIntervalQuery, 'failed', 'retried'),
347 |     local taskRevokedIntervalQuery = std.strReplace(taskFailedIntervalQuery, 'failed', 'revoked'),
348 |     local taskRejectedIntervalQuery = std.strReplace(taskFailedIntervalQuery, 'failed', 'rejected'),
349 | 
350 | 
351 |     local tasksCompletedTimeSeriesPanel =
352 |       timeSeriesPanel.new(
353 |         'Tasks Completed',
354 |       ) +
355 |       tsQueryOptions.withTargets(
356 |         [
357 |           prometheus.new(
358 |             '$datasource',
359 |             taskSucceededIntervalQuery,
360 |           ) +
361 |           prometheus.withLegendFormat(
362 |             'Succeeded - {{ name }}'
363 |           ),
364 |           prometheus.new(
365 |             '$datasource',
366 |             taskFailedIntervalQuery,
367 |           ) +
368 |           prometheus.withLegendFormat(
369 |             'Failed - {{ name }}'
370 |           ),
371 |           prometheus.new(
372 |             '$datasource',
373 |             taskSentIntervalQuery,
374 |           ) +
375 |           prometheus.withLegendFormat(
376 |             'Sent - {{ name }}'
377 |           ),
378 |           prometheus.new(
379 |             '$datasource',
380 |             taskReceivedIntervalQuery,
381 |           ) +
382 |           prometheus.withLegendFormat(
383 |             'Received - {{ name }}'
384 |           ),
385 |           prometheus.new(
386 |             '$datasource',
387 |             taskRetriedIntervalQuery,
388 |           ) +
389 |           prometheus.withLegendFormat(
390 |             'Retried - {{ name }}'
391 |           ),
392 |           prometheus.new(
393 |             '$datasource',
394 |             taskRevokedIntervalQuery,
395 |           ) +
396 |           prometheus.withLegendFormat(
397 |             'Revoked - {{ name }}'
398 |           ),
399 |           prometheus.new(
400 |             '$datasource',
401 |             taskRejectedIntervalQuery,
402 |           ) +
403 |           prometheus.withLegendFormat(
404 |             'Rejected - {{ name }}'
405 |           ),
406 |         ]
407 |       ) +
408 |       tsStandardOptions.withUnit('short') +
409 |       tsOptions.tooltip.withMode('multi') +
410 |       tsOptions.tooltip.withSort('desc') +
411 |       tsLegend.withShowLegend(true) +
412 |       tsLegend.withDisplayMode('table') +
413 |       tsLegend.withPlacement('right') +
414 |       tsLegend.withCalcs(['mean', 'max']) +
415 |       tsLegend.withSortBy('Mean') +
416 |       tsLegend.withSortDesc(true) +
417 |       tsCustom.withSpanNulls(false),
418 | 
419 |     local tasksRuntimeP50Query = |||
420 |       histogram_quantile(0.50,
421 |         sum(
422 |           irate(
423 |             celery_task_runtime_bucket{
424 |               job="$job",
425 |               name=~"$task",
426 |               queue_name=~"$queue_name"
427 |             }[$__rate_interval]
428 |           ) > 0
429 |         ) by (name, job, le)
430 |       )
431 |     |||,
432 |     local tasksRuntimeP95Query = std.strReplace(tasksRuntimeP50Query, '0.50', '0.95'),
433 |     local tasksRuntimeP99Query = std.strReplace(tasksRuntimeP50Query, '0.50', '0.99'),
434 | 
435 |     local tasksRuntimeTimeSeriesPanel =
436 |       timeSeriesPanel.new(
437 |         'Tasks Runtime',
438 |       ) +
439 |       tsQueryOptions.withTargets(
440 |         [
441 |           prometheus.new(
442 |             '$datasource',
443 |             tasksRuntimeP50Query,
444 |           ) +
445 |           prometheus.withLegendFormat(
446 |             'P50 - {{ name }}'
447 |           ),
448 |           prometheus.new(
449 |             '$datasource',
450 |             tasksRuntimeP95Query,
451 |           ) +
452 |           prometheus.withLegendFormat(
453 |             'P95 - {{ name }}'
454 |           ),
455 |           prometheus.new(
456 |             '$datasource',
457 |             tasksRuntimeP99Query,
458 |           ) +
459 |           prometheus.withLegendFormat(
460 |             'P99 - {{ name }}'
461 |           ),
462 |         ]
463 |       ) +
464 |       tsStandardOptions.withUnit('s') +
465 |       tsOptions.tooltip.withMode('multi') +
466 |       tsOptions.tooltip.withSort('desc') +
467 |       tsStandardOptions.withOverrides([
468 |         tsOverride.byName.new('P50') +
469 |         tsOverride.byName.withPropertiesFromOptions(
470 |           tsStandardOptions.color.withMode('fixed') +
471 |           tsStandardOptions.color.withFixedColor('green')
472 |         ),
473 |         tsOverride.byName.new('P95') +
474 |         tsOverride.byName.withPropertiesFromOptions(
475 |           tsStandardOptions.color.withMode('fixed') +
476 |           tsStandardOptions.color.withFixedColor('yellow')
477 |         ),
478 |         tsOverride.byName.new('P99') +
479 |         tsOverride.byName.withPropertiesFromOptions(
480 |           tsStandardOptions.color.withMode('fixed') +
481 |           tsStandardOptions.color.withFixedColor('red')
482 |         ),
483 |       ]) +
484 |       tsLegend.withShowLegend(true) +
485 |       tsLegend.withDisplayMode('table') +
486 |       tsLegend.withPlacement('right') +
487 |       tsLegend.withCalcs(['mean', 'max']) +
488 |       tsLegend.withSortBy('Mean') +
489 |       tsLegend.withSortDesc(true) +
490 |       tsCustom.withSpanNulls(false),
491 | 
492 |     local tasksRow =
493 |       row.new(
494 |         title='Tasks'
495 |       ),
496 | 
497 | 
498 |     'celery-tasks-by-task.json':
499 |       dashboard.new(
500 |         'Celery / Tasks / By Task',
501 |       ) +
502 |       dashboard.withDescription(
503 |         'A dashboard that monitors Celery. It is created using the Celery-mixin for the the (Celery-exporter)[https://github.com/danihodovic/celery-exporter]'
504 |       ) +
505 |       dashboard.withUid($._config.celeryTasksByTaskUid) +
506 |       dashboard.withTags($._config.tags) +
507 |       dashboard.withTimezone('utc') +
508 |       dashboard.withEditable(true) +
509 |       dashboard.time.withFrom('now-2d') +
510 |       dashboard.time.withTo('now') +
511 |       dashboard.withVariables(variables) +
512 |       dashboard.withLinks(
513 |         [
514 |           dashboard.link.dashboards.new('Celery Dashboards', $._config.tags) +
515 |           dashboard.link.link.options.withTargetBlank(true),
516 |         ]
517 |       ) +
518 |       dashboard.withPanels(
519 |         [
520 |           tasksRow +
521 |           row.gridPos.withX(0) +
522 |           row.gridPos.withY(0) +
523 |           row.gridPos.withW(24) +
524 |           row.gridPos.withH(1),
525 |           tasksStatsTable +
526 |           timeSeriesPanel.gridPos.withX(0) +
527 |           timeSeriesPanel.gridPos.withY(1) +
528 |           timeSeriesPanel.gridPos.withW(16) +
529 |           timeSeriesPanel.gridPos.withH(8),
530 |           taskExceptionsTable +
531 |           timeSeriesPanel.gridPos.withX(16) +
532 |           timeSeriesPanel.gridPos.withY(1) +
533 |           timeSeriesPanel.gridPos.withW(8) +
534 |           timeSeriesPanel.gridPos.withH(8),
535 |           tasksCompletedTimeSeriesPanel +
536 |           timeSeriesPanel.gridPos.withX(0) +
537 |           timeSeriesPanel.gridPos.withY(9) +
538 |           timeSeriesPanel.gridPos.withW(24) +
539 |           timeSeriesPanel.gridPos.withH(8),
540 |           tasksFailedByExceptionTimeSeriesPanel +
541 |           timeSeriesPanel.gridPos.withX(0) +
542 |           timeSeriesPanel.gridPos.withY(17) +
543 |           timeSeriesPanel.gridPos.withW(24) +
544 |           timeSeriesPanel.gridPos.withH(8),
545 |           tasksRuntimeTimeSeriesPanel +
546 |           timeSeriesPanel.gridPos.withX(0) +
547 |           timeSeriesPanel.gridPos.withY(25) +
548 |           timeSeriesPanel.gridPos.withW(24) +
549 |           timeSeriesPanel.gridPos.withH(8),
550 |         ]
551 |       ) +
552 |       if $._config.annotation.enabled then
553 |         dashboard.withAnnotations($._config.customAnnotation)
554 |       else {},
555 |   },
556 | }
557 | 


--------------------------------------------------------------------------------
/celery-mixin/dashboards/dashboards.libsonnet:
--------------------------------------------------------------------------------
1 | (import 'celery-tasks-overview.libsonnet') +
2 | (import 'celery-tasks-by-task.libsonnet') +
3 | {}
4 | 


--------------------------------------------------------------------------------
/celery-mixin/dashboards_out/celery-tasks-by-task.json:
--------------------------------------------------------------------------------
  1 | {
  2 |    "description": "A dashboard that monitors Celery. It is created using the Celery-mixin for the the (Celery-exporter)[https://github.com/danihodovic/celery-exporter]",
  3 |    "editable": true,
  4 |    "links": [
  5 |       {
  6 |          "tags": [
  7 |             "celery",
  8 |             "celery-mixin"
  9 |          ],
 10 |          "targetBlank": true,
 11 |          "title": "Celery Dashboards",
 12 |          "type": "dashboards"
 13 |       }
 14 |    ],
 15 |    "panels": [
 16 |       {
 17 |          "collapsed": false,
 18 |          "gridPos": {
 19 |             "h": 1,
 20 |             "w": 24,
 21 |             "x": 0,
 22 |             "y": 0
 23 |          },
 24 |          "id": 1,
 25 |          "title": "Tasks",
 26 |          "type": "row"
 27 |       },
 28 |       {
 29 |          "datasource": {
 30 |             "type": "datasource",
 31 |             "uid": "-- Mixed --"
 32 |          },
 33 |          "fieldConfig": {
 34 |             "defaults": {
 35 |                "noValue": 0,
 36 |                "unit": "short"
 37 |             },
 38 |             "overrides": [
 39 |                {
 40 |                   "matcher": {
 41 |                      "id": "byName",
 42 |                      "options": "Success Rate"
 43 |                   },
 44 |                   "properties": [
 45 |                      {
 46 |                         "id": "unit",
 47 |                         "value": "percentunit"
 48 |                      }
 49 |                   ]
 50 |                }
 51 |             ]
 52 |          },
 53 |          "gridPos": {
 54 |             "h": 8,
 55 |             "w": 16,
 56 |             "x": 0,
 57 |             "y": 1
 58 |          },
 59 |          "id": 2,
 60 |          "options": {
 61 |             "footer": {
 62 |                "enablePagination": true
 63 |             },
 64 |             "sortBy": [
 65 |                {
 66 |                   "desc": true,
 67 |                   "displayName": "Succeeded"
 68 |                }
 69 |             ]
 70 |          },
 71 |          "pluginVersion": "v11.1.0",
 72 |          "targets": [
 73 |             {
 74 |                "datasource": {
 75 |                   "type": "prometheus",
 76 |                   "uid": "$datasource"
 77 |                },
 78 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_succeeded_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  )\n) by (name)\n/(sum (\n  round(\n    increase(\n      celery_task_succeeded_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  )\n) by (name)\n+sum (\n  round(\n    increase(\n      celery_task_failed_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  )\n) by (name)\n) > -1\n",
 79 |                "format": "table",
 80 |                "instant": true
 81 |             },
 82 |             {
 83 |                "datasource": {
 84 |                   "type": "prometheus",
 85 |                   "uid": "$datasource"
 86 |                },
 87 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_succeeded_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  )\n) by (name) > 0\n",
 88 |                "format": "table",
 89 |                "instant": true
 90 |             },
 91 |             {
 92 |                "datasource": {
 93 |                   "type": "prometheus",
 94 |                   "uid": "$datasource"
 95 |                },
 96 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_failed_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  )\n) by (name) > 0\n",
 97 |                "format": "table",
 98 |                "instant": true
 99 |             },
100 |             {
101 |                "datasource": {
102 |                   "type": "prometheus",
103 |                   "uid": "$datasource"
104 |                },
105 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_sent_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  )\n) by (name) > 0\n",
106 |                "format": "table",
107 |                "instant": true
108 |             },
109 |             {
110 |                "datasource": {
111 |                   "type": "prometheus",
112 |                   "uid": "$datasource"
113 |                },
114 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_received_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  )\n) by (name) > 0\n",
115 |                "format": "table",
116 |                "instant": true
117 |             },
118 |             {
119 |                "datasource": {
120 |                   "type": "prometheus",
121 |                   "uid": "$datasource"
122 |                },
123 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_rejected_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  )\n) by (name) > 0\n",
124 |                "format": "table",
125 |                "instant": true
126 |             },
127 |             {
128 |                "datasource": {
129 |                   "type": "prometheus",
130 |                   "uid": "$datasource"
131 |                },
132 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_retried_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  )\n) by (name) > 0\n",
133 |                "format": "table",
134 |                "instant": true
135 |             },
136 |             {
137 |                "datasource": {
138 |                   "type": "prometheus",
139 |                   "uid": "$datasource"
140 |                },
141 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_revoked_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  )\n) by (name) > 0\n",
142 |                "format": "table",
143 |                "instant": true
144 |             }
145 |          ],
146 |          "title": "Task Stats",
147 |          "transformations": [
148 |             {
149 |                "id": "merge"
150 |             },
151 |             {
152 |                "id": "organize",
153 |                "options": {
154 |                   "excludeByName": {
155 |                      "Time": true
156 |                   },
157 |                   "indexByName": {
158 |                      "Value #A": 1,
159 |                      "Value #B": 2,
160 |                      "Value #C": 3,
161 |                      "Value #D": 4,
162 |                      "Value #E": 5,
163 |                      "Value #F": 6,
164 |                      "Value #G": 7,
165 |                      "Value #H": 8,
166 |                      "name": 0
167 |                   },
168 |                   "renameByName": {
169 |                      "Value #A": "Success Rate",
170 |                      "Value #B": "Succeeded",
171 |                      "Value #C": "Failed",
172 |                      "Value #D": "Sent",
173 |                      "Value #E": "Received",
174 |                      "Value #F": "Rejected",
175 |                      "Value #G": "Retried",
176 |                      "Value #H": "Revoked",
177 |                      "name": "Name"
178 |                   }
179 |                }
180 |             }
181 |          ],
182 |          "type": "table"
183 |       },
184 |       {
185 |          "datasource": {
186 |             "type": "datasource",
187 |             "uid": "-- Mixed --"
188 |          },
189 |          "fieldConfig": {
190 |             "defaults": {
191 |                "unit": "short"
192 |             }
193 |          },
194 |          "gridPos": {
195 |             "h": 8,
196 |             "w": 8,
197 |             "x": 16,
198 |             "y": 1
199 |          },
200 |          "id": 3,
201 |          "options": {
202 |             "footer": {
203 |                "enablePagination": true
204 |             },
205 |             "sortBy": [
206 |                {
207 |                   "desc": true,
208 |                   "displayName": "Value"
209 |                }
210 |             ]
211 |          },
212 |          "pluginVersion": "v11.1.0",
213 |          "targets": [
214 |             {
215 |                "datasource": {
216 |                   "type": "prometheus",
217 |                   "uid": "$datasource"
218 |                },
219 |                "expr": "round(\n  sum (\n    increase(\n      celery_task_failed_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__range]\n    )\n  ) by (name, exception) > 0\n)\n",
220 |                "format": "table",
221 |                "instant": true
222 |             }
223 |          ],
224 |          "title": "Task Exceptions",
225 |          "transformations": [
226 |             {
227 |                "id": "organize",
228 |                "options": {
229 |                   "excludeByName": {
230 |                      "Time": true,
231 |                      "job": true
232 |                   },
233 |                   "indexByName": {
234 |                      "Value": 2,
235 |                      "exception": 1,
236 |                      "name": 0
237 |                   },
238 |                   "renameByName": {
239 |                      "exception": "Exception",
240 |                      "name": "Task"
241 |                   }
242 |                }
243 |             }
244 |          ],
245 |          "type": "table"
246 |       },
247 |       {
248 |          "datasource": {
249 |             "type": "datasource",
250 |             "uid": "-- Mixed --"
251 |          },
252 |          "fieldConfig": {
253 |             "defaults": {
254 |                "custom": {
255 |                   "spanNulls": false
256 |                },
257 |                "unit": "short"
258 |             }
259 |          },
260 |          "gridPos": {
261 |             "h": 8,
262 |             "w": 24,
263 |             "x": 0,
264 |             "y": 9
265 |          },
266 |          "id": 4,
267 |          "options": {
268 |             "legend": {
269 |                "calcs": [
270 |                   "mean",
271 |                   "max"
272 |                ],
273 |                "displayMode": "table",
274 |                "placement": "right",
275 |                "showLegend": true,
276 |                "sortBy": "Mean",
277 |                "sortDesc": true
278 |             },
279 |             "tooltip": {
280 |                "mode": "multi",
281 |                "sort": "desc"
282 |             }
283 |          },
284 |          "pluginVersion": "v11.1.0",
285 |          "targets": [
286 |             {
287 |                "datasource": {
288 |                   "type": "prometheus",
289 |                   "uid": "$datasource"
290 |                },
291 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_succeeded_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    )\n  )\n) by (name) > 0\n",
292 |                "legendFormat": "Succeeded - {{ name }}"
293 |             },
294 |             {
295 |                "datasource": {
296 |                   "type": "prometheus",
297 |                   "uid": "$datasource"
298 |                },
299 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_failed_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    )\n  )\n) by (name) > 0\n",
300 |                "legendFormat": "Failed - {{ name }}"
301 |             },
302 |             {
303 |                "datasource": {
304 |                   "type": "prometheus",
305 |                   "uid": "$datasource"
306 |                },
307 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_sent_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    )\n  )\n) by (name) > 0\n",
308 |                "legendFormat": "Sent - {{ name }}"
309 |             },
310 |             {
311 |                "datasource": {
312 |                   "type": "prometheus",
313 |                   "uid": "$datasource"
314 |                },
315 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_received_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    )\n  )\n) by (name) > 0\n",
316 |                "legendFormat": "Received - {{ name }}"
317 |             },
318 |             {
319 |                "datasource": {
320 |                   "type": "prometheus",
321 |                   "uid": "$datasource"
322 |                },
323 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_retried_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    )\n  )\n) by (name) > 0\n",
324 |                "legendFormat": "Retried - {{ name }}"
325 |             },
326 |             {
327 |                "datasource": {
328 |                   "type": "prometheus",
329 |                   "uid": "$datasource"
330 |                },
331 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_revoked_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    )\n  )\n) by (name) > 0\n",
332 |                "legendFormat": "Revoked - {{ name }}"
333 |             },
334 |             {
335 |                "datasource": {
336 |                   "type": "prometheus",
337 |                   "uid": "$datasource"
338 |                },
339 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_rejected_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    )\n  )\n) by (name) > 0\n",
340 |                "legendFormat": "Rejected - {{ name }}"
341 |             }
342 |          ],
343 |          "title": "Tasks Completed",
344 |          "type": "timeseries"
345 |       },
346 |       {
347 |          "datasource": {
348 |             "type": "datasource",
349 |             "uid": "-- Mixed --"
350 |          },
351 |          "fieldConfig": {
352 |             "defaults": {
353 |                "custom": {
354 |                   "spanNulls": false
355 |                },
356 |                "unit": "short"
357 |             }
358 |          },
359 |          "gridPos": {
360 |             "h": 8,
361 |             "w": 24,
362 |             "x": 0,
363 |             "y": 17
364 |          },
365 |          "id": 5,
366 |          "options": {
367 |             "legend": {
368 |                "calcs": [
369 |                   "mean",
370 |                   "max"
371 |                ],
372 |                "displayMode": "table",
373 |                "placement": "right",
374 |                "showLegend": true,
375 |                "sortBy": "Mean",
376 |                "sortDesc": true
377 |             },
378 |             "tooltip": {
379 |                "mode": "multi",
380 |                "sort": "desc"
381 |             }
382 |          },
383 |          "pluginVersion": "v11.1.0",
384 |          "targets": [
385 |             {
386 |                "datasource": {
387 |                   "type": "prometheus",
388 |                   "uid": "$datasource"
389 |                },
390 |                "expr": "sum (\n  round(\n    increase(\n      celery_task_failed_total{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    )\n  )\n) by (name, exception) > 0\n",
391 |                "legendFormat": "{{ name }}/{{ exception }}"
392 |             }
393 |          ],
394 |          "title": "Task Exceptions",
395 |          "type": "timeseries"
396 |       },
397 |       {
398 |          "datasource": {
399 |             "type": "datasource",
400 |             "uid": "-- Mixed --"
401 |          },
402 |          "fieldConfig": {
403 |             "defaults": {
404 |                "custom": {
405 |                   "spanNulls": false
406 |                },
407 |                "unit": "s"
408 |             },
409 |             "overrides": [
410 |                {
411 |                   "matcher": {
412 |                      "id": "byName",
413 |                      "options": "P50"
414 |                   },
415 |                   "properties": [
416 |                      {
417 |                         "id": "color",
418 |                         "value": {
419 |                            "fixedColor": "green",
420 |                            "mode": "fixed"
421 |                         }
422 |                      }
423 |                   ]
424 |                },
425 |                {
426 |                   "matcher": {
427 |                      "id": "byName",
428 |                      "options": "P95"
429 |                   },
430 |                   "properties": [
431 |                      {
432 |                         "id": "color",
433 |                         "value": {
434 |                            "fixedColor": "yellow",
435 |                            "mode": "fixed"
436 |                         }
437 |                      }
438 |                   ]
439 |                },
440 |                {
441 |                   "matcher": {
442 |                      "id": "byName",
443 |                      "options": "P99"
444 |                   },
445 |                   "properties": [
446 |                      {
447 |                         "id": "color",
448 |                         "value": {
449 |                            "fixedColor": "red",
450 |                            "mode": "fixed"
451 |                         }
452 |                      }
453 |                   ]
454 |                }
455 |             ]
456 |          },
457 |          "gridPos": {
458 |             "h": 8,
459 |             "w": 24,
460 |             "x": 0,
461 |             "y": 25
462 |          },
463 |          "id": 6,
464 |          "options": {
465 |             "legend": {
466 |                "calcs": [
467 |                   "mean",
468 |                   "max"
469 |                ],
470 |                "displayMode": "table",
471 |                "placement": "right",
472 |                "showLegend": true,
473 |                "sortBy": "Mean",
474 |                "sortDesc": true
475 |             },
476 |             "tooltip": {
477 |                "mode": "multi",
478 |                "sort": "desc"
479 |             }
480 |          },
481 |          "pluginVersion": "v11.1.0",
482 |          "targets": [
483 |             {
484 |                "datasource": {
485 |                   "type": "prometheus",
486 |                   "uid": "$datasource"
487 |                },
488 |                "expr": "histogram_quantile(0.50,\n  sum(\n    irate(\n      celery_task_runtime_bucket{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    ) > 0\n  ) by (name, job, le)\n)\n",
489 |                "legendFormat": "P50 - {{ name }}"
490 |             },
491 |             {
492 |                "datasource": {
493 |                   "type": "prometheus",
494 |                   "uid": "$datasource"
495 |                },
496 |                "expr": "histogram_quantile(0.95,\n  sum(\n    irate(\n      celery_task_runtime_bucket{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    ) > 0\n  ) by (name, job, le)\n)\n",
497 |                "legendFormat": "P95 - {{ name }}"
498 |             },
499 |             {
500 |                "datasource": {
501 |                   "type": "prometheus",
502 |                   "uid": "$datasource"
503 |                },
504 |                "expr": "histogram_quantile(0.99,\n  sum(\n    irate(\n      celery_task_runtime_bucket{\n        job=\"$job\",\n        name=~\"$task\",\n        queue_name=~\"$queue_name\"\n      }[$__rate_interval]\n    ) > 0\n  ) by (name, job, le)\n)\n",
505 |                "legendFormat": "P99 - {{ name }}"
506 |             }
507 |          ],
508 |          "title": "Tasks Runtime",
509 |          "type": "timeseries"
510 |       }
511 |    ],
512 |    "schemaVersion": 39,
513 |    "tags": [
514 |       "celery",
515 |       "celery-mixin"
516 |    ],
517 |    "templating": {
518 |       "list": [
519 |          {
520 |             "label": "Data source",
521 |             "name": "datasource",
522 |             "query": "prometheus",
523 |             "type": "datasource"
524 |          },
525 |          {
526 |             "datasource": {
527 |                "type": "prometheus",
528 |                "uid": "${datasource}"
529 |             },
530 |             "includeAll": false,
531 |             "label": "Namespace",
532 |             "multi": false,
533 |             "name": "namespace",
534 |             "query": "label_values(celery_worker_up{}, namespace)",
535 |             "refresh": 2,
536 |             "sort": 1,
537 |             "type": "query"
538 |          },
539 |          {
540 |             "datasource": {
541 |                "type": "prometheus",
542 |                "uid": "${datasource}"
543 |             },
544 |             "includeAll": false,
545 |             "label": "Job",
546 |             "multi": false,
547 |             "name": "job",
548 |             "query": "label_values(celery_worker_up{namespace=\"$namespace\"}, job)",
549 |             "refresh": 2,
550 |             "sort": 1,
551 |             "type": "query"
552 |          },
553 |          {
554 |             "datasource": {
555 |                "type": "prometheus",
556 |                "uid": "${datasource}"
557 |             },
558 |             "includeAll": false,
559 |             "label": "Queue Name",
560 |             "multi": false,
561 |             "name": "queue_name",
562 |             "query": "label_values(celery_task_received_total{namespace=\"$namespace\", job=\"$job\", name!~\"None\"}, queue_name)",
563 |             "refresh": 2,
564 |             "sort": 1,
565 |             "type": "query"
566 |          },
567 |          {
568 |             "datasource": {
569 |                "type": "prometheus",
570 |                "uid": "${datasource}"
571 |             },
572 |             "includeAll": false,
573 |             "label": "Task",
574 |             "multi": true,
575 |             "name": "task",
576 |             "query": "label_values(celery_task_received_total{namespace=\"$namespace\", job=\"$job\", queue_name=~\"$queue_name\", name!~\"None\"}, name)",
577 |             "refresh": 2,
578 |             "sort": 1,
579 |             "type": "query"
580 |          }
581 |       ]
582 |    },
583 |    "time": {
584 |       "from": "now-2d",
585 |       "to": "now"
586 |    },
587 |    "timezone": "utc",
588 |    "title": "Celery / Tasks / By Task",
589 |    "uid": "celery-tasks-by-task-32s3"
590 | }
591 | 


--------------------------------------------------------------------------------
/celery-mixin/dashboards_out/celery-tasks.json:
--------------------------------------------------------------------------------
  1 | {
  2 |    "__inputs": [ ],
  3 |    "__requires": [ ],
  4 |    "annotations": {
  5 |       "list": [ ]
  6 |    },
  7 |    "description": "A dashboard that monitors Celery. It is created using the Celery-mixin for the the (Celery-exporter)[https://github.com/danihodovic/celery-exporter]",
  8 |    "editable": false,
  9 |    "gnetId": null,
 10 |    "graphTooltip": 0,
 11 |    "hideControls": false,
 12 |    "id": null,
 13 |    "links": [ ],
 14 |    "panels": [
 15 |       {
 16 |          "collapse": false,
 17 |          "collapsed": false,
 18 |          "gridPos": {
 19 |             "h": 1,
 20 |             "w": 24,
 21 |             "x": 0,
 22 |             "y": 0
 23 |          },
 24 |          "id": 2,
 25 |          "panels": [ ],
 26 |          "repeat": null,
 27 |          "repeatIteration": null,
 28 |          "repeatRowId": null,
 29 |          "showTitle": true,
 30 |          "title": "Summary",
 31 |          "titleSize": "h6",
 32 |          "type": "row"
 33 |       },
 34 |       {
 35 |          "datasource": "$datasource",
 36 |          "fieldConfig": {
 37 |             "defaults": {
 38 |                "links": [ ],
 39 |                "mappings": [ ],
 40 |                "thresholds": {
 41 |                   "mode": "absolute",
 42 |                   "steps": [ ]
 43 |                },
 44 |                "unit": "none"
 45 |             }
 46 |          },
 47 |          "gridPos": {
 48 |             "h": 4,
 49 |             "w": 4,
 50 |             "x": 0,
 51 |             "y": 1
 52 |          },
 53 |          "id": 3,
 54 |          "links": [ ],
 55 |          "options": {
 56 |             "colorMode": "value",
 57 |             "graphMode": "area",
 58 |             "justifyMode": "auto",
 59 |             "orientation": "auto",
 60 |             "reduceOptions": {
 61 |                "calcs": [
 62 |                   "last"
 63 |                ],
 64 |                "fields": "",
 65 |                "values": false
 66 |             },
 67 |             "textMode": "auto"
 68 |          },
 69 |          "pluginVersion": "7",
 70 |          "targets": [
 71 |             {
 72 |                "expr": "count(celery_worker_up{job=~\"celery|celery-exporter\"} == 1)",
 73 |                "format": "time_series",
 74 |                "intervalFactor": 1,
 75 |                "legendFormat": "",
 76 |                "refId": "A"
 77 |             }
 78 |          ],
 79 |          "title": "Celery Workers",
 80 |          "transparent": false,
 81 |          "type": "stat"
 82 |       },
 83 |       {
 84 |          "datasource": "$datasource",
 85 |          "fieldConfig": {
 86 |             "defaults": {
 87 |                "links": [ ],
 88 |                "mappings": [ ],
 89 |                "thresholds": {
 90 |                   "mode": "absolute",
 91 |                   "steps": [ ]
 92 |                },
 93 |                "unit": "none"
 94 |             }
 95 |          },
 96 |          "gridPos": {
 97 |             "h": 4,
 98 |             "w": 5,
 99 |             "x": 4,
100 |             "y": 1
101 |          },
102 |          "id": 4,
103 |          "links": [ ],
104 |          "options": {
105 |             "colorMode": "value",
106 |             "graphMode": "area",
107 |             "justifyMode": "auto",
108 |             "orientation": "auto",
109 |             "reduceOptions": {
110 |                "calcs": [
111 |                   "last"
112 |                ],
113 |                "fields": "",
114 |                "values": false
115 |             },
116 |             "textMode": "auto"
117 |          },
118 |          "pluginVersion": "7",
119 |          "targets": [
120 |             {
121 |                "expr": "sum(celery_worker_tasks_active{job=~\"celery|celery-exporter\"})",
122 |                "format": "time_series",
123 |                "intervalFactor": 1,
124 |                "legendFormat": "",
125 |                "refId": "A"
126 |             }
127 |          ],
128 |          "title": "Tasks Active",
129 |          "transparent": false,
130 |          "type": "stat"
131 |       },
132 |       {
133 |          "datasource": "$datasource",
134 |          "fieldConfig": {
135 |             "defaults": {
136 |                "links": [ ],
137 |                "mappings": [ ],
138 |                "thresholds": {
139 |                   "mode": "absolute",
140 |                   "steps": [ ]
141 |                },
142 |                "unit": "none"
143 |             }
144 |          },
145 |          "gridPos": {
146 |             "h": 4,
147 |             "w": 5,
148 |             "x": 9,
149 |             "y": 1
150 |          },
151 |          "id": 5,
152 |          "links": [ ],
153 |          "options": {
154 |             "colorMode": "value",
155 |             "graphMode": "area",
156 |             "justifyMode": "auto",
157 |             "orientation": "auto",
158 |             "reduceOptions": {
159 |                "calcs": [
160 |                   "last"
161 |                ],
162 |                "fields": "",
163 |                "values": false
164 |             },
165 |             "textMode": "auto"
166 |          },
167 |          "pluginVersion": "7",
168 |          "targets": [
169 |             {
170 |                "expr": "sum(round(increase(celery_task_received_total{job=~\"celery|celery-exporter\"}[1d])))\n",
171 |                "format": "time_series",
172 |                "intervalFactor": 2,
173 |                "legendFormat": "",
174 |                "refId": "A"
175 |             }
176 |          ],
177 |          "title": "Tasks received by workers last 24h",
178 |          "transparent": false,
179 |          "type": "stat"
180 |       },
181 |       {
182 |          "datasource": "$datasource",
183 |          "fieldConfig": {
184 |             "defaults": {
185 |                "links": [ ],
186 |                "mappings": [ ],
187 |                "thresholds": {
188 |                   "mode": "absolute",
189 |                   "steps": [
190 |                      {
191 |                         "color": "green",
192 |                         "value": 0.94999999999999996
193 |                      }
194 |                   ]
195 |                },
196 |                "unit": "percentunit"
197 |             }
198 |          },
199 |          "gridPos": {
200 |             "h": 4,
201 |             "w": 5,
202 |             "x": 14,
203 |             "y": 1
204 |          },
205 |          "id": 6,
206 |          "links": [ ],
207 |          "options": {
208 |             "colorMode": "value",
209 |             "graphMode": "area",
210 |             "justifyMode": "auto",
211 |             "orientation": "auto",
212 |             "reduceOptions": {
213 |                "calcs": [
214 |                   "last"
215 |                ],
216 |                "fields": "",
217 |                "values": false
218 |             },
219 |             "textMode": "auto"
220 |          },
221 |          "pluginVersion": "7",
222 |          "targets": [
223 |             {
224 |                "expr": "sum(round(increase(celery_task_succeeded_total{job=~\"celery|celery-exporter\"}[1d])))\n/(sum(round(increase(celery_task_succeeded_total{job=~\"celery|celery-exporter\"}[1d])))\n+sum(round(increase(celery_task_failed_total{job=~\"celery|celery-exporter\"}[1d])))\n)\n",
225 |                "format": "time_series",
226 |                "intervalFactor": 2,
227 |                "legendFormat": "",
228 |                "refId": "A"
229 |             }
230 |          ],
231 |          "title": "Successful completion rate last 24h",
232 |          "transparent": false,
233 |          "type": "stat"
234 |       },
235 |       {
236 |          "datasource": "$datasource",
237 |          "fieldConfig": {
238 |             "defaults": {
239 |                "links": [ ],
240 |                "mappings": [ ],
241 |                "thresholds": {
242 |                   "mode": "absolute",
243 |                   "steps": [ ]
244 |                },
245 |                "unit": "none"
246 |             }
247 |          },
248 |          "gridPos": {
249 |             "h": 4,
250 |             "w": 5,
251 |             "x": 19,
252 |             "y": 1
253 |          },
254 |          "id": 7,
255 |          "links": [ ],
256 |          "options": {
257 |             "colorMode": "value",
258 |             "graphMode": "area",
259 |             "justifyMode": "auto",
260 |             "orientation": "auto",
261 |             "reduceOptions": {
262 |                "calcs": [
263 |                   "last"
264 |                ],
265 |                "fields": "",
266 |                "values": false
267 |             },
268 |             "textMode": "auto"
269 |          },
270 |          "pluginVersion": "7",
271 |          "targets": [
272 |             {
273 |                "expr": "sum(rate(celery_task_runtime_sum{job=~\"celery|celery-exporter\"}[1d])) / sum(rate(celery_task_runtime_count{job=~\"celery|celery-exporter\"}[1d])) > 0\n",
274 |                "format": "time_series",
275 |                "intervalFactor": 2,
276 |                "legendFormat": "",
277 |                "refId": "A"
278 |             }
279 |          ],
280 |          "title": "Average Runtime for Tasks last 24h",
281 |          "transparent": false,
282 |          "type": "stat"
283 |       },
284 |       {
285 |          "columns": [ ],
286 |          "datasource": "$datasource",
287 |          "gridPos": {
288 |             "h": 8,
289 |             "w": 8,
290 |             "x": 0,
291 |             "y": 5
292 |          },
293 |          "id": 8,
294 |          "links": [ ],
295 |          "sort": {
296 |             "col": 2,
297 |             "desc": true
298 |          },
299 |          "span": "4",
300 |          "styles": [
301 |             {
302 |                "alias": "Time",
303 |                "dateFormat": "YYYY-MM-DD HH:mm:ss",
304 |                "pattern": "Time",
305 |                "type": "hidden"
306 |             },
307 |             {
308 |                "alias": "Task",
309 |                "pattern": "name"
310 |             }
311 |          ],
312 |          "targets": [
313 |             {
314 |                "expr": "round(topk(5, sum by (name) (increase(celery_task_failed_total{job=~\"celery|celery-exporter\"}[1d]) > 0 )))\n",
315 |                "format": "table",
316 |                "instant": true,
317 |                "intervalFactor": 2,
318 |                "legendFormat": "",
319 |                "refId": "A"
320 |             }
321 |          ],
322 |          "timeFrom": null,
323 |          "timeShift": null,
324 |          "title": "Top 5 failed tasks last 24h",
325 |          "type": "table"
326 |       },
327 |       {
328 |          "columns": [ ],
329 |          "datasource": "$datasource",
330 |          "gridPos": {
331 |             "h": 8,
332 |             "w": 8,
333 |             "x": 8,
334 |             "y": 5
335 |          },
336 |          "id": 9,
337 |          "links": [ ],
338 |          "sort": {
339 |             "col": 2,
340 |             "desc": true
341 |          },
342 |          "span": "4",
343 |          "styles": [
344 |             {
345 |                "alias": "Time",
346 |                "dateFormat": "YYYY-MM-DD HH:mm:ss",
347 |                "pattern": "Time",
348 |                "type": "hidden"
349 |             },
350 |             {
351 |                "alias": "Task",
352 |                "pattern": "name"
353 |             }
354 |          ],
355 |          "targets": [
356 |             {
357 |                "expr": "round(topk(5, sum by (exception) (increase(celery_task_failed_total{job=~\"celery|celery-exporter\"}[1d]) > 0 )))\n",
358 |                "format": "table",
359 |                "instant": true,
360 |                "intervalFactor": 2,
361 |                "legendFormat": "",
362 |                "refId": "A"
363 |             }
364 |          ],
365 |          "timeFrom": null,
366 |          "timeShift": null,
367 |          "title": "Top 5 exceptions last 24h",
368 |          "type": "table"
369 |       },
370 |       {
371 |          "columns": [ ],
372 |          "datasource": "$datasource",
373 |          "gridPos": {
374 |             "h": 8,
375 |             "w": 8,
376 |             "x": 16,
377 |             "y": 5
378 |          },
379 |          "id": 10,
380 |          "links": [ ],
381 |          "sort": {
382 |             "col": 2,
383 |             "desc": true
384 |          },
385 |          "span": "4",
386 |          "styles": [
387 |             {
388 |                "alias": "Time",
389 |                "dateFormat": "YYYY-MM-DD HH:mm:ss",
390 |                "pattern": "Time",
391 |                "type": "hidden"
392 |             },
393 |             {
394 |                "alias": "Task",
395 |                "pattern": "name"
396 |             }
397 |          ],
398 |          "targets": [
399 |             {
400 |                "expr": "topk(5, (sum by(name) (rate(celery_task_runtime_sum{job=~\"celery|celery-exporter\"}[1d])) / sum by (name) (rate(celery_task_runtime_count{job=~\"celery|celery-exporter\"}[1d])) > 0 ))\n",
401 |                "format": "table",
402 |                "instant": true,
403 |                "intervalFactor": 2,
404 |                "legendFormat": "",
405 |                "refId": "A"
406 |             }
407 |          ],
408 |          "timeFrom": null,
409 |          "timeShift": null,
410 |          "title": "Top 5 task runtime last 24h",
411 |          "type": "table"
412 |       },
413 |       {
414 |          "collapse": false,
415 |          "collapsed": false,
416 |          "gridPos": {
417 |             "h": 1,
418 |             "w": 24,
419 |             "x": 0,
420 |             "y": 13
421 |          },
422 |          "id": 11,
423 |          "panels": [ ],
424 |          "repeat": null,
425 |          "repeatIteration": null,
426 |          "repeatRowId": null,
427 |          "showTitle": true,
428 |          "title": "Individual Tasks",
429 |          "titleSize": "h6",
430 |          "type": "row"
431 |       },
432 |       {
433 |          "columns": [ ],
434 |          "datasource": "$datasource",
435 |          "gridPos": {
436 |             "h": 8,
437 |             "w": 24,
438 |             "x": 0,
439 |             "y": 14
440 |          },
441 |          "id": 12,
442 |          "links": [ ],
443 |          "sort": {
444 |             "col": 2,
445 |             "desc": false
446 |          },
447 |          "span": "6",
448 |          "styles": [
449 |             {
450 |                "alias": "Time",
451 |                "dateFormat": "YYYY-MM-DD HH:mm:ss",
452 |                "pattern": "Time",
453 |                "type": "hidden"
454 |             },
455 |             {
456 |                "alias": "Task",
457 |                "pattern": "name"
458 |             },
459 |             {
460 |                "alias": "Success Rate",
461 |                "pattern": "Value #A",
462 |                "type": "number",
463 |                "unit": "percentunit"
464 |             },
465 |             {
466 |                "alias": "Received",
467 |                "decimals": "0",
468 |                "pattern": "Value #B",
469 |                "type": "number",
470 |                "unit": "short"
471 |             },
472 |             {
473 |                "alias": "Succeeded",
474 |                "decimals": "0",
475 |                "pattern": "Value #C",
476 |                "type": "number",
477 |                "unit": "short"
478 |             },
479 |             {
480 |                "alias": "Failed",
481 |                "decimals": "0",
482 |                "pattern": "Value #D",
483 |                "type": "number",
484 |                "unit": "short"
485 |             },
486 |             {
487 |                "alias": "Rejected",
488 |                "decimals": "0",
489 |                "pattern": "Value #E",
490 |                "type": "number",
491 |                "unit": "short"
492 |             },
493 |             {
494 |                "alias": "Retried",
495 |                "decimals": "0",
496 |                "pattern": "Value #F",
497 |                "type": "number",
498 |                "unit": "short"
499 |             },
500 |             {
501 |                "alias": "Revoked",
502 |                "decimals": "0",
503 |                "pattern": "Value #G",
504 |                "type": "number",
505 |                "unit": "short"
506 |             }
507 |          ],
508 |          "targets": [
509 |             {
510 |                "expr": "sum by (name) (round(increase(celery_task_succeeded_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[$__range])))\n/(sum by (name) (round(increase(celery_task_succeeded_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[$__range])))\n+sum by (name) (round(increase(celery_task_failed_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[$__range])))\n) > -1\n",
511 |                "format": "table",
512 |                "instant": true,
513 |                "intervalFactor": 2,
514 |                "legendFormat": "",
515 |                "refId": "A"
516 |             },
517 |             {
518 |                "expr": "sum by (name) (round(increase(celery_task_received_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[$__range]))) > 0\n",
519 |                "format": "table",
520 |                "instant": true,
521 |                "intervalFactor": 2,
522 |                "legendFormat": "",
523 |                "refId": "B"
524 |             },
525 |             {
526 |                "expr": "sum by (name) (round(increase(celery_task_succeeded_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[$__range]))) > 0\n",
527 |                "format": "table",
528 |                "instant": true,
529 |                "intervalFactor": 2,
530 |                "legendFormat": "",
531 |                "refId": "C"
532 |             },
533 |             {
534 |                "expr": "sum by (name) (round(increase(celery_task_failed_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[$__range]))) > 0\n",
535 |                "format": "table",
536 |                "instant": true,
537 |                "intervalFactor": 2,
538 |                "legendFormat": "",
539 |                "refId": "D"
540 |             },
541 |             {
542 |                "expr": "sum by (name) (round(increase(celery_task_rejected_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[$__range]))) > 0\n",
543 |                "format": "table",
544 |                "instant": true,
545 |                "intervalFactor": 2,
546 |                "legendFormat": "",
547 |                "refId": "E"
548 |             },
549 |             {
550 |                "expr": "sum by (name) (round(increase(celery_task_retried_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[$__range]))) > 0\n",
551 |                "format": "table",
552 |                "instant": true,
553 |                "intervalFactor": 2,
554 |                "legendFormat": "",
555 |                "refId": "F"
556 |             },
557 |             {
558 |                "expr": "sum by (name) (round(increase(celery_task_revoked_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[$__range]))) > 0\n",
559 |                "format": "table",
560 |                "instant": true,
561 |                "intervalFactor": 2,
562 |                "legendFormat": "",
563 |                "refId": "G"
564 |             }
565 |          ],
566 |          "timeFrom": null,
567 |          "timeShift": null,
568 |          "title": "Task Stats",
569 |          "type": "table"
570 |       },
571 |       {
572 |          "aliasColors": { },
573 |          "bars": false,
574 |          "dashLength": 10,
575 |          "dashes": false,
576 |          "datasource": "$datasource",
577 |          "fill": 1,
578 |          "fillGradient": 0,
579 |          "gridPos": {
580 |             "h": 10,
581 |             "w": 24,
582 |             "x": 0,
583 |             "y": 22
584 |          },
585 |          "id": 13,
586 |          "legend": {
587 |             "alignAsTable": true,
588 |             "avg": true,
589 |             "current": true,
590 |             "hideZero": true,
591 |             "max": false,
592 |             "min": false,
593 |             "rightSide": true,
594 |             "show": true,
595 |             "sideWidth": null,
596 |             "total": false,
597 |             "values": true
598 |          },
599 |          "lines": true,
600 |          "linewidth": 1,
601 |          "links": [ ],
602 |          "nullPointMode": "null",
603 |          "percentage": false,
604 |          "pointradius": 5,
605 |          "points": false,
606 |          "renderer": "flot",
607 |          "repeat": null,
608 |          "seriesOverrides": [ ],
609 |          "spaceLength": 10,
610 |          "stack": false,
611 |          "steppedLine": false,
612 |          "targets": [
613 |             {
614 |                "expr": "sum by (name) (round(increase(celery_task_succeeded_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[10m])))\n",
615 |                "format": "time_series",
616 |                "intervalFactor": 2,
617 |                "legendFormat": "Succeeded - {{ name }}",
618 |                "refId": "A"
619 |             },
620 |             {
621 |                "expr": "sum by (name) (round(increase(celery_task_failed_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[10m])))\n",
622 |                "format": "time_series",
623 |                "intervalFactor": 2,
624 |                "legendFormat": "Failed - {{ name }}",
625 |                "refId": "B"
626 |             },
627 |             {
628 |                "expr": "sum by (name) (round(increase(celery_task_received_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[10m])))\n",
629 |                "format": "time_series",
630 |                "intervalFactor": 2,
631 |                "legendFormat": "Received - {{ name }}",
632 |                "refId": "C"
633 |             },
634 |             {
635 |                "expr": "sum by (name) (round(increase(celery_task_retried_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[10m])))\n",
636 |                "format": "time_series",
637 |                "intervalFactor": 2,
638 |                "legendFormat": "Retried - {{ name }}",
639 |                "refId": "D"
640 |             },
641 |             {
642 |                "expr": "sum by (name) (round(increase(celery_task_rejected_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[10m])))\n",
643 |                "format": "time_series",
644 |                "intervalFactor": 2,
645 |                "legendFormat": "Rejected - {{ name }}",
646 |                "refId": "E"
647 |             },
648 |             {
649 |                "expr": "sum by (name) (round(increase(celery_task_revoked_total{job=~\"celery|celery-exporter\", name=~\"$task\"}[10m])))\n",
650 |                "format": "time_series",
651 |                "intervalFactor": 2,
652 |                "legendFormat": "Revoked - {{ name }}",
653 |                "refId": "F"
654 |             }
655 |          ],
656 |          "thresholds": [ ],
657 |          "timeFrom": null,
658 |          "timeShift": null,
659 |          "title": "Tasks completed with 10m intervals",
660 |          "tooltip": {
661 |             "shared": true,
662 |             "sort": 0,
663 |             "value_type": "individual"
664 |          },
665 |          "type": "graph",
666 |          "xaxis": {
667 |             "buckets": null,
668 |             "mode": "time",
669 |             "name": null,
670 |             "show": true,
671 |             "values": [ ]
672 |          },
673 |          "yaxes": [
674 |             {
675 |                "format": "short",
676 |                "label": null,
677 |                "logBase": 1,
678 |                "max": null,
679 |                "min": null,
680 |                "show": true
681 |             },
682 |             {
683 |                "format": "short",
684 |                "label": null,
685 |                "logBase": 1,
686 |                "max": null,
687 |                "min": null,
688 |                "show": true
689 |             }
690 |          ]
691 |       },
692 |       {
693 |          "aliasColors": { },
694 |          "bars": false,
695 |          "dashLength": 10,
696 |          "dashes": false,
697 |          "datasource": "$datasource",
698 |          "fill": 1,
699 |          "fillGradient": 0,
700 |          "gridPos": {
701 |             "h": 8,
702 |             "w": 24,
703 |             "x": 0,
704 |             "y": 32
705 |          },
706 |          "id": 14,
707 |          "legend": {
708 |             "alignAsTable": true,
709 |             "avg": true,
710 |             "current": true,
711 |             "hideZero": true,
712 |             "max": false,
713 |             "min": false,
714 |             "rightSide": true,
715 |             "show": true,
716 |             "sideWidth": null,
717 |             "total": false,
718 |             "values": true
719 |          },
720 |          "lines": true,
721 |          "linewidth": 1,
722 |          "links": [ ],
723 |          "nullPointMode": "null",
724 |          "percentage": false,
725 |          "pointradius": 5,
726 |          "points": false,
727 |          "renderer": "flot",
728 |          "repeat": null,
729 |          "seriesOverrides": [ ],
730 |          "spaceLength": 10,
731 |          "stack": false,
732 |          "steppedLine": false,
733 |          "targets": [
734 |             {
735 |                "expr": "sum by (name) (rate(celery_task_runtime_sum{job=~\"celery|celery-exporter\", name=~\"$task\"}[10m])) / sum by (name) (rate(celery_task_runtime_count{job=~\"celery|celery-exporter\", name=~\"$task\"}[10m])) > 0\n",
736 |                "format": "time_series",
737 |                "intervalFactor": 2,
738 |                "legendFormat": "{{ name }}",
739 |                "refId": "A"
740 |             }
741 |          ],
742 |          "thresholds": [ ],
743 |          "timeFrom": null,
744 |          "timeShift": null,
745 |          "title": "Tasks Runtime with 10m intervals",
746 |          "tooltip": {
747 |             "shared": true,
748 |             "sort": 0,
749 |             "value_type": "individual"
750 |          },
751 |          "type": "graph",
752 |          "xaxis": {
753 |             "buckets": null,
754 |             "mode": "time",
755 |             "name": null,
756 |             "show": true,
757 |             "values": [ ]
758 |          },
759 |          "yaxes": [
760 |             {
761 |                "format": "short",
762 |                "label": null,
763 |                "logBase": 1,
764 |                "max": null,
765 |                "min": null,
766 |                "show": true
767 |             },
768 |             {
769 |                "format": "short",
770 |                "label": null,
771 |                "logBase": 1,
772 |                "max": null,
773 |                "min": null,
774 |                "show": true
775 |             }
776 |          ]
777 |       }
778 |    ],
779 |    "refresh": "",
780 |    "rows": [ ],
781 |    "schemaVersion": 14,
782 |    "style": "dark",
783 |    "tags": [ ],
784 |    "templating": {
785 |       "list": [
786 |          {
787 |             "current": {
788 |                "text": "Prometheus",
789 |                "value": "Prometheus"
790 |             },
791 |             "hide": 0,
792 |             "label": null,
793 |             "name": "datasource",
794 |             "options": [ ],
795 |             "query": "prometheus",
796 |             "refresh": 1,
797 |             "regex": "",
798 |             "type": "datasource"
799 |          },
800 |          {
801 |             "allValue": null,
802 |             "current": {
803 |                "text": "",
804 |                "value": ""
805 |             },
806 |             "datasource": "$datasource",
807 |             "hide": 0,
808 |             "includeAll": true,
809 |             "label": null,
810 |             "multi": true,
811 |             "name": "task",
812 |             "options": [ ],
813 |             "query": "label_values(celery_task_sent_total, name)",
814 |             "refresh": 1,
815 |             "regex": "",
816 |             "sort": 1,
817 |             "tagValuesQuery": "",
818 |             "tags": [ ],
819 |             "tagsQuery": "",
820 |             "type": "query",
821 |             "useTags": false
822 |          }
823 |       ]
824 |    },
825 |    "time": {
826 |       "from": "now-2d",
827 |       "to": "now"
828 |    },
829 |    "timepicker": {
830 |       "refresh_intervals": [
831 |          "5s",
832 |          "10s",
833 |          "30s",
834 |          "1m",
835 |          "5m",
836 |          "15m",
837 |          "30m",
838 |          "1h",
839 |          "2h",
840 |          "1d"
841 |       ],
842 |       "time_options": [
843 |          "5m",
844 |          "15m",
845 |          "1h",
846 |          "6h",
847 |          "12h",
848 |          "24h",
849 |          "2d",
850 |          "7d",
851 |          "30d"
852 |       ]
853 |    },
854 |    "timezone": "utc",
855 |    "title": "Celery / Tasks",
856 |    "uid": "celery-exporter",
857 |    "version": 0
858 | }
859 | 


--------------------------------------------------------------------------------
/celery-mixin/jsonnetfile.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": 1,
 3 |   "dependencies": [
 4 |     {
 5 |       "source": {
 6 |         "git": {
 7 |           "remote": "https://github.com/grafana/grafonnet.git",
 8 |           "subdir": "gen/grafonnet-latest"
 9 |         }
10 |       },
11 |       "version": "main"
12 |     }
13 |   ],
14 |   "legacyImports": false
15 | }
16 | 


--------------------------------------------------------------------------------
/celery-mixin/mixin.libsonnet:
--------------------------------------------------------------------------------
1 | (import 'alerts/alerts.libsonnet') +
2 | (import 'dashboards/dashboards.libsonnet') +
3 | (import 'config.libsonnet')
4 | 


--------------------------------------------------------------------------------
/celery-mixin/prometheus-alerts.yaml:
--------------------------------------------------------------------------------
 1 | "groups":
 2 | - "name": "celery"
 3 |   "rules":
 4 |   - "alert": "CeleryTaskHighFailRate"
 5 |     "annotations":
 6 |       "dashboard_url": "https://grafana.com/d/celery-tasks-by-task-32s3/celery-tasks-by-task?var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}&var-task={{ $labels.name }}"
 7 |       "description": "More than 5% tasks failed for the task {{ $labels.job }}/{{ $labels.queue_name }}/{{ $labels.name }} the past 10m."
 8 |       "summary": "Celery high task fail rate."
 9 |     "expr": |
10 |       sum(
11 |         increase(
12 |           celery_task_failed_total{
13 |             job=~"celery|celery-exporter",
14 |             queue_name!~"None",
15 |             name!~"None"
16 |           }[10m]
17 |         )
18 |       )  by (job, namespace, queue_name, name)
19 |       /
20 |       (
21 |         sum(
22 |           increase(
23 |             celery_task_failed_total{
24 |               job=~"celery|celery-exporter",
25 |               queue_name!~"None",
26 |               name!~"None"
27 |             }[10m]
28 |           )
29 |         )  by (job, namespace, queue_name, name)
30 |         +
31 |         sum(
32 |           increase(
33 |             celery_task_succeeded_total{
34 |               job=~"celery|celery-exporter",
35 |               queue_name!~"None",
36 |               name!~"None"
37 |             }[10m]
38 |           )
39 |         )  by (job, namespace, queue_name, name)
40 |       )
41 |       * 100 > 5
42 |     "for": "1m"
43 |     "labels":
44 |       "severity": "warning"
45 |   - "alert": "CeleryHighQueueLength"
46 |     "annotations":
47 |       "dashboard_url": "https://grafana.com/d/celery-tasks-overview-32s3/celery-tasks-overview?&var-job={{ $labels.job }}&var-queue_name={{ $labels.queue_name }}"
48 |       "description": "More than 100 tasks in the queue {{ $labels.job }}/{{ $labels.queue_name }} the past 20m."
49 |       "summary": "Celery high queue length."
50 |     "expr": |
51 |       sum(
52 |         celery_queue_length{
53 |           job=~"celery|celery-exporter",
54 |           queue_name!~"None"
55 |         }
56 |       )  by (job, namespace, queue_name)
57 |       > 100
58 |     "for": "20m"
59 |     "labels":
60 |       "severity": "warning"
61 |   - "alert": "CeleryWorkerDown"
62 |     "annotations":
63 |       "dashboard_url": "https://grafana.com/d/celery-tasks-overview-32s3/celery-tasks-overview?&var-job={{ $labels.job }}"
64 |       "description": "The Celery worker {{ $labels.job }}/{{ $labels.hostname }} is offline."
65 |       "summary": "A Celery worker is offline."
66 |     "expr": |
67 |       celery_worker_up{job=~"celery|celery-exporter"} == 0
68 |     "for": "15m"
69 |     "labels":
70 |       "severity": "warning"
71 | 


--------------------------------------------------------------------------------
/celery-mixin/tests.yaml:
--------------------------------------------------------------------------------
 1 | # yamllint disable rule:line-length
 2 | ---
 3 | rule_files:
 4 |   - prometheus-alerts.yaml
 5 | 
 6 | tests:
 7 |   - interval: 5m
 8 |     input_series:
 9 |       - series: 'celery_task_failed_total{job="celery-exporter", namespace="staging", queue_name="celery", name="test-task"}'
10 |         values: "1+10x10"
11 |       - series: 'celery_task_succeeded_total{job="celery-exporter", namespace="staging", queue_name="celery", name="test-task"}'
12 |         values: "1+10x10"
13 |     alert_rule_test:
14 |       - eval_time: 15m
15 |         alertname: CeleryTaskHighFailRate
16 |         exp_alerts:
17 |           - exp_labels:
18 |               job: celery-exporter
19 |               severity: warning
20 |               namespace: staging
21 |               queue_name: celery
22 |               name: test-task
23 |             exp_annotations:
24 |               summary: "Celery high task fail rate."
25 |               description: "More than 5% tasks failed for the task celery-exporter/celery/test-task the past 10m."
26 |               dashboard_url: "https://grafana.com/d/celery-tasks-by-task-32s3/celery-tasks-by-task?var-job=celery-exporter&var-queue_name=celery&var-task=test-task"
27 |   - interval: 1m
28 |     input_series:
29 |       - series: 'celery_queue_length{job="celery-exporter", namespace="staging", queue_name="celery-low-queue"}'
30 |         values: "1+0x50"
31 |       - series: 'celery_queue_length{job="celery-exporter", namespace="staging", queue_name="celery-high-queue"}'
32 |         values: "1000+200x50"
33 |     alert_rule_test:
34 |       - eval_time: 40m
35 |         alertname: CeleryHighQueueLength
36 |         exp_alerts:
37 |           - exp_labels:
38 |               job: celery-exporter
39 |               severity: warning
40 |               namespace: staging
41 |               queue_name: celery-high-queue
42 |             exp_annotations:
43 |               summary: "Celery high queue length."
44 |               description: "More than 100 tasks in the queue celery-exporter/celery-high-queue the past 20m."
45 |               dashboard_url: https://grafana.com/d/celery-tasks-overview-32s3/celery-tasks-overview?&var-job=celery-exporter&var-queue_name=celery-high-queue
46 |   - interval: 1m
47 |     input_series:
48 |       - series: 'celery_worker_up{job="celery-exporter", namespace="staging", hostname="down"}'
49 |         values: "0+0x20"
50 |       - series: 'celery_worker_up{job="celery-exporter", namespace="staging", hostname="up"}'
51 |         values: "1+0x20"
52 |     alert_rule_test:
53 |       - eval_time: 20m
54 |         alertname: CeleryWorkerDown
55 |         exp_alerts:
56 |           - exp_labels:
57 |               job: celery-exporter
58 |               severity: warning
59 |               namespace: staging
60 |               hostname: down
61 |             exp_annotations:
62 |               summary: "A Celery worker is offline."
63 |               description: "The Celery worker celery-exporter/down is offline."
64 |               dashboard_url: "https://grafana.com/d/celery-tasks-overview-32s3/celery-tasks-overview?&var-job=celery-exporter"
65 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/.helmignore:
--------------------------------------------------------------------------------
 1 | # Patterns to ignore when building packages.
 2 | # This supports shell glob matching, relative path matching, and
 3 | # negation (prefixed with !). Only one pattern per line.
 4 | .DS_Store
 5 | # Common VCS dirs
 6 | .git/
 7 | .gitignore
 8 | .bzr/
 9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *.orig
18 | *~
19 | # Various IDEs
20 | .project
21 | .idea/
22 | *.tmproj
23 | .vscode/
24 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/Chart.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v2
 2 | name: celery-exporter
 3 | description: Prometheus exporter for Celery
 4 | type: application
 5 | home: https://github.com/danihodovic/celery-exporter
 6 | keywords:
 7 |   - celery
 8 |   - prometheus
 9 |   - exporter
10 | sources:
11 |   - https://github.com/danihodovic/celery-exporter
12 | maintainers:
13 |   - name: danihodovic
14 |   - name: adinhodovic
15 | 
16 | version: 0.8.0
17 | appVersion: 0.9.2
18 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/README.md:
--------------------------------------------------------------------------------
 1 | # celery-exporter
 2 | 
 3 | ![Version: 0.7.0](https://img.shields.io/badge/Version-0.7.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.9.2](https://img.shields.io/badge/AppVersion-0.9.2-informational?style=flat-square)
 4 | 
 5 | Prometheus exporter for Celery
 6 | 
 7 | **Homepage:** <https://github.com/danihodovic/celery-exporter>
 8 | 
 9 | ## Installation
10 | 
11 | Add the helm repository:
12 | 
13 | ```bash
14 | helm repo add danihodovic https://danihodovic.github.io/celery-exporter/
15 | ```
16 | 
17 | Install the chart:
18 | 
19 | ```bash
20 | helm install celery-exporter danihodovic/celery-exporter
21 | ```
22 | 
23 | 
24 | You'll need to set the enviroment variable `CE_BROKER_URL` to the broker url of your celery instance.
25 | 
26 | For example:
27 | 
28 | ```bash
29 | helm install celery-exporter danihodovic/celery-exporter --set env[0].name=CE_BROKER_URL,env[0].value=redis://redis:6379/0
30 | ```
31 | 
32 | ## Maintainers
33 | 
34 | | Name | Email | Url |
35 | | ---- | ------ | --- |
36 | | danihodovic |  |  |
37 | | adinhodovic |  |  |
38 | 
39 | ## Source Code
40 | 
41 | * <https://github.com/danihodovic/celery-exporter>
42 | 
43 | ## Values
44 | 
45 | | Key | Type | Default | Description |
46 | |-----|------|---------|-------------|
47 | | affinity | object | `{}` |  |
48 | | env | list | `[]` |  |
49 | | fullnameOverride | string | `""` |  |
50 | | image.pullPolicy | string | `"IfNotPresent"` |  |
51 | | image.repository | string | `"danihodovic/celery-exporter"` |  |
52 | | image.tag | string | `""` |  |
53 | | imagePullSecrets | list | `[]` |  |
54 | | ingress.annotations | object | `{}` |  |
55 | | ingress.className | string | `""` |  |
56 | | ingress.enabled | bool | `false` |  |
57 | | ingress.hosts[0].host | string | `"celery-exporter.example"` |  |
58 | | ingress.hosts[0].paths[0].path | string | `"/"` |  |
59 | | ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` |  |
60 | | ingress.tls | list | `[]` |  |
61 | | livenessProbe | object | `{}` |  |
62 | | nameOverride | string | `""` |  |
63 | | nodeSelector | object | `{}` |  |
64 | | podAnnotations | object | `{}` |  |
65 | | podSecurityContext | object | `{}` |  |
66 | | readinessProbe | object | `{}` |  |
67 | | replicaCount | int | `1` |  |
68 | | resources | object | `{}` |  |
69 | | securityContext | object | `{}` |  |
70 | | service.port | int | `9808` |  |
71 | | service.type | string | `"ClusterIP"` |  |
72 | | service.annotations | object | `{}` |  |
73 | | serviceAccount.annotations | object | `{}` |  |
74 | | serviceAccount.create | bool | `true` |  |
75 | | serviceAccount.name | string | `""` |  |
76 | | serviceMonitor.additionalLabels | object | `{}` |  |
77 | | serviceMonitor.enabled | bool | `false` |  |
78 | | serviceMonitor.metricRelabelings | list | `[]` |  |
79 | | serviceMonitor.namespace | string | `""` |  |
80 | | serviceMonitor.namespaceSelector | object | `{}` |  |
81 | | serviceMonitor.relabelings | list | `[]` |  |
82 | | serviceMonitor.scrapeInterval | string | `"30s"` |  |
83 | | serviceMonitor.targetLabels | list | `[]` |  |
84 | | tolerations | list | `[]` |  |
85 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/ci/test-values.yaml:
--------------------------------------------------------------------------------
1 | env:
2 |   - name: CE_BROKER_URL
3 |     value: memory://localhost/
4 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/templates/NOTES.txt:
--------------------------------------------------------------------------------
 1 | 1. Get the application URL by running these commands:
 2 | {{- if .Values.ingress.enabled }}
 3 | {{- range $host := .Values.ingress.hosts }}
 4 |   {{- range .paths }}
 5 |   http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
 6 |   {{- end }}
 7 | {{- end }}
 8 | {{- else if contains "NodePort" .Values.service.type }}
 9 |   export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "celery-exporter.fullname" . }})
10 |   export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
11 |   echo http://$NODE_IP:$NODE_PORT
12 | {{- else if contains "LoadBalancer" .Values.service.type }}
13 |      NOTE: It may take a few minutes for the LoadBalancer IP to be available.
14 |            You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "celery-exporter.fullname" . }}'
15 |   export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "celery-exporter.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
16 |   echo http://$SERVICE_IP:{{ .Values.service.port }}
17 | {{- else if contains "ClusterIP" .Values.service.type }}
18 |   export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "celery-exporter.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
19 |   export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
20 |   echo "Visit http://127.0.0.1:8080 to use your application"
21 |   kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
22 | {{- end }}
23 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/templates/_helpers.tpl:
--------------------------------------------------------------------------------
 1 | {{/*
 2 | Expand the name of the chart.
 3 | */}}
 4 | {{- define "celery-exporter.name" -}}
 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
 6 | {{- end }}
 7 | 
 8 | {{/*
 9 | Create a default fully qualified app name.
10 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
11 | If release name contains chart name it will be used as a full name.
12 | */}}
13 | {{- define "celery-exporter.fullname" -}}
14 | {{- if .Values.fullnameOverride }}
15 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
16 | {{- else }}
17 | {{- $name := default .Chart.Name .Values.nameOverride }}
18 | {{- if contains $name .Release.Name }}
19 | {{- .Release.Name | trunc 63 | trimSuffix "-" }}
20 | {{- else }}
21 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
22 | {{- end }}
23 | {{- end }}
24 | {{- end }}
25 | 
26 | {{/*
27 | Create chart name and version as used by the chart label.
28 | */}}
29 | {{- define "celery-exporter.chart" -}}
30 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
31 | {{- end }}
32 | 
33 | {{/*
34 | Common labels
35 | */}}
36 | {{- define "celery-exporter.labels" -}}
37 | helm.sh/chart: {{ include "celery-exporter.chart" . }}
38 | {{ include "celery-exporter.selectorLabels" . }}
39 | {{- if .Chart.AppVersion }}
40 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
41 | {{- end }}
42 | app.kubernetes.io/managed-by: {{ .Release.Service }}
43 | {{- end }}
44 | 
45 | {{/*
46 | Selector labels
47 | */}}
48 | {{- define "celery-exporter.selectorLabels" -}}
49 | app.kubernetes.io/name: {{ include "celery-exporter.name" . }}
50 | app.kubernetes.io/instance: {{ .Release.Name }}
51 | {{- end }}
52 | 
53 | {{/*
54 | Create the name of the service account to use
55 | */}}
56 | {{- define "celery-exporter.serviceAccountName" -}}
57 | {{- if .Values.serviceAccount.create }}
58 | {{- default (include "celery-exporter.fullname" .) .Values.serviceAccount.name }}
59 | {{- else }}
60 | {{- default "default" .Values.serviceAccount.name }}
61 | {{- end }}
62 | {{- end }}
63 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/templates/deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment
 3 | metadata:
 4 |   name: {{ include "celery-exporter.fullname" . }}
 5 |   labels:
 6 |     {{- include "celery-exporter.labels" . | nindent 4 }}
 7 | spec:
 8 |   replicas: {{ .Values.replicaCount }}
 9 |   selector:
10 |     matchLabels:
11 |       {{- include "celery-exporter.selectorLabels" . | nindent 6 }}
12 |   template:
13 |     metadata:
14 |       {{- with .Values.podAnnotations }}
15 |       annotations:
16 |         {{- toYaml . | nindent 8 }}
17 |       {{- end }}
18 |       labels:
19 |         {{- include "celery-exporter.selectorLabels" . | nindent 8 }}
20 |     spec:
21 |       {{- with .Values.imagePullSecrets }}
22 |       imagePullSecrets:
23 |         {{- toYaml . | nindent 8 }}
24 |       {{- end }}
25 |       serviceAccountName: {{ include "celery-exporter.serviceAccountName" . }}
26 |       securityContext:
27 |         {{- toYaml .Values.podSecurityContext | nindent 8 }}
28 |       containers:
29 |         - name: {{ .Chart.Name }}
30 |           securityContext:
31 |             {{- toYaml .Values.securityContext | nindent 12 }}
32 |           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
33 |           imagePullPolicy: {{ .Values.image.pullPolicy }}
34 |           ports:
35 |             - name: http
36 |               containerPort: 9808
37 |               protocol: TCP
38 |           readinessProbe:
39 |             httpGet:
40 |               path: /health
41 |               port: http
42 |             timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "5" }}
43 |             failureThreshold: {{ .Values.livenessProbe.failureThreshold | default "5" }}
44 |             periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "10" }}
45 |             successThreshold: {{ .Values.livenessProbe.successThreshold | default "1" }}
46 |           livenessProbe:
47 |             httpGet:
48 |               path: /health
49 |               port: http
50 |             timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "5" }}
51 |             failureThreshold: {{ .Values.livenessProbe.failureThreshold | default "5" }}
52 |             periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "10" }}
53 |             successThreshold: {{ .Values.livenessProbe.successThreshold | default "1" }}
54 |           resources:
55 |             {{- toYaml .Values.resources | nindent 12 }}
56 |           {{- with .Values.env }}
57 |           env:
58 |             {{- toYaml . | nindent 12 }}
59 |           {{- end }}
60 |       {{- with .Values.nodeSelector }}
61 |       nodeSelector:
62 |         {{- toYaml . | nindent 8 }}
63 |       {{- end }}
64 |       {{- with .Values.affinity }}
65 |       affinity:
66 |         {{- toYaml . | nindent 8 }}
67 |       {{- end }}
68 |       {{- with .Values.tolerations }}
69 |       tolerations:
70 |         {{- toYaml . | nindent 8 }}
71 |       {{- end }}
72 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/templates/ingress.yaml:
--------------------------------------------------------------------------------
 1 | {{- if .Values.ingress.enabled -}}
 2 | {{- $fullName := include "celery-exporter.fullname" . -}}
 3 | {{- $svcPort := .Values.service.port -}}
 4 | {{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
 5 |   {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
 6 |   {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
 7 |   {{- end }}
 8 | {{- end }}
 9 | {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
10 | apiVersion: networking.k8s.io/v1
11 | {{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
12 | apiVersion: networking.k8s.io/v1beta1
13 | {{- else -}}
14 | apiVersion: extensions/v1beta1
15 | {{- end }}
16 | kind: Ingress
17 | metadata:
18 |   name: {{ $fullName }}
19 |   labels:
20 |     {{- include "celery-exporter.labels" . | nindent 4 }}
21 |   {{- with .Values.ingress.annotations }}
22 |   annotations:
23 |     {{- toYaml . | nindent 4 }}
24 |   {{- end }}
25 | spec:
26 |   {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
27 |   ingressClassName: {{ .Values.ingress.className }}
28 |   {{- end }}
29 |   {{- if .Values.ingress.tls }}
30 |   tls:
31 |     {{- range .Values.ingress.tls }}
32 |     - hosts:
33 |         {{- range .hosts }}
34 |         - {{ . | quote }}
35 |         {{- end }}
36 |       secretName: {{ .secretName }}
37 |     {{- end }}
38 |   {{- end }}
39 |   rules:
40 |     {{- range .Values.ingress.hosts }}
41 |     - host: {{ .host | quote }}
42 |       http:
43 |         paths:
44 |           {{- range .paths }}
45 |           - path: {{ .path }}
46 |             {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
47 |             pathType: {{ .pathType }}
48 |             {{- end }}
49 |             backend:
50 |               {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
51 |               service:
52 |                 name: {{ $fullName }}
53 |                 port:
54 |                   number: {{ $svcPort }}
55 |               {{- else }}
56 |               serviceName: {{ $fullName }}
57 |               servicePort: {{ $svcPort }}
58 |               {{- end }}
59 |           {{- end }}
60 |     {{- end }}
61 | {{- end }}
62 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/templates/service.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Service
 3 | metadata:
 4 |   name: {{ include "celery-exporter.fullname" . }}
 5 |   labels:
 6 |     {{- include "celery-exporter.labels" . | nindent 4 }}
 7 |   {{- with .Values.service.annotations }}
 8 |   annotations:
 9 |     {{- toYaml . | nindent 4 }}
10 |   {{- end }}
11 | 
12 | spec:
13 |   type: {{ .Values.service.type }}
14 |   ports:
15 |     - port: {{ .Values.service.port }}
16 |       targetPort: http
17 |       protocol: TCP
18 |       name: http
19 |   selector:
20 |     {{- include "celery-exporter.selectorLabels" . | nindent 4 }}
21 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
 1 | {{- if .Values.serviceAccount.create -}}
 2 | apiVersion: v1
 3 | kind: ServiceAccount
 4 | metadata:
 5 |   name: {{ include "celery-exporter.serviceAccountName" . }}
 6 |   labels:
 7 |     {{- include "celery-exporter.labels" . | nindent 4 }}
 8 |   {{- with .Values.serviceAccount.annotations }}
 9 |   annotations:
10 |     {{- toYaml . | nindent 4 }}
11 |   {{- end }}
12 | {{- end }}
13 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/templates/servicemonitor.yaml:
--------------------------------------------------------------------------------
 1 | {{- if .Values.serviceMonitor.enabled -}}
 2 | apiVersion: monitoring.coreos.com/v1
 3 | kind: ServiceMonitor
 4 | metadata:
 5 |   name: {{ include "celery-exporter.fullname" . }}
 6 | {{- if .Values.serviceMonitor.namespace }}
 7 |   namespace: {{ .Values.serviceMonitor.namespace | quote }}
 8 | {{- end }}
 9 |   labels:
10 |     {{- include "celery-exporter.labels" . | nindent 4 }}
11 |   {{- if .Values.serviceMonitor.additionalLabels }}
12 |     {{- toYaml .Values.serviceMonitor.additionalLabels | nindent 4 }}
13 |   {{- end }}
14 | spec:
15 |   endpoints:
16 |     - port: http
17 |       interval: {{ .Values.serviceMonitor.scrapeInterval }}
18 |     {{- if .Values.serviceMonitor.honorLabels }}
19 |       honorLabels: true
20 |     {{- end }}
21 |     {{- if .Values.serviceMonitor.relabelings }}
22 |       relabelings: {{ toYaml .Values.serviceMonitor.relabelings | nindent 8 }}
23 |     {{- end }}
24 |     {{- if .Values.serviceMonitor.metricRelabelings }}
25 |       metricRelabelings: {{ toYaml .Values.serviceMonitor.metricRelabelings | nindent 8 }}
26 |     {{- end }}
27 | {{- if .Values.serviceMonitor.jobLabel }}
28 |   jobLabel: {{ .Values.serviceMonitor.jobLabel | quote }}
29 | {{- end }}
30 | {{- if .Values.serviceMonitor.namespaceSelector }}
31 |   namespaceSelector: {{ toYaml .Values.serviceMonitor.namespaceSelector | nindent 4 }}
32 | {{- else }}
33 |   namespaceSelector:
34 |     matchNames:
35 |       - {{ .Release.Namespace }}
36 | {{- end }}
37 | {{- if .Values.serviceMonitor.targetLabels }}
38 |   targetLabels:
39 |   {{- range .Values.serviceMonitor.targetLabels }}
40 |     - {{ . }}
41 |   {{- end }}
42 | {{- end }}
43 |   selector:
44 |     matchLabels:
45 |       {{- include "celery-exporter.selectorLabels" . | nindent 6 }}
46 | {{- end }}
47 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/templates/tests/test-connection.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Pod
 3 | metadata:
 4 |   name: "{{ include "celery-exporter.fullname" . }}-test-connection"
 5 |   labels:
 6 |     {{- include "celery-exporter.labels" . | nindent 4 }}
 7 |   annotations:
 8 |     "helm.sh/hook": test
 9 | spec:
10 |   containers:
11 |     - name: wget
12 |       image: busybox
13 |       command: ['wget']
14 |       args: ['{{ include "celery-exporter.fullname" . }}:{{ .Values.service.port }}']
15 |   restartPolicy: Never
16 | 


--------------------------------------------------------------------------------
/charts/celery-exporter/values.yaml:
--------------------------------------------------------------------------------
  1 | # Default values for celery-exporter.
  2 | # This is a YAML-formatted file.
  3 | # Declare variables to be passed into your templates.
  4 | 
  5 | replicaCount: 1
  6 | 
  7 | image:
  8 |   repository: danihodovic/celery-exporter
  9 |   pullPolicy: IfNotPresent
 10 |   # Overrides the image tag whose default is the chart appVersion.
 11 |   tag: ""
 12 | 
 13 | imagePullSecrets: []
 14 | nameOverride: ""
 15 | fullnameOverride: ""
 16 | 
 17 | serviceAccount:
 18 |   # Specifies whether a service account should be created
 19 |   create: true
 20 |   # Annotations to add to the service account
 21 |   annotations: {}
 22 |   # The name of the service account to use.
 23 |   # If not set and create is true, a name is generated using the fullname template
 24 |   name: ""
 25 | 
 26 | env: []
 27 |   # - name: CE_BROKER_URL
 28 |   #   value: <MY_BROKER_URL>
 29 |   # - name: CE_BROKER_URL
 30 |   #   valueFrom:
 31 |   #     secretKeyRef:
 32 |   #       name: MY_SECRET
 33 |   #       key: MY_SECRET_KEY
 34 | 
 35 | podAnnotations: {}
 36 | 
 37 | podSecurityContext: {}
 38 |   # fsGroup: 2000
 39 | 
 40 | securityContext: {}
 41 |   # capabilities:
 42 |   #   drop:
 43 |   #   - ALL
 44 |   # readOnlyRootFilesystem: true
 45 |   # runAsNonRoot: true
 46 |   # runAsUser: 1000
 47 | 
 48 | service:
 49 |   type: ClusterIP
 50 |   port: 9808
 51 |   annotations: {}
 52 |   #   prometheus.io/scrape: "true"
 53 |   #   prometheus.io/port: "9808"
 54 | 
 55 | ingress:
 56 |   enabled: false
 57 |   className: ""
 58 |   annotations: {}
 59 |     # kubernetes.io/ingress.class: nginx
 60 |     # kubernetes.io/tls-acme: "true"
 61 |   hosts:
 62 |     - host: celery-exporter.example
 63 |       paths:
 64 |         - path: /
 65 |           pathType: ImplementationSpecific
 66 |   tls: []
 67 |   #  - secretName: chart-example-tls
 68 |   #    hosts:
 69 |   #      - chart-example.local
 70 | 
 71 | serviceMonitor:
 72 |   enabled: false
 73 |   additionalLabels: {}
 74 |   ## The label to use to retrieve the job name from.
 75 |   ## jobLabel: "app.kubernetes.io/name"
 76 |   namespace: ""
 77 |   namespaceSelector: {}
 78 |   ## Default: scrape .Release.Namespace only
 79 |   ## To scrape all, use the following:
 80 |   ## namespaceSelector:
 81 |   ##   any: true
 82 |   scrapeInterval: 30s
 83 |   # honorLabels: true
 84 |   targetLabels: []
 85 |   relabelings: []
 86 |   metricRelabelings: []
 87 | 
 88 | resources: {}
 89 |   # We usually recommend not to specify default resources and to leave this as a conscious
 90 |   # choice for the user. This also increases chances charts run on environments with little
 91 |   # resources, such as Minikube. If you do want to specify resources, uncomment the following
 92 |   # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
 93 |   # limits:
 94 |   #   cpu: 100m
 95 |   #   memory: 128Mi
 96 |   # requests:
 97 |   #   cpu: 100m
 98 |   #   memory: 128Mi
 99 | 
100 | livenessProbe: {}
101 |   # Liveness and readiness probe timeout values.
102 |   # timeoutSeconds: 5
103 |   # failureThreshold: 5
104 |   # periodSeconds: 10
105 |   # successThreshold: 1
106 | readinessProbe: {}
107 |   # timeoutSeconds: 15
108 |   # failureThreshold: 5
109 |   # periodSeconds: 10
110 |   # successThreshold: 1
111 | 
112 | nodeSelector: {}
113 | 
114 | tolerations: []
115 | 
116 | affinity: {}
117 | 


--------------------------------------------------------------------------------
/cli.py:
--------------------------------------------------------------------------------
1 | from src.cli import cli
2 | 
3 | if __name__ == "__main__":
4 |     # pylint: disable=no-value-for-parameter,unexpected-keyword-arg
5 |     cli(auto_envvar_prefix="CE")
6 | 


--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
  1 | import socket
  2 | import threading
  3 | import copy
  4 | 
  5 | import pytest
  6 | 
  7 | from src.exporter import Exporter
  8 | 
  9 | 
 10 | def pytest_addoption(parser):
 11 |     parser.addoption(
 12 |         "--broker",
 13 |         action="store",
 14 |         default="redis",
 15 |         help="What broker to use in tests",
 16 |         choices=("redis", "rabbitmq", "memory"),
 17 |     )
 18 |     parser.addoption(
 19 |         "--loglevel",
 20 |         action="store",
 21 |         default="INFO",
 22 |         help="Log level of the exporter and celery worker in tests",
 23 |         choices=("DEBUG", "INFO", "WARNING", "ERROR"),
 24 |     )
 25 | 
 26 | 
 27 | @pytest.fixture(scope="session")
 28 | def broker(request):
 29 |     return request.config.getoption("--broker")
 30 | 
 31 | 
 32 | @pytest.fixture(scope="session")
 33 | def log_level(request):
 34 |     return request.config.getoption("--loglevel")
 35 | 
 36 | 
 37 | @pytest.fixture(scope="session")
 38 | def celery_config(broker):
 39 |     config = dict(
 40 |         task_send_sent_event=True,
 41 |         worker_send_task_events=True,
 42 |     )
 43 |     if broker == "redis":
 44 |         config["broker_url"] = "redis://localhost:6379/"  # type: ignore
 45 |     elif broker == "rabbitmq":
 46 |         config["broker_url"] = "amqp://guest:guest@localhost:5672"  # type: ignore
 47 |     elif broker == "memory":
 48 |         config["broker_url"] = "memory://localhost/"  # type: ignore
 49 | 
 50 |     return config
 51 | 
 52 | 
 53 | # https://github.com/celery/celery/pull/6632
 54 | @pytest.fixture(scope="session")
 55 | def celery_worker_parameters(log_level):
 56 |     return dict(
 57 |         loglevel=log_level,
 58 |         without_heartbeat=False,
 59 |     )
 60 | 
 61 | 
 62 | @pytest.fixture(scope="session")
 63 | def celery_enable_logging(log_level):
 64 |     return log_level == "DEBUG"
 65 | 
 66 | 
 67 | @pytest.fixture(scope="session")
 68 | def find_free_port():
 69 |     """
 70 |     https://gist.github.com/bertjwregeer/0be94ced48383a42e70c3d9fff1f4ad0
 71 |     """
 72 | 
 73 |     def _find_free_port():
 74 |         s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 75 |         s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
 76 |         s.bind(("0.0.0.0", 0))
 77 |         portnum = s.getsockname()[1]
 78 |         s.close()
 79 | 
 80 |         return portnum
 81 | 
 82 |     return _find_free_port
 83 | 
 84 | 
 85 | # Configurations for exporters
 86 | @pytest.fixture(scope="session")
 87 | def exporter_cfg_defaults(find_free_port, celery_config, log_level):
 88 |     cfg = {
 89 |         "host": "0.0.0.0",
 90 |         "port": find_free_port(),
 91 |         "broker_url": celery_config["broker_url"],
 92 |         "broker_transport_option": ["visibility_timeout=7200"],
 93 |         "broker_ssl_option": [],
 94 |         "retry_interval": 5,
 95 |         "log_level": log_level,
 96 |         "accept_content": None,
 97 |         "worker_timeout": 1,
 98 |         "purge_offline_worker_metrics": 10,
 99 |         "initial_queues": ["queue_from_command_line"],
100 |     }
101 |     yield cfg
102 | 
103 | 
104 | @pytest.fixture()
105 | def exporter_instance(exporter_cfg_defaults, find_free_port):
106 |     exporter_cfg = copy.deepcopy(exporter_cfg_defaults)
107 |     exporter_cfg["port"] = find_free_port()
108 |     exporter = Exporter(
109 |         worker_timeout_seconds=exporter_cfg["worker_timeout"],
110 |         purge_offline_worker_metrics_seconds=exporter_cfg[
111 |             "purge_offline_worker_metrics"
112 |         ],
113 |         initial_queues=exporter_cfg["initial_queues"],
114 |     )
115 |     setattr(exporter, "cfg", exporter_cfg)
116 |     yield exporter
117 | 
118 | 
119 | @pytest.fixture()
120 | def threaded_exporter(exporter_instance):
121 |     thread = threading.Thread(
122 |         target=exporter_instance.run, args=(exporter_instance.cfg,), daemon=True
123 |     )
124 |     thread.start()
125 |     yield exporter_instance
126 | 
127 | 
128 | # Fixtures for same exporter, but with static labels
129 | @pytest.fixture
130 | def exporter_instance_static_labels(exporter_cfg_defaults, find_free_port):
131 |     exporter_cfg = copy.deepcopy(exporter_cfg_defaults)
132 |     exporter_cfg["port"] = find_free_port()
133 |     exporter_cfg["static_label"] = {
134 |         "test_label_1": "test_value",
135 |         "test_label_2_long_named": "test_value_2_long_named",
136 |     }
137 |     exporter = Exporter(
138 |         worker_timeout_seconds=exporter_cfg["worker_timeout"],
139 |         purge_offline_worker_metrics_seconds=exporter_cfg[
140 |             "purge_offline_worker_metrics"
141 |         ],
142 |         initial_queues=exporter_cfg["initial_queues"],
143 |         static_label=exporter_cfg["static_label"],
144 |     )
145 |     setattr(exporter, "cfg", exporter_cfg)
146 |     yield exporter
147 | 
148 | 
149 | @pytest.fixture()
150 | def threaded_exporter_static_labels(exporter_instance_static_labels):
151 |     thread = threading.Thread(
152 |         target=exporter_instance_static_labels.run,
153 |         args=(exporter_instance_static_labels.cfg,),
154 |         daemon=True,
155 |     )
156 |     thread.start()
157 |     yield exporter_instance_static_labels
158 | 
159 | 
160 | @pytest.fixture()
161 | def hostname():
162 |     return socket.gethostname()
163 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | version: '2.4'
 3 | services:
 4 |   redis:
 5 |     image: 'redis:6'
 6 |     ports: ['6379:6379']
 7 | 
 8 |   rabbitmq:
 9 |     image: rabbitmq:3
10 |     ports: ['5672:5672']
11 | 


--------------------------------------------------------------------------------
/images/celery-tasks-by-task.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danihodovic/celery-exporter/e1160523e5230a44c314f37d21878d150ca97cf3/images/celery-tasks-by-task.png


--------------------------------------------------------------------------------
/images/celery-tasks-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danihodovic/celery-exporter/e1160523e5230a44c314f37d21878d150ca97cf3/images/celery-tasks-overview.png


--------------------------------------------------------------------------------
/jsonnetfile.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": 1,
 3 |   "dependencies": [
 4 |     {
 5 |       "source": {
 6 |         "git": {
 7 |           "remote": "https://github.com/honeylogic-io/utils-libsonnet.git",
 8 |           "subdir": "lib"
 9 |         }
10 |       },
11 |       "version": "master"
12 |     }
13 |   ],
14 |   "legacyImports": true
15 | }
16 | 


--------------------------------------------------------------------------------
/jsonnetfile.lock.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": 1,
 3 |   "dependencies": [
 4 |     {
 5 |       "source": {
 6 |         "git": {
 7 |           "remote": "https://github.com/honeylogic-io/utils-libsonnet.git",
 8 |           "subdir": "lib"
 9 |         }
10 |       },
11 |       "version": "cdcd088b54cf73511db37377841361b61abd5b14",
12 |       "sum": "jWIlMnQDtnLbHE5Aj8eTL0R3sLCe1v0syzXEj9BZwaI="
13 |     }
14 |   ],
15 |   "legacyImports": false
16 | }
17 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "prometheus-exporter-celery"
 3 | version = "0.12.0"
 4 | description = ""
 5 | authors = [
 6 | 	"Dani Hodovic <dani.hodovic@gmail.com>",
 7 | 	"Adin Hodovic <adin@email.com>",
 8 | ]
 9 | license = "MIT"
10 | packages = [
11 |     { include = "src" },
12 | ]
13 | readme = "README.md"
14 | repository = "https://github.com/danihodovic/celery-exporter"
15 | documentation = "https://github.com/danihodovic/celery-exporter"
16 | keywords = ["celery", "task-processing", "prometheus", "grafana", "monitoring"]
17 | classifiers = [
18 | 	"Topic :: System :: Monitoring",
19 | 	"Topic :: System :: Systems Administration",
20 | 	"Topic :: System :: Distributed Computing",
21 | 	"Framework :: Celery",
22 | 	"Framework :: Django",
23 | ]
24 | 
25 | [tool.black]
26 | skip_numeric_underscore_normalization = true
27 | exclude = ".*(venv|virtualenv|.poetry|migrations|node_modules)"
28 | 
29 | [tool.isort]
30 | profile = "black"
31 | multi_line_output = 3
32 | skip = '.virtualenv,.venv,.poetry,.poetry-cache'
33 | 
34 | [tool.poetry.dependencies]
35 | python = ">=3.11,<3.14"
36 | celery = "^5.5.2"
37 | prometheus-client = "^0.21.1"
38 | click = "^8.1.8"
39 | pretty-errors = "^1.2.25"
40 | loguru = "^0.7.3"
41 | redis = "^5.2.0"
42 | Flask = "^3.1.0"
43 | waitress = "^3.0.2"
44 | arrow = "^1.3.0"
45 | timy = "^0.4.2"
46 | 
47 | [tool.poetry.group.dev.dependencies]
48 | pytest = "^8.2.2"
49 | black = "^24.3.0"
50 | isort = "^5.13.2"
51 | jedi = "^0.19.1"
52 | pudb = "^2024.1.3"
53 | requests = "^2.32.3"
54 | pytest-cov = "^4.1.0"
55 | ptpython = "^3.0.25"
56 | pytest-mock = "^3.12.0"
57 | pyinstaller = "^6.13.0"
58 | mypy = "^1.8.0"
59 | types-requests = "^2"
60 | types-waitress = "^3.0.1.20241117"
61 | celery-types = "^0.11.0"
62 | pre-commit = "^2.19.0"
63 | pytest-celery = "^0.0.0"
64 | pylint = "^3.3.1"
65 | certifi = "^2024.8.30"
66 | idna = "^3.7"
67 | 
68 | [build-system]
69 | requires = ["poetry-core>=1.0.0a5"]
70 | build-backend = "poetry.core.masonry.api"
71 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = --pdbcls=pudb.debugger:Debugger --doctest-modules
3 | python_files = tests.py test_*.py
4 | norecursedirs = .git .venv .virtualenv
5 | log_cli = true
6 | filterwarnings =
7 |     ignore::celery.fixups.django.FixupWarning
8 |     ignore::DeprecationWarning
9 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danihodovic/celery-exporter/e1160523e5230a44c314f37d21878d150ca97cf3/src/__init__.py


--------------------------------------------------------------------------------
/src/cli.py:
--------------------------------------------------------------------------------
  1 | import click
  2 | 
  3 | # pylint: disable=unused-import
  4 | import pretty_errors  # type: ignore
  5 | from prometheus_client import Histogram
  6 | 
  7 | from .exporter import Exporter
  8 | from .help import cmd_help
  9 | 
 10 | # https://github.com/pallets/click/issues/448#issuecomment-246029304
 11 | # pylint: disable=protected-access
 12 | click.core._verify_python3_env = lambda: None  # type: ignore
 13 | 
 14 | default_buckets_str = ",".join(map(str, Histogram.DEFAULT_BUCKETS))
 15 | 
 16 | 
 17 | def _comma_seperated_argument(_ctx, _param, value):
 18 |     if value is not None:
 19 |         return value.split(",")
 20 |     return []
 21 | 
 22 | 
 23 | # Accepts value string in format "key=val". Returns dict {key: val}.
 24 | # * If value is None - returns empty dict
 25 | def _eq_sign_separated_argument_to_dict(_ctx, _param, value):
 26 |     if value is not None:
 27 |         dict_of_key_value_pairs = {}
 28 |         for key_value_pair in value:
 29 |             key, val = key_value_pair.split("=")
 30 |             dict_of_key_value_pairs[key] = val
 31 |         return dict_of_key_value_pairs
 32 |     return {}
 33 | 
 34 | 
 35 | @click.command(help=cmd_help)
 36 | @click.option(
 37 |     "--broker-url",
 38 |     required=True,
 39 |     help="The url to the broker, e.g redis://1.2.3.4",
 40 | )
 41 | @click.option(
 42 |     "--broker-transport-option",
 43 |     required=False,
 44 |     default=[None],
 45 |     multiple=True,
 46 |     help="Celery broker transport option, e.g visibility_timeout=18000",
 47 | )
 48 | @click.option(
 49 |     "--broker-ssl-option",
 50 |     required=False,
 51 |     default=[None],
 52 |     multiple=True,
 53 |     help="Celery broker ssl option, e.g certfile=/var/ssl/amqp-server-cert.pem",
 54 | )
 55 | @click.option(
 56 |     "--accept-content",
 57 |     required=False,
 58 |     default=None,
 59 |     help="Celery accept content options, e.g 'json,pickle'",
 60 | )
 61 | @click.option(
 62 |     "--retry-interval",
 63 |     required=False,
 64 |     default=0,
 65 |     help="Broker exception retry interval in seconds, default is 0 for no retry",
 66 | )
 67 | @click.option(
 68 |     "--host",
 69 |     default="0.0.0.0",
 70 |     show_default=True,
 71 |     help="The host the exporter will listen on",
 72 | )
 73 | @click.option(
 74 |     "--port",
 75 |     type=int,
 76 |     default=9808,
 77 |     show_default=True,
 78 |     help="The port the exporter will listen on",
 79 | )
 80 | @click.option(
 81 |     "--buckets",
 82 |     default=default_buckets_str,
 83 |     show_default=True,
 84 |     help="Buckets for runtime histogram",
 85 | )
 86 | @click.option("--log-level", default="INFO", show_default=True)
 87 | @click.option(
 88 |     "--worker-timeout",
 89 |     default=5 * 60,
 90 |     show_default=True,
 91 |     help="If no heartbeat has been recieved from a worker in this many seconds, "
 92 |     "that a worker will be considered dead. If set to 0, workers will never be "
 93 |     "timed out",
 94 | )
 95 | @click.option(
 96 |     "--purge-offline-worker-metrics",
 97 |     default=10 * 60,
 98 |     show_default=True,
 99 |     help="If no heartbeat has been recieved from a worker in this many seconds, "
100 |     "that a worker will be considered dead. Metrics will be purged for this worker "
101 |     "after this many seconds. If set to 0, metrics will never be purged. Helps "
102 |     "with keeping the cardinality of the metrics low.",
103 | )
104 | @click.option(
105 |     "--generic-hostname-task-sent-metric",
106 |     default=False,
107 |     is_flag=True,
108 |     help="The metric celery_task_sent_total will be labeled with a generic hostname. "
109 |     "This option helps with label cardinality when using a dynamic number of clients "
110 |     "which create tasks. The default behavior is to label the metric with the client's hostname. "
111 |     "Knowing which client sent a task might not be useful for many use cases as for example in "
112 |     "Kubernetes environments where the client's hostname is a random string.",
113 | )
114 | @click.option(
115 |     "-Q",
116 |     "--queues",
117 |     default=None,
118 |     show_default=False,
119 |     callback=_comma_seperated_argument,
120 |     help="A comma seperated list of queues to force metrics to appear for. "
121 |     "Queues not included in this setting will not appear in metrics until at least one worker has "
122 |     "been seen to follow that queue.",
123 | )
124 | @click.option(
125 |     "--metric-prefix",
126 |     default="celery_",
127 |     help="Prefix all metrics with a string. "
128 |     "This option replaces the 'celery_*' part with a custom prefix. ",
129 | )
130 | @click.option(
131 |     "--default-queue-name",
132 |     default="celery",
133 |     help="task_default_queue option for celery."
134 |     "This option is to define default queue name for celery, if queue name is not present in "
135 |     "task parameters. It will be used in prom metrics label value.",
136 | )
137 | @click.option(
138 |     "--static-label",
139 |     required=False,
140 |     default=None,
141 |     multiple=True,
142 |     callback=_eq_sign_separated_argument_to_dict,
143 |     help="Add label with static value to all metrics",
144 | )
145 | def cli(  # pylint: disable=too-many-arguments,too-many-positional-arguments,too-many-locals
146 |     broker_url,
147 |     broker_transport_option,
148 |     accept_content,
149 |     retry_interval,
150 |     host,
151 |     port,
152 |     buckets,
153 |     log_level,
154 |     broker_ssl_option,
155 |     worker_timeout,
156 |     purge_offline_worker_metrics,
157 |     generic_hostname_task_sent_metric,
158 |     queues,
159 |     metric_prefix,
160 |     default_queue_name,
161 |     static_label,
162 | ):  # pylint: disable=unused-argument
163 |     formatted_buckets = list(map(float, buckets.split(",")))
164 |     ctx = click.get_current_context()
165 |     Exporter(
166 |         formatted_buckets,
167 |         worker_timeout,
168 |         purge_offline_worker_metrics,
169 |         generic_hostname_task_sent_metric,
170 |         queues,
171 |         metric_prefix,
172 |         default_queue_name,
173 |         static_label,
174 |     ).run(ctx.params)
175 | 


--------------------------------------------------------------------------------
/src/exporter.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=protected-access,,attribute-defined-outside-init
  2 | import json
  3 | import re
  4 | import sys
  5 | import time
  6 | from collections import defaultdict
  7 | from typing import Callable, Optional
  8 | 
  9 | from celery import Celery
 10 | from celery.events.state import State  # type: ignore
 11 | from celery.utils import nodesplit  # type: ignore
 12 | from celery.utils.time import utcoffset  # type: ignore
 13 | from kombu.exceptions import ChannelError  # type: ignore
 14 | from loguru import logger
 15 | from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram
 16 | 
 17 | from .http_server import start_http_server
 18 | 
 19 | 
 20 | class Exporter:  # pylint: disable=too-many-instance-attributes,too-many-branches
 21 |     state: State = None
 22 | 
 23 |     # pylint: disable=too-many-arguments,too-many-positional-arguments
 24 |     def __init__(
 25 |         self,
 26 |         buckets=None,
 27 |         worker_timeout_seconds=5 * 60,
 28 |         purge_offline_worker_metrics_seconds=10 * 60,
 29 |         generic_hostname_task_sent_metric=False,
 30 |         initial_queues=None,
 31 |         metric_prefix="celery_",
 32 |         default_queue_name="celery",
 33 |         static_label=None,
 34 |     ):
 35 |         self.registry = CollectorRegistry(auto_describe=True)
 36 |         self.queue_cache = set(initial_queues or [])
 37 |         self.worker_last_seen = {}
 38 |         self.worker_timeout_seconds = worker_timeout_seconds
 39 |         self.purge_offline_worker_metrics_after_seconds = (
 40 |             purge_offline_worker_metrics_seconds
 41 |         )
 42 |         self.generic_hostname_task_sent_metric = generic_hostname_task_sent_metric
 43 |         self.default_queue_name = default_queue_name
 44 | 
 45 |         # Static labels
 46 |         self.static_label = static_label or {}
 47 |         self.static_label_keys = self.static_label.keys()
 48 | 
 49 |         self.state_counters = {
 50 |             "task-sent": Counter(
 51 |                 f"{metric_prefix}task_sent",
 52 |                 "Sent when a task message is published.",
 53 |                 ["name", "hostname", "queue_name", *self.static_label_keys],
 54 |                 registry=self.registry,
 55 |             ),
 56 |             "task-received": Counter(
 57 |                 f"{metric_prefix}task_received",
 58 |                 "Sent when the worker receives a task.",
 59 |                 ["name", "hostname", "queue_name", *self.static_label_keys],
 60 |                 registry=self.registry,
 61 |             ),
 62 |             "task-started": Counter(
 63 |                 f"{metric_prefix}task_started",
 64 |                 "Sent just before the worker executes the task.",
 65 |                 ["name", "hostname", "queue_name", *self.static_label_keys],
 66 |                 registry=self.registry,
 67 |             ),
 68 |             "task-succeeded": Counter(
 69 |                 f"{metric_prefix}task_succeeded",
 70 |                 "Sent if the task executed successfully.",
 71 |                 ["name", "hostname", "queue_name", *self.static_label_keys],
 72 |                 registry=self.registry,
 73 |             ),
 74 |             "task-failed": Counter(
 75 |                 f"{metric_prefix}task_failed",
 76 |                 "Sent if the execution of the task failed.",
 77 |                 [
 78 |                     "name",
 79 |                     "hostname",
 80 |                     "exception",
 81 |                     "queue_name",
 82 |                     *self.static_label_keys,
 83 |                 ],
 84 |                 registry=self.registry,
 85 |             ),
 86 |             "task-rejected": Counter(
 87 |                 f"{metric_prefix}task_rejected",
 88 |                 # pylint: disable=line-too-long
 89 |                 "The task was rejected by the worker, possibly to be re-queued or moved to a dead letter queue.",
 90 |                 ["name", "hostname", "queue_name", *self.static_label_keys],
 91 |                 registry=self.registry,
 92 |             ),
 93 |             "task-revoked": Counter(
 94 |                 f"{metric_prefix}task_revoked",
 95 |                 "Sent if the task has been revoked.",
 96 |                 ["name", "hostname", "queue_name", *self.static_label_keys],
 97 |                 registry=self.registry,
 98 |             ),
 99 |             "task-retried": Counter(
100 |                 f"{metric_prefix}task_retried",
101 |                 "Sent if the task failed, but will be retried in the future.",
102 |                 ["name", "hostname", "queue_name", *self.static_label_keys],
103 |                 registry=self.registry,
104 |             ),
105 |         }
106 |         self.celery_worker_up = Gauge(
107 |             f"{metric_prefix}worker_up",
108 |             "Indicates if a worker has recently sent a heartbeat.",
109 |             ["hostname", *self.static_label_keys],
110 |             registry=self.registry,
111 |         )
112 |         self.worker_tasks_active = Gauge(
113 |             f"{metric_prefix}worker_tasks_active",
114 |             "The number of tasks the worker is currently processing",
115 |             ["hostname", *self.static_label_keys],
116 |             registry=self.registry,
117 |         )
118 |         self.celery_task_runtime = Histogram(
119 |             f"{metric_prefix}task_runtime",
120 |             "Histogram of task runtime measurements.",
121 |             ["name", "hostname", "queue_name", *self.static_label_keys],
122 |             registry=self.registry,
123 |             buckets=buckets or Histogram.DEFAULT_BUCKETS,
124 |         )
125 |         self.celery_queue_length = Gauge(
126 |             f"{metric_prefix}queue_length",
127 |             "The number of message in broker queue.",
128 |             ["queue_name", *self.static_label_keys],
129 |             registry=self.registry,
130 |         )
131 |         self.celery_active_consumer_count = Gauge(
132 |             f"{metric_prefix}active_consumer_count",
133 |             "The number of active consumer in broker queue.",
134 |             ["queue_name", *self.static_label_keys],
135 |             registry=self.registry,
136 |         )
137 |         self.celery_active_worker_count = Gauge(
138 |             f"{metric_prefix}active_worker_count",
139 |             "The number of active workers in broker queue.",
140 |             ["queue_name", *self.static_label_keys],
141 |             registry=self.registry,
142 |         )
143 |         self.celery_active_process_count = Gauge(
144 |             f"{metric_prefix}active_process_count",
145 |             "The number of active processes in broker queue.",
146 |             ["queue_name", *self.static_label_keys],
147 |             registry=self.registry,
148 |         )
149 | 
150 |     def scrape(self):
151 |         if (
152 |             self.worker_timeout_seconds > 0
153 |             or self.purge_offline_worker_metrics_after_seconds > 0
154 |         ):
155 |             self.track_timed_out_workers()
156 |         self.track_queue_metrics()
157 | 
158 |     def forget_worker(self, hostname):
159 |         if hostname in self.worker_last_seen:
160 |             self.celery_worker_up.labels(hostname=hostname, **self.static_label).set(0)
161 |             self.worker_tasks_active.labels(hostname=hostname, **self.static_label).set(
162 |                 0
163 |             )
164 |             logger.debug(
165 |                 "Updated gauge='{}' value='{}'", self.worker_tasks_active._name, 0
166 |             )
167 |             logger.debug(
168 |                 "Updated gauge='{}' value='{}'", self.celery_worker_up._name, 0
169 |             )
170 |             self.worker_last_seen[hostname]["forgotten"] = True
171 | 
172 |             # If purging of metrics is enabled we should keep the last seen so that we can
173 |             # use the timestamp to purge the metrics later
174 |             if self.purge_offline_worker_metrics_after_seconds == 0:
175 |                 del self.worker_last_seen[hostname]
176 | 
177 |     def purge_worker_metrics(self, hostname):
178 |         # Prometheus stores a copy of the metrics in memory, so we need to remove them
179 |         # The key of the metrics is a string sequence e.g ('celery(queue_name)', 'host-1(hostname)')
180 |         for label_seq in list(self.worker_tasks_active._metrics.keys()):
181 |             if hostname in label_seq:
182 |                 self.worker_tasks_active.remove(*label_seq)
183 | 
184 |         for label_seq in list(self.celery_worker_up._metrics.keys()):
185 |             if hostname in label_seq:
186 |                 self.celery_worker_up.remove(*label_seq)
187 | 
188 |         for counter in self.state_counters.values():
189 |             for label_seq in list(counter._metrics.keys()):
190 |                 if hostname in label_seq:
191 |                     counter.remove(*label_seq)
192 | 
193 |         for label_seq in list(self.celery_task_runtime._metrics.keys()):
194 |             if hostname in label_seq:
195 |                 self.celery_task_runtime.remove(*label_seq)
196 | 
197 |         del self.worker_last_seen[hostname]
198 | 
199 |     def track_timed_out_workers(self):
200 |         now = time.time()
201 |         # Make a copy of the last seen dict so we can delete from the dict with no issues
202 |         for hostname, worker_status in list(self.worker_last_seen.items()):
203 |             since = now - worker_status["ts"]
204 |             if since > self.worker_timeout_seconds and not worker_status["forgotten"]:
205 |                 logger.info(
206 |                     f"Have not seen {hostname} for {since:0.2f} seconds. "
207 |                     "Removing from metrics"
208 |                 )
209 |                 self.forget_worker(hostname)
210 | 
211 |             if self.purge_offline_worker_metrics_after_seconds > 0:
212 |                 if since > self.purge_offline_worker_metrics_after_seconds:
213 |                     logger.info(
214 |                         f"Have not seen {hostname} for {since:0.2f} seconds. "
215 |                         "Purging worker metrics"
216 |                     )
217 |                     self.purge_worker_metrics(hostname)
218 | 
219 |     def track_queue_metrics(self):
220 |         with self.app.connection() as connection:  # type: ignore
221 |             transport = connection.info()["transport"]
222 |             acceptable_transports = [
223 |                 "redis",
224 |                 "rediss",
225 |                 "amqp",
226 |                 "amqps",
227 |                 "memory",
228 |                 "sentinel",
229 |             ]
230 |             if transport not in acceptable_transports:
231 |                 logger.debug(
232 |                     f"Queue length tracking is only implemented for {acceptable_transports}"
233 |                 )
234 |                 return
235 | 
236 |             concurrency_per_worker = {
237 |                 worker: len(stats["pool"].get("processes", []))
238 |                 for worker, stats in (self.app.control.inspect().stats() or {}).items()
239 |             }
240 |             processes_per_queue = defaultdict(int)
241 |             workers_per_queue = defaultdict(int)
242 | 
243 |             # request workers to response active queues
244 |             # we need to cache queue info in exporter in case all workers are offline
245 |             # so that no worker response to exporter will make active_queues return None
246 |             queues = self.app.control.inspect().active_queues() or {}
247 |             for worker, info_list in queues.items():
248 |                 for queue_info in info_list:
249 |                     name = queue_info["name"]
250 |                     self.queue_cache.add(name)
251 |                     workers_per_queue[name] += 1
252 |                     processes_per_queue[name] += concurrency_per_worker.get(worker, 0)
253 | 
254 |             for queue in self.queue_cache:
255 |                 if transport in ["amqp", "amqps", "memory"]:
256 |                     consumer_count = rabbitmq_queue_consumer_count(connection, queue)
257 |                     self.celery_active_consumer_count.labels(
258 |                         queue_name=queue, **self.static_label
259 |                     ).set(consumer_count)
260 | 
261 |                 self.celery_active_process_count.labels(
262 |                     queue_name=queue, **self.static_label
263 |                 ).set(processes_per_queue[queue])
264 |                 self.celery_active_worker_count.labels(
265 |                     queue_name=queue, **self.static_label
266 |                 ).set(workers_per_queue[queue])
267 |                 length = queue_length(transport, connection, queue)
268 |                 if length is not None:
269 |                     self.celery_queue_length.labels(
270 |                         queue_name=queue, **self.static_label
271 |                     ).set(length)
272 | 
273 |     def track_task_event(self, event):
274 |         self.state.event(event)
275 |         task = self.state.tasks.get(event["uuid"])
276 |         logger.debug("Received event='{}' for task='{}'", event["type"], task.name)
277 | 
278 |         if event["type"] not in self.state_counters:
279 |             logger.warning("No counter matches task state='{}'", task.state)
280 | 
281 |         labels = {
282 |             "name": task.name,
283 |             "hostname": get_hostname(task.hostname),
284 |             "queue_name": getattr(task, "queue", self.default_queue_name),
285 |             **self.static_label,
286 |         }
287 |         if event["type"] == "task-sent" and self.generic_hostname_task_sent_metric:
288 |             labels["hostname"] = "generic"
289 | 
290 |         for counter_name, counter in self.state_counters.items():
291 |             _labels = labels.copy()
292 | 
293 |             if counter_name == "task-failed":
294 |                 if counter_name == event["type"]:
295 |                     _labels["exception"] = get_exception_class_name(task.exception)
296 |                 else:
297 |                     _labels["exception"] = ""
298 | 
299 |             if counter_name == event["type"]:
300 |                 counter.labels(**_labels).inc()
301 |                 logger.debug(
302 |                     "Incremented metric='{}' labels='{}'", counter._name, labels
303 |                 )
304 |             elif (
305 |                 event["type"] != "task-sent"
306 |             ):  # task-sent is sent by various hosts (webservers, task creators etc.) which cause label cardinality # pylint: disable=line-too-long
307 |                 # increase unaffected counters by zero in order to make them visible
308 |                 counter.labels(**_labels).inc(0)
309 | 
310 |         # observe task runtime
311 |         if event["type"] == "task-succeeded":
312 |             self.celery_task_runtime.labels(**labels).observe(task.runtime)
313 |             logger.debug(
314 |                 "Observed metric='{}' labels='{}': {}s",
315 |                 self.celery_task_runtime._name,
316 |                 labels,
317 |                 task.runtime,
318 |             )
319 | 
320 |     def track_worker_status(self, event, is_online):
321 |         value = 1 if is_online else 0
322 |         event_name = "worker-online" if is_online else "worker-offline"
323 |         hostname = get_hostname(event["hostname"])
324 |         logger.debug("Received event='{}' for hostname='{}'", event_name, hostname)
325 |         self.celery_worker_up.labels(hostname=hostname, **self.static_label).set(value)
326 | 
327 |         if is_online:
328 |             self.worker_last_seen[hostname] = {
329 |                 "ts": reverse_adjust_timestamp(
330 |                     event["timestamp"], event.get("utcoffset")
331 |                 ),
332 |                 "forgotten": False,
333 |             }
334 |         else:
335 |             self.forget_worker(hostname)
336 | 
337 |     def track_worker_heartbeat(self, event):
338 |         hostname = get_hostname(event["hostname"])
339 |         logger.debug("Received event='{}' for worker='{}'", event["type"], hostname)
340 | 
341 |         self.worker_last_seen[hostname] = {
342 |             "ts": reverse_adjust_timestamp(event["timestamp"], event.get("utcoffset")),
343 |             "forgotten": False,
344 |         }
345 |         worker_state = self.state.event(event)[0][0]
346 |         active = worker_state.active or 0
347 |         up = 1 if worker_state.alive else 0
348 |         self.celery_worker_up.labels(hostname=hostname, **self.static_label).set(up)
349 |         self.worker_tasks_active.labels(hostname=hostname, **self.static_label).set(
350 |             active
351 |         )
352 |         logger.debug(
353 |             "Updated gauge='{}' value='{}'", self.worker_tasks_active._name, active
354 |         )
355 |         logger.debug("Updated gauge='{}' value='{}'", self.celery_worker_up._name, up)
356 | 
357 |     def run(self, click_params):
358 |         logger.remove()
359 |         logger.add(sys.stdout, level=click_params["log_level"])
360 |         self.app = Celery(broker=click_params["broker_url"])
361 |         if click_params["accept_content"] is not None:
362 |             accept_content_list = click_params["accept_content"].split(",")
363 |             logger.info("Setting celery accept_content {}", accept_content_list)
364 |             self.app.config_from_object(dict(accept_content=accept_content_list))
365 |         transport_options = {}
366 |         for transport_option in click_params["broker_transport_option"]:
367 |             if transport_option is not None:
368 |                 option, value = transport_option.split("=", 1)
369 |                 if option is not None:
370 |                     logger.debug(
371 |                         "Setting celery broker_transport_option {}={}", option, value
372 |                     )
373 |                     transport_options[option] = transform_option_value(value)
374 | 
375 |         if transport_options is not None:
376 |             self.app.conf["broker_transport_options"] = transport_options
377 | 
378 |         ssl_options = {}
379 |         for ssl_option in click_params["broker_ssl_option"]:
380 |             if ssl_option is not None:
381 |                 option, value = ssl_option.split("=", 1)
382 |                 if option is not None:
383 |                     logger.debug("Setting celery ssl_option {}={}", option, value)
384 |                     if value.isnumeric():
385 |                         ssl_options[option] = int(value)
386 |                     else:
387 |                         ssl_options[option] = value
388 | 
389 |         if ssl_options is not None:
390 |             self.app.conf["broker_use_ssl"] = ssl_options
391 | 
392 |         self.state = self.app.events.State()  # type: ignore
393 |         self.retry_interval = click_params["retry_interval"]
394 |         if self.retry_interval:
395 |             logger.debug("Using retry_interval of {} seconds", self.retry_interval)
396 | 
397 |         handlers = {
398 |             "worker-heartbeat": self.track_worker_heartbeat,
399 |             "worker-online": lambda event: self.track_worker_status(event, True),
400 |             "worker-offline": lambda event: self.track_worker_status(event, False),
401 |         }
402 |         for key in self.state_counters:
403 |             handlers[key] = self.track_task_event
404 | 
405 |         with self.app.connection() as connection:  # type: ignore
406 |             start_http_server(
407 |                 self.registry,
408 |                 connection,
409 |                 click_params["host"],
410 |                 click_params["port"],
411 |                 self.scrape,
412 |             )
413 |             while True:
414 |                 try:
415 |                     recv = self.app.events.Receiver(connection, handlers=handlers)  # type: ignore
416 |                     recv.capture(limit=None, timeout=None, wakeup=True)  # type: ignore
417 | 
418 |                 except (KeyboardInterrupt, SystemExit) as ex:
419 |                     raise ex
420 | 
421 |                 except Exception as e:  # pylint: disable=broad-except
422 |                     logger.exception(
423 |                         "celery-exporter exception '{}', retrying in {} seconds.",
424 |                         str(e),
425 |                         self.retry_interval,
426 |                     )
427 |                     if self.retry_interval == 0:
428 |                         raise e
429 | 
430 |                 time.sleep(self.retry_interval)
431 | 
432 | 
433 | exception_pattern = re.compile(r"^(\w+)\(")
434 | 
435 | 
436 | def reverse_adjust_timestamp(
437 |     ts: float, offset: Optional[int] = None, here: Callable[..., float] = utcoffset
438 | ) -> float:
439 |     """Adjust timestamp in reverse of celery, based on provided utcoffset."""
440 |     return ts + ((offset or 0) - here()) * 3600
441 | 
442 | 
443 | def get_exception_class_name(exception_name: str):
444 |     m = exception_pattern.match(exception_name)
445 |     if m:
446 |         return m.group(1)
447 |     return "UnknownException"
448 | 
449 | 
450 | def get_hostname(name: str) -> str:
451 |     """
452 |     Get hostname from celery's hostname.
453 | 
454 |     Celery's hostname contains either worker's name or Process ID in it.
455 |     >>> get_hostname("workername@hostname")
456 |     'hostname'
457 |     >>> get_hostname("gen531@hostname")
458 |     'hostname'
459 | 
460 |     Prometheus suggests it:
461 |     > Do not use labels to store dimensions with high cardinality (many different label values)
462 |     """
463 |     _, hostname = nodesplit(name)
464 |     return hostname
465 | 
466 | 
467 | def transform_option_value(value: str):
468 |     """
469 |     Make an attempt to transform option value to appropriate type
470 | 
471 |     Result type:
472 |         - int - if input contains only digits
473 |         - dict - if input may be correctly decoded from JSON string
474 |         - str - in any other cases
475 |     """
476 |     if value.isnumeric():
477 |         return int(value)
478 |     try:
479 |         return json.loads(value)
480 |     except ValueError:
481 |         return value
482 | 
483 | 
484 | def redis_queue_length(connection, queue: str) -> int:
485 |     return connection.default_channel.client.llen(queue)
486 | 
487 | 
488 | def rabbitmq_queue_length(connection, queue: str) -> int:
489 |     if queue_info := rabbitmq_queue_info(connection, queue):
490 |         return queue_info.message_count
491 |     return 0
492 | 
493 | 
494 | def queue_length(transport, connection, queue: str) -> Optional[int]:
495 |     if transport in ["redis", "rediss", "sentinel"]:
496 |         return redis_queue_length(connection, queue)
497 | 
498 |     if transport in ["amqp", "amqps", "memory"]:
499 |         return rabbitmq_queue_length(connection, queue)
500 | 
501 |     return None
502 | 
503 | 
504 | def rabbitmq_queue_consumer_count(connection, queue: str) -> int:
505 |     if queue_info := rabbitmq_queue_info(connection, queue):
506 |         return queue_info.consumer_count
507 |     return 0
508 | 
509 | 
510 | def rabbitmq_queue_info(connection, queue: str):
511 |     try:
512 |         queue_info = connection.default_channel.queue_declare(queue=queue, passive=True)
513 |         return queue_info
514 |     except ChannelError as ex:
515 |         if "NOT_FOUND" in ex.message:
516 |             logger.debug(f"Queue '{queue}' not found")
517 |             return None
518 |         raise ex
519 | 


--------------------------------------------------------------------------------
/src/help.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=protected-access
 2 | from .exporter import Exporter
 3 | 
 4 | prometheus_logo = """
 5 | @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 6 | @@@@@@@@@@@@@@@@@@@((((((((((((((((((((((@@@@@@@@@@@@@@@@@@@
 7 | @@@@@@@@@@@@@@((((((((((((((@@((((((((((((((((@@@@@@@@@@@@@@
 8 | @@@@@@@@@@@((((((((((((((((@@@(((((((((((((((((((@@@@@@@@@@@
 9 | @@@@@@@@(((((((((((((((((((@@@@(((((@(((((((((((((((@@@@@@@@
10 | @@@@@@(((((((((((((((@@((((@@@@@(((@@(((((((((((((((((@@@@@@
11 | @@@@@((((((((((((((((@@@((@@@@@@@(@@@@((((((((((((((((((@@@@
12 | @@@(((((((((((((((((@@@@(@@@@@@@@(@@@@@((((((((((((((((((@@@
13 | @@(((((((((((((((((@@@@@@@@@@@@@@(@@@@@@((((((((((((((((((@@
14 | @(((((((((((((((((@@@@@@@@@@@@@@@@@@@@@@@((((((((((((((((((@
15 | @(((((((((((((((((@@@@@@@@@@@@@@@@@@@@@@@((((((((((((((((((@
16 | @(((((((((((((((((@@@@@@@@@@@@@@@@@@@@@@@(((((((((((((((((((
17 | (((((((((((((((((((@@@@@@@@@@@@@@@@@@@@@@(((((((((((((((((((
18 | @(((((((((((@@@@((((@@@@@@@@@@@@@@@@@@@(((((@@@@((((((((((((
19 | @((((((((((((@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@((((((((((((@
20 | @((((((((((((((@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@((((((((((((((@
21 | @@((((((((((((((((((((((((((((((((((((((((((((((((((((((((@@
22 | @@@(((((((((((((@@@@@@@@@@@@@@@@@@@@@@@@@@@@(((((((((((((@@@
23 | @@@@((((((((((((@@@@@@@@@@@@@@@@@@@@@@@@@@@@((((((((((((@@@@
24 | @@@@@@((((((((((((((((((((((((((((((((((((((((((((((((@@@@@@
25 | @@@@@@@@((((((((((((((@@@@@@@@@@@@@@@@((((((((((((((@@@@@@@@
26 | @@@@@@@@@@(((((((((((((@@@@@@@@@@@@@@(((((((((((((@@@@@@@@@@
27 | @@@@@@@@@@@@@@(((((((((((#@@@@@@@@@(((((((((((&@@@@@@@@@@@@@
28 | @@@@@@@@@@@@@@@@@@((((((((((((((((((((((((@@@@@@@@@@@@@@@@@@"""
29 | 
30 | cmd_help = (
31 |     prometheus_logo
32 |     + """
33 | 
34 | A Prometheus exporter for Celery.
35 | 
36 | Metrics exposed:
37 | """
38 | )
39 | 
40 | temp_exporter = Exporter()
41 | 
42 | for metric in temp_exporter.state_counters.values():
43 |     cmd_help += f"""
44 | \b
45 | {metric._name}_total
46 | {metric._documentation:30s}
47 | """
48 | 
49 | for metric in [
50 |     temp_exporter.celery_worker_up,
51 |     temp_exporter.worker_tasks_active,
52 |     temp_exporter.celery_task_runtime,
53 | ]:
54 |     cmd_help += f"""
55 | \b
56 | {metric._name}
57 | {metric._documentation:30s}
58 | """
59 | 


--------------------------------------------------------------------------------
/src/http_server.py:
--------------------------------------------------------------------------------
 1 | from threading import Thread
 2 | 
 3 | import kombu.exceptions
 4 | from flask import Blueprint, Flask, current_app, request
 5 | from loguru import logger
 6 | from prometheus_client.exposition import choose_encoder
 7 | from waitress import serve
 8 | 
 9 | blueprint = Blueprint("celery_exporter", __name__)
10 | 
11 | 
12 | @blueprint.route("/")
13 | def index():
14 |     return """
15 | <!doctype html>
16 | <html lang="en">
17 |   <head>
18 |     <!-- Required meta tags -->
19 |     <meta charset="utf-8">
20 |     <title>celery-exporter</title>
21 |   </head>
22 |   <body>
23 |     <h1>Celery Exporter</h1>
24 |     <p><a href="/metrics">Metrics</a></p>
25 |   </body>
26 | </html>
27 | """
28 | 
29 | 
30 | @blueprint.route("/metrics")
31 | def metrics():
32 |     current_app.config["metrics_puller"]()
33 |     encoder, content_type = choose_encoder(request.headers.get("accept"))
34 |     output = encoder(current_app.config["registry"])
35 |     return output, 200, {"Content-Type": content_type}
36 | 
37 | 
38 | @blueprint.route("/health")
39 | def health():
40 |     conn = current_app.config["celery_connection"]
41 |     uri = conn.as_uri()
42 | 
43 |     try:
44 |         conn.ensure_connection(max_retries=3)
45 |     except kombu.exceptions.OperationalError:
46 |         logger.error("Failed to connect to broker='{}'", uri)
47 |         return (f"Failed to connect to broker: '{uri}'", 500)
48 |     except Exception:  # pylint: disable=broad-except
49 |         logger.exception("Unrecognized error")
50 |         return ("Unknown exception", 500)
51 |     return f"Connected to the broker {conn.as_uri()}"
52 | 
53 | 
54 | def start_http_server(registry, celery_connection, host, port, metrics_puller):
55 |     app = Flask(__name__)
56 |     app.config["registry"] = registry
57 |     app.config["celery_connection"] = celery_connection
58 |     app.config["metrics_puller"] = metrics_puller
59 |     app.register_blueprint(blueprint)
60 |     Thread(
61 |         target=serve,
62 |         args=(app,),
63 |         kwargs=dict(host=host, port=port, _quiet=True),
64 |         daemon=True,
65 |     ).start()
66 |     logger.info("Started celery-exporter at host='{}' on port='{}'", host, port)
67 | 


--------------------------------------------------------------------------------
/src/test_cli.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | 
  3 | import pytest
  4 | import requests
  5 | from celery.contrib.testing.worker import start_worker  # type: ignore
  6 | from requests.exceptions import HTTPError
  7 | 
  8 | 
  9 | @pytest.mark.celery()
 10 | def test_integration(broker, celery_app, threaded_exporter, hostname):
 11 |     exporter_url = f"http://localhost:{threaded_exporter.cfg['port']}/metrics"
 12 | 
 13 |     @celery_app.task
 14 |     def succeed():
 15 |         pass
 16 | 
 17 |     @celery_app.task
 18 |     def fail():
 19 |         raise HTTPError("Intentional error")
 20 | 
 21 |     time.sleep(1)
 22 |     # Before the first worker starts, make sure queues that the exporter is initialized
 23 |     # with are available anyway. Queues to be detected from workers should not be there yet
 24 |     res = requests.get(exporter_url, timeout=5)
 25 |     assert res.status_code == 200
 26 |     assert 'celery_queue_length{queue_name="queue_from_command_line"} 0.0' in res.text
 27 |     assert (
 28 |         'celery_active_worker_count{queue_name="queue_from_command_line"} 0.0'
 29 |         in res.text
 30 |     )
 31 |     assert (
 32 |         'celery_active_process_count{queue_name="queue_from_command_line"} 0.0'
 33 |         in res.text
 34 |     )
 35 |     assert 'celery_queue_length{queue_name="celery"}' not in res.text
 36 |     assert 'celery_active_worker_count{queue_name="celery"}' not in res.text
 37 |     assert 'celery_active_process_count{queue_name="celery"}' not in res.text
 38 | 
 39 |     # start worker first so the exporter can fetch and cache queue information
 40 |     with start_worker(celery_app, without_heartbeat=False):
 41 |         time.sleep(5)
 42 |         res = requests.get(exporter_url, timeout=5)
 43 |         assert res.status_code == 200
 44 |         assert 'celery_queue_length{queue_name="celery"} 0.0' in res.text, res.text
 45 | 
 46 |         # TODO: Fix this...
 47 |         if broker == "memory":
 48 |             assert (
 49 |                 'celery_active_consumer_count{queue_name="celery"} 0.0' in res.text
 50 |             ), res.text
 51 |         assert 'celery_active_worker_count{queue_name="celery"} 1.0' in res.text
 52 |         assert 'celery_active_process_count{queue_name="celery"} 1.0' in res.text
 53 | 
 54 |     succeed.apply_async()
 55 |     succeed.apply_async()
 56 |     fail.apply_async()
 57 | 
 58 |     # assert celery_queue_length when message in broker but no worker start
 59 |     res = requests.get(exporter_url, timeout=3)
 60 |     assert res.status_code == 200
 61 |     assert 'celery_queue_length{queue_name="celery"} 3.0' in res.text
 62 | 
 63 |     if broker == "memory":
 64 |         assert 'celery_active_consumer_count{queue_name="celery"} 0.0' in res.text
 65 |     assert 'celery_active_worker_count{queue_name="celery"} 0.0' in res.text
 66 |     assert 'celery_active_process_count{queue_name="celery"} 0.0' in res.text
 67 | 
 68 |     # start worker and consume message in broker
 69 |     with start_worker(celery_app, without_heartbeat=False):
 70 |         time.sleep(2)
 71 | 
 72 |     res = requests.get(exporter_url, timeout=3)
 73 |     assert res.status_code == 200
 74 |     # pylint: disable=line-too-long
 75 |     assert (
 76 |         f'celery_task_sent_total{{hostname="{hostname}",name="src.test_cli.succeed",queue_name="celery"}} 2.0'
 77 |         in res.text
 78 |     )
 79 |     assert (
 80 |         f'celery_task_sent_total{{hostname="{hostname}",name="src.test_cli.fail",queue_name="celery"}} 1.0'
 81 |         in res.text
 82 |     )
 83 |     assert (
 84 |         f'celery_task_received_total{{hostname="{hostname}",name="src.test_cli.succeed",queue_name="celery"}} 2.0'
 85 |         in res.text
 86 |     )
 87 |     assert (
 88 |         f'celery_task_received_total{{hostname="{hostname}",name="src.test_cli.fail",queue_name="celery"}} 1.0'
 89 |         in res.text
 90 |     )
 91 |     assert (
 92 |         f'celery_task_started_total{{hostname="{hostname}",name="src.test_cli.succeed",queue_name="celery"}} 2.0'
 93 |         in res.text
 94 |     )
 95 |     assert (
 96 |         f'celery_task_started_total{{hostname="{hostname}",name="src.test_cli.fail",queue_name="celery"}} 1.0'
 97 |         in res.text
 98 |     )
 99 |     assert (
100 |         f'celery_task_succeeded_total{{hostname="{hostname}",name="src.test_cli.succeed",queue_name="celery"}} 2.0'
101 |         in res.text
102 |     )
103 |     assert (
104 |         f'celery_task_failed_total{{exception="HTTPError",hostname="{hostname}",name="src.test_cli.fail",queue_name="celery"}} 1.0'
105 |         in res.text
106 |     )
107 |     assert (
108 |         f'celery_task_runtime_count{{hostname="{hostname}",name="src.test_cli.succeed",queue_name="celery"}} 2.0'
109 |         in res.text
110 |     )
111 |     assert 'celery_queue_length{queue_name="celery"} 0.0' in res.text
112 | 
113 |     # TODO: Fix this...
114 |     if broker == "memory":
115 |         assert 'celery_active_consumer_count{queue_name="celery"} 0.0' in res.text
116 |     assert 'celery_active_worker_count{queue_name="celery"} 0.0' in res.text
117 |     assert 'celery_active_process_count{queue_name="celery"} 0.0' in res.text
118 | 
119 | 
120 | # pylint: disable=too-many-statements
121 | @pytest.mark.celery()
122 | def test_integration_static_labels(
123 |     broker, celery_app, threaded_exporter_static_labels, hostname
124 | ):
125 |     exporter_url = (
126 |         f"http://localhost:{threaded_exporter_static_labels.cfg['port']}/metrics"
127 |     )
128 |     # Substring representing static labels in metrics labels
129 |     static_labels_str = ",".join(
130 |         [
131 |             f'{k}="{v}"'
132 |             for k, v in sorted(
133 |                 threaded_exporter_static_labels.cfg["static_label"].items()
134 |             )
135 |         ]
136 |     )
137 | 
138 |     @celery_app.task
139 |     def succeed():
140 |         pass
141 | 
142 |     @celery_app.task
143 |     def fail():
144 |         raise HTTPError("Intentional error")
145 | 
146 |     time.sleep(1)
147 |     # Before the first worker starts, make sure queues that the exporter is initialized
148 |     # with are available anyway. Queues to be detected from workers should not be there yet
149 |     res = requests.get(exporter_url, timeout=5)
150 |     assert res.status_code == 200
151 |     assert (
152 |         f'celery_queue_length{{queue_name="queue_from_command_line",{static_labels_str}}} 0.0'
153 |         in res.text
154 |     )
155 |     assert (
156 |         # pylint: disable=line-too-long
157 |         f'celery_active_worker_count{{queue_name="queue_from_command_line",{static_labels_str}}} 0.0'
158 |         in res.text
159 |     )
160 |     assert (
161 |         # pylint: disable=line-too-long
162 |         f'celery_active_process_count{{queue_name="queue_from_command_line",{static_labels_str}}} 0.0'
163 |         in res.text
164 |     )
165 |     assert (
166 |         f'celery_queue_length{{queue_name="celery",{static_labels_str}}}'
167 |         not in res.text
168 |     )
169 |     assert (
170 |         f'celery_active_worker_count{{queue_name="celery",{static_labels_str}}}'
171 |         not in res.text
172 |     )
173 |     assert (
174 |         f'celery_active_process_count{{queue_name="celery",{static_labels_str}}}'
175 |         not in res.text
176 |     )
177 | 
178 |     # start worker first so the exporter can fetch and cache queue information
179 |     with start_worker(celery_app, without_heartbeat=False):
180 |         time.sleep(5)
181 |         res = requests.get(exporter_url, timeout=5)
182 |         assert res.status_code == 200
183 |         assert (
184 |             f'celery_queue_length{{queue_name="celery",{static_labels_str}}} 0.0'
185 |             in res.text
186 |         ), res.text
187 | 
188 |         # TODO: Fix this...
189 |         if broker == "memory":
190 |             assert (
191 |                 f'celery_active_consumer_count{{queue_name="celery",{static_labels_str}}} 0.0'
192 |                 in res.text
193 |             ), res.text
194 |         assert (
195 |             f'celery_active_worker_count{{queue_name="celery",{static_labels_str}}} 1.0'
196 |             in res.text
197 |         )
198 |         assert (
199 |             f'celery_active_process_count{{queue_name="celery",{static_labels_str}}} 1.0'
200 |             in res.text
201 |         )
202 | 
203 |     succeed.apply_async()
204 |     succeed.apply_async()
205 |     fail.apply_async()
206 | 
207 |     # assert celery_queue_length when message in broker but no worker start
208 |     res = requests.get(exporter_url, timeout=3)
209 |     assert res.status_code == 200
210 |     assert (
211 |         f'celery_queue_length{{queue_name="celery",{static_labels_str}}} 3.0'
212 |         in res.text
213 |     )
214 | 
215 |     if broker == "memory":
216 |         assert (
217 |             f'celery_active_consumer_count{{queue_name="celery",{static_labels_str}}} 0.0'
218 |             in res.text
219 |         )
220 |     assert (
221 |         f'celery_active_worker_count{{queue_name="celery",{static_labels_str}}} 0.0'
222 |         in res.text
223 |     )
224 |     assert (
225 |         f'celery_active_process_count{{queue_name="celery",{static_labels_str}}} 0.0'
226 |         in res.text
227 |     )
228 | 
229 |     # start worker and consume message in broker
230 |     with start_worker(celery_app, without_heartbeat=False):
231 |         time.sleep(2)
232 | 
233 |     res = requests.get(exporter_url, timeout=3)
234 |     assert res.status_code == 200
235 |     # pylint: disable=line-too-long
236 |     assert (
237 |         f'celery_task_sent_total{{hostname="{hostname}",name="src.test_cli.succeed",queue_name="celery",{static_labels_str}}} 2.0'
238 |         in res.text
239 |     )
240 |     assert (
241 |         f'celery_task_sent_total{{hostname="{hostname}",name="src.test_cli.fail",queue_name="celery",{static_labels_str}}} 1.0'
242 |         in res.text
243 |     )
244 |     assert (
245 |         f'celery_task_received_total{{hostname="{hostname}",name="src.test_cli.succeed",queue_name="celery",{static_labels_str}}} 2.0'
246 |         in res.text
247 |     )
248 |     assert (
249 |         f'celery_task_received_total{{hostname="{hostname}",name="src.test_cli.fail",queue_name="celery",{static_labels_str}}} 1.0'
250 |         in res.text
251 |     )
252 |     assert (
253 |         f'celery_task_started_total{{hostname="{hostname}",name="src.test_cli.succeed",queue_name="celery",{static_labels_str}}} 2.0'
254 |         in res.text
255 |     )
256 |     assert (
257 |         f'celery_task_started_total{{hostname="{hostname}",name="src.test_cli.fail",queue_name="celery",{static_labels_str}}} 1.0'
258 |         in res.text
259 |     )
260 |     assert (
261 |         f'celery_task_succeeded_total{{hostname="{hostname}",name="src.test_cli.succeed",queue_name="celery",{static_labels_str}}} 2.0'
262 |         in res.text
263 |     )
264 |     assert (
265 |         f'celery_task_failed_total{{exception="HTTPError",hostname="{hostname}",name="src.test_cli.fail",queue_name="celery",{static_labels_str}}} 1.0'
266 |         in res.text
267 |     )
268 |     assert (
269 |         f'celery_task_runtime_count{{hostname="{hostname}",name="src.test_cli.succeed",queue_name="celery",{static_labels_str}}} 2.0'
270 |         in res.text
271 |     )
272 |     assert (
273 |         f'celery_queue_length{{queue_name="celery",{static_labels_str}}} 0.0'
274 |         in res.text
275 |     )
276 | 
277 |     # TODO: Fix this...
278 |     if broker == "memory":
279 |         assert (
280 |             f'celery_active_consumer_count{{queue_name="celery",{static_labels_str}}} 0.0'
281 |             in res.text
282 |         )
283 |     assert (
284 |         f'celery_active_worker_count{{queue_name="celery",{static_labels_str}}} 0.0'
285 |         in res.text
286 |     )
287 |     assert (
288 |         f'celery_active_process_count{{queue_name="celery",{static_labels_str}}} 0.0'
289 |         in res.text
290 |     )
291 | 


--------------------------------------------------------------------------------
/src/test_exporter.py:
--------------------------------------------------------------------------------
 1 | from .exporter import transform_option_value
 2 | 
 3 | 
 4 | def test_transform_option_value():
 5 |     test_cases = [
 6 |         {"input": "1423", "expected": 1423},
 7 |         {"input": '{"password": "pass"}', "expected": {"password": "pass"}},
 8 |         {
 9 |             "input": '{invalid_json: "value"}',
10 |             "expected": '{invalid_json: "value"}',
11 |         },
12 |         {"input": "my_master", "expected": "my_master"},
13 |     ]
14 | 
15 |     for case in test_cases:
16 |         assert transform_option_value(case["input"]) == case["expected"]
17 | 


--------------------------------------------------------------------------------
/src/test_http_server.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=unused-argument
 2 | import time
 3 | 
 4 | import pytest
 5 | import requests
 6 | 
 7 | 
 8 | @pytest.mark.celery()
 9 | def test_health(threaded_exporter):
10 |     time.sleep(1)
11 |     res = requests.get(
12 |         f"http://localhost:{threaded_exporter.cfg['port']}/health", timeout=3
13 |     )
14 |     res.raise_for_status()
15 | 
16 | 
17 | def test_index(threaded_exporter):
18 |     time.sleep(1)
19 |     res = requests.get(f"http://localhost:{threaded_exporter.cfg['port']}", timeout=3)
20 |     res.raise_for_status()
21 |     assert "/metrics" in res.text
22 | 


--------------------------------------------------------------------------------
/src/test_metrics.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import time
  3 | 
  4 | import pytest
  5 | from celery.contrib.testing.worker import start_worker  # type: ignore
  6 | from celery.utils.time import adjust_timestamp  # type: ignore
  7 | 
  8 | from src.exporter import reverse_adjust_timestamp
  9 | 
 10 | 
 11 | @pytest.fixture
 12 | def assert_exporter_metric_called(mocker, celery_app, celery_worker, hostname):
 13 |     def fn(metric):
 14 |         labels = mocker.patch.object(metric, "labels")
 15 | 
 16 |         @celery_app.task
 17 |         def slow_task():
 18 |             logging.info("Started the slow task")
 19 |             time.sleep(3)
 20 |             logging.info("Finished the slow task")
 21 | 
 22 |         # Reload so that the worker detects the task
 23 |         celery_worker.reload()
 24 |         slow_task.delay().get()
 25 |         assert labels.call_count >= 1
 26 |         labels.assert_called_with(hostname=hostname)
 27 |         labels.return_value.set.assert_any_call(1)
 28 | 
 29 |     return fn
 30 | 
 31 | 
 32 | @pytest.mark.celery()
 33 | def test_worker_tasks_active(broker, threaded_exporter, assert_exporter_metric_called):
 34 |     if broker != "memory":
 35 |         pytest.skip(
 36 |             reason="test_worker_tasks_active can only be tested for the in-memory broker"
 37 |         )
 38 | 
 39 |     assert_exporter_metric_called(threaded_exporter.worker_tasks_active)
 40 | 
 41 | 
 42 | @pytest.mark.celery()
 43 | def test_worker_heartbeat_status(
 44 |     broker, threaded_exporter, assert_exporter_metric_called
 45 | ):
 46 |     if broker != "memory":
 47 |         pytest.skip(
 48 |             reason="test_worker_tasks_active can only be tested for the in-memory broker"
 49 |         )
 50 | 
 51 |     assert_exporter_metric_called(threaded_exporter.celery_worker_up)
 52 | 
 53 | 
 54 | @pytest.mark.celery()
 55 | def test_worker_status(threaded_exporter, celery_app, hostname):
 56 |     time.sleep(5)
 57 | 
 58 |     with start_worker(celery_app, without_heartbeat=False):
 59 |         time.sleep(2)
 60 |         assert (
 61 |             threaded_exporter.registry.get_sample_value(
 62 |                 "celery_worker_up", labels={"hostname": hostname}
 63 |             )
 64 |             == 1.0
 65 |         )
 66 | 
 67 |     time.sleep(2)
 68 |     assert (
 69 |         threaded_exporter.registry.get_sample_value(
 70 |             "celery_worker_up", labels={"hostname": hostname}
 71 |         )
 72 |         == 0.0
 73 |     )
 74 | 
 75 | 
 76 | @pytest.mark.parametrize(
 77 |     "input_utcoffset, sleep_seconds, expected_metric_value",
 78 |     [
 79 |         (None, 5, 0.0),
 80 |         (0, 5, 0.0),
 81 |         (7, 5, 0.0),
 82 |         (7, 0, 1.0),
 83 |     ],  # Eg: PST (America/Los_Angeles)
 84 | )
 85 | def test_worker_timeout_status(
 86 |     input_utcoffset, sleep_seconds, expected_metric_value, threaded_exporter, hostname
 87 | ):
 88 |     ts = adjust_timestamp(time.time(), (input_utcoffset or 0))
 89 |     threaded_exporter.track_worker_status(
 90 |         {"hostname": hostname, "timestamp": ts, "utcoffset": input_utcoffset}, True
 91 |     )
 92 |     assert (
 93 |         threaded_exporter.registry.get_sample_value(
 94 |             "celery_worker_up", labels={"hostname": hostname}
 95 |         )
 96 |         == 1.0
 97 |     )
 98 |     assert threaded_exporter.worker_last_seen[hostname] == {
 99 |         "forgotten": False,
100 |         "ts": reverse_adjust_timestamp(ts, input_utcoffset),
101 |     }
102 | 
103 |     time.sleep(sleep_seconds)
104 |     threaded_exporter.scrape()
105 |     assert (
106 |         threaded_exporter.registry.get_sample_value(
107 |             "celery_worker_up", labels={"hostname": hostname}
108 |         )
109 |         == expected_metric_value
110 |     )
111 | 
112 | 
113 | @pytest.mark.parametrize(
114 |     "input_utcoffset, sleep_seconds, expected_metric_value",
115 |     [
116 |         (None, 15, None),
117 |         (0, 15, None),
118 |         (7, 15, None),
119 |         (7, 0, 1.0),
120 |     ],  # Eg: PST (America/Los_Angeles)
121 | )
122 | def test_purge_offline_worker_metrics(
123 |     input_utcoffset, sleep_seconds, expected_metric_value, threaded_exporter, hostname
124 | ):
125 |     ts = adjust_timestamp(time.time(), (input_utcoffset or 0))
126 |     threaded_exporter.track_worker_status(
127 |         {"hostname": hostname, "timestamp": ts, "utcoffset": input_utcoffset}, True
128 |     )
129 |     threaded_exporter.worker_tasks_active.labels(hostname=hostname).inc()
130 |     threaded_exporter.celery_task_runtime.labels(
131 |         name="boosh", hostname=hostname, queue_name="test"
132 |     ).observe(1.0)
133 |     threaded_exporter.state_counters["task-sent"].labels(
134 |         name="boosh", hostname=hostname, queue_name="test"
135 |     ).inc()
136 | 
137 |     assert (
138 |         threaded_exporter.registry.get_sample_value(
139 |             "celery_worker_up", labels={"hostname": hostname}
140 |         )
141 |         == 1.0
142 |     )
143 |     assert (
144 |         threaded_exporter.registry.get_sample_value(
145 |             "celery_worker_tasks_active", labels={"hostname": hostname}
146 |         )
147 |         == 1.0
148 |     )
149 |     assert (
150 |         threaded_exporter.registry.get_sample_value(
151 |             "celery_task_runtime_count",
152 |             labels={"hostname": hostname, "queue_name": "test", "name": "boosh"},
153 |         )
154 |         == 1.0
155 |     )
156 |     assert (
157 |         threaded_exporter.registry.get_sample_value(
158 |             "celery_task_sent_total",
159 |             labels={"hostname": hostname, "queue_name": "test", "name": "boosh"},
160 |         )
161 |         == 1.0
162 |     )
163 | 
164 |     assert threaded_exporter.worker_last_seen[hostname] == {
165 |         "forgotten": False,
166 |         "ts": reverse_adjust_timestamp(ts, input_utcoffset),
167 |     }
168 | 
169 |     time.sleep(sleep_seconds)
170 |     threaded_exporter.scrape()
171 |     assert (
172 |         threaded_exporter.registry.get_sample_value(
173 |             "celery_worker_up", labels={"hostname": hostname}
174 |         )
175 |         == expected_metric_value
176 |     )
177 |     assert (
178 |         threaded_exporter.registry.get_sample_value(
179 |             "celery_worker_tasks_active", labels={"hostname": hostname}
180 |         )
181 |         == expected_metric_value
182 |     )
183 |     assert (
184 |         threaded_exporter.registry.get_sample_value(
185 |             "celery_task_runtime_count",
186 |             labels={"hostname": hostname, "queue_name": "test", "name": "boosh"},
187 |         )
188 |         == expected_metric_value
189 |     )
190 |     assert (
191 |         threaded_exporter.registry.get_sample_value(
192 |             "celery_task_sent_total",
193 |             labels={"hostname": hostname, "queue_name": "test", "name": "boosh"},
194 |         )
195 |         == expected_metric_value
196 |     )
197 | 
198 | 
199 | def test_worker_generic_task_sent_hostname(threaded_exporter, celery_app):
200 |     threaded_exporter.generic_hostname_task_sent_metric = True
201 |     time.sleep(5)
202 | 
203 |     @celery_app.task
204 |     def succeed():
205 |         pass
206 | 
207 |     succeed.apply_async()
208 | 
209 |     with start_worker(celery_app, without_heartbeat=False):
210 |         time.sleep(5)
211 |         assert (
212 |             threaded_exporter.registry.get_sample_value(
213 |                 "celery_task_sent_total",
214 |                 labels={
215 |                     "hostname": "generic",
216 |                     "name": "src.test_metrics.succeed",
217 |                     "queue_name": "celery",
218 |                 },
219 |             )
220 |             == 1.0
221 |         )
222 | 


--------------------------------------------------------------------------------
/vendor/github.com/honeylogic-io/utils-libsonnet/lib/celery.libsonnet:
--------------------------------------------------------------------------------
 1 | local k = import 'github.com/grafana/jsonnet-libs/ksonnet-util/kausal.libsonnet';
 2 | local statefulSet = k.apps.v1.statefulSet;
 3 | local container = k.core.v1.container;
 4 | local deployment = k.apps.v1.deployment;
 5 | 
 6 | {
 7 |   createContainers(name, image, command, args, env):: container.new(name, image) +
 8 |                                                       container.withCommand(command) +
 9 |                                                       container.withArgs(args) +
10 |                                                       container.withEnvMap(env) +
11 |                                                       container.withImagePullPolicy('Always'),
12 | 
13 |   worker: {
14 |     new(name, image, replicas=1, command=['celery'], args, env): {
15 |       local containers = $.createContainers(name, image, command, args, env),
16 |       statefulSet: statefulSet.new(name, replicas, containers) +
17 |                    statefulSet.spec.withServiceName(name),
18 |     },
19 |   },
20 |   beat: {
21 |     new(name, image, command=['celery'], args, env): {
22 |       local containers = $.createContainers(name, image, command, args, env),
23 |       deployment: deployment.new(name, replicas=1, containers=containers),
24 |     },
25 |   },
26 | }
27 | 


--------------------------------------------------------------------------------
/vendor/github.com/honeylogic-io/utils-libsonnet/lib/django.libsonnet:
--------------------------------------------------------------------------------
 1 | local k = import 'github.com/grafana/jsonnet-libs/ksonnet-util/kausal.libsonnet';
 2 | local deployment = k.apps.v1.deployment;
 3 | local container = k.core.v1.container;
 4 | local port = k.core.v1.containerPort;
 5 | local service = k.core.v1.service;
 6 | local withInitContainers = deployment.spec.template.spec.withInitContainers;
 7 | local withArgs = container.withArgs;
 8 | 
 9 | {
10 |   new(name, image, envMap): {
11 |     local containers = container.new(name, image) +
12 |                        container.withImagePullPolicy('Always') +
13 |                        container.withVolumeMounts([{
14 |                          name: 'staticfiles',
15 |                          mountPath: '/app/staticfiles',
16 |                        }]) +
17 |                        container.withEnvMap(envMap),
18 |     local webArgs = withArgs(['config.wsgi', '--bind=0.0.0.0:80']),
19 |     local webContainer = containers + container.withPorts([port.new('http', 80)]) +
20 |                          container.withCommand(['gunicorn']) +
21 |                          webArgs,
22 |     local collectstaticArgs = withArgs(['collectstatic', '--no-input', '--clear']),
23 |     local collectstatic = containers +
24 |                           container.withName('collectstatic') +
25 |                           container.withCommand(['./manage.py']) +
26 |                           collectstaticArgs,
27 |     local migrate = containers + container.withName('migrate') +
28 |                     container.withCommand(['./manage.py']) +
29 |                     withArgs(['migrate']),
30 | 
31 |     deployment: deployment.new(name, replicas=1, containers=webContainer)
32 |                 + withInitContainers([collectstatic, migrate])
33 |                 + deployment.spec.template.spec.withVolumes([{
34 |                   name: 'staticfiles',
35 |                   emptyDir: {
36 |                     medium: 'Memory',
37 |                   },
38 |                 }]),
39 |     service: k.util.serviceFor(self.deployment),
40 |   },
41 | }
42 | 


--------------------------------------------------------------------------------
/vendor/github.com/honeylogic-io/utils-libsonnet/lib/drone.libsonnet:
--------------------------------------------------------------------------------
 1 | local pythonStepCommon = {
 2 |   depends_on: ['install-python-deps'],
 3 |   commands: [
 4 |     '. .poetry/env && . $(poetry env info -p)/bin/activate',
 5 |   ],
 6 | };
 7 | 
 8 | local installDepsStep = pythonStepCommon {
 9 |   name: 'install-python-deps',
10 |   depends_on: ['restore-cache'],
11 |   environment: {
12 |     POETRY_CACHE_DIR: '/drone/src/.poetry-cache',
13 |     POETRY_VIRTUALENVS_IN_PROJECT: 'false',
14 |   },
15 |   commands: [
16 |     |||
17 |       export POETRY_HOME=$DRONE_WORKSPACE/.poetry
18 |       if [ ! -d "$POETRY_HOME" ]; then
19 |         curl -fsS -o /tmp/get-poetry.py https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py
20 |         python /tmp/get-poetry.py -y
21 |       fi
22 |     |||,
23 |     '. .poetry/env',
24 |     'poetry install --no-root',
25 |   ],
26 | };
27 | 
28 | local formatStep = pythonStepCommon {
29 |   name: 'format',
30 |   commands+: [
31 |     'black . --check',
32 |     'isort --check-only .',
33 |   ],
34 | };
35 | 
36 | local mypyStep = pythonStepCommon {
37 |   name: 'typecheck',
38 |   commands+: [
39 |     'mypy .',
40 |   ],
41 | };
42 | 
43 | 
44 | local pylintStep = pythonStepCommon {
45 |   name: 'lint',
46 |   commands+: [
47 |     "pylint $(git ls-files -- '*.py' ':!:**/migrations/*.py')",
48 |   ],
49 | };
50 | 
51 | local testStep = pythonStepCommon {
52 |   name: 'test',
53 |   commands+: ['pytest --ignore .poetry --ignore .poetry-cache --cov'],
54 | };
55 | 
56 | 
57 | local pipelineCommon(image) = {
58 |   kind: 'pipeline',
59 |   type: 'docker',
60 |   name: 'python',
61 |   trigger: {
62 |     event: [
63 |       'push',
64 |     ],
65 |   },
66 |   volumes: [
67 |     {
68 |       name: 'cache',
69 |       host: {
70 |         path: '/tmp/cache',
71 |       },
72 |     },
73 |   ],
74 |   steps: [
75 |     installDepsStep { image: image },
76 |     formatStep { image: image },
77 |     mypyStep { image: image },
78 |     pylintStep { image: image },
79 |     testStep { image: image },
80 |   ],
81 | };
82 | 
83 | {
84 |   pythonPipeline: {
85 |     new(pipeline, image): pipelineCommon(image) + pipeline,
86 |   },
87 |   dockerPipeline: {
88 |     kind: 'pipeline',
89 |     type: 'docker',
90 |   },
91 | }
92 | 


--------------------------------------------------------------------------------
/vendor/github.com/honeylogic-io/utils-libsonnet/lib/ingress.libsonnet:
--------------------------------------------------------------------------------
 1 | local k = import 'github.com/grafana/jsonnet-libs/ksonnet-util/kausal.libsonnet';
 2 | local ingress = k.networking.v1.ingress;
 3 | 
 4 | local mapRules(host, service, servicePort) = ({ host: host, http: { paths: [{
 5 |                                                 path: '/',
 6 |                                                 pathType: 'Prefix',
 7 |                                                 backend: { service: { name: service, port: { number: servicePort } } },
 8 |                                               }] } });
 9 | 
10 | {
11 |   new(name, hosts, service, servicePort, annotations):
12 |     ingress.new(name)
13 |     + ingress.metadata.withAnnotations(annotations)
14 |     + ingress.spec.withTls([{ hosts: hosts, secretName: name + '-cert' }])
15 |     + ingress.spec.withRules([mapRules(host, service, servicePort) for host in hosts]),
16 | }
17 | 


--------------------------------------------------------------------------------
/vendor/lib:
--------------------------------------------------------------------------------
1 | github.com/honeylogic-io/utils-libsonnet/lib


--------------------------------------------------------------------------------