├── .coveragerc ├── .dockerignore ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── Dockerfile-celery3 ├── Dockerfile-celery4 ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.rst ├── celery_prometheus_exporter.py ├── celeryapp.py ├── docker-compose.yml ├── docker-entrypoint.sh ├── requirements ├── base.txt ├── celery3.txt ├── celery4.txt ├── promclient030.txt ├── promclient050.txt └── test.txt ├── setup.py ├── test ├── celery_test_utils.py └── test_unit.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = celery_prometheus_exporter 3 | 4 | [report] 5 | fail_under = 100 6 | show_missing = True 7 | 8 | [paths] 9 | source = celery_prometheus_exporter 10 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | *.img -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.img 2 | /dist 3 | /build 4 | /*.egg-info 5 | 6 | *.pyc 7 | __pycache__ 8 | .coverage 9 | .tox/ 10 | .cache/ 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | 4 | python: 5 | - "2.7" 6 | - "3.4" 7 | - "3.5" 8 | - "3.6" 9 | 10 | install: pip install tox-travis tox 11 | script: tox 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | The initial release of celery-prometheus-exporter was intended as a minimal 2 | solution that would cover what I personally needed at my own projects. 
That 3 | being said, you might need completely different kinds of metrics being 4 | exposed. If you do, please feel free to create tickets and pull requests 🙂 As 5 | such, the more details you can provide in your tickets the better. 6 | 7 | I will try to look into each issue but please note that I might not be available 8 | all the time and that timezones exist. Please be patient 😊 9 | -------------------------------------------------------------------------------- /Dockerfile-celery3: -------------------------------------------------------------------------------- 1 | FROM python:3.6-alpine 2 | MAINTAINER Horst Gutmann 3 | 4 | RUN mkdir -p /app/requirements 5 | ADD requirements/* /app/requirements/ 6 | WORKDIR /app 7 | 8 | ENV PYTHONUNBUFFERED 1 9 | RUN pip install -r requirements/promclient050.txt -r requirements/celery3.txt 10 | ADD celery_prometheus_exporter.py docker-entrypoint.sh /app/ 11 | ENTRYPOINT ["/bin/sh", "/app/docker-entrypoint.sh"] 12 | CMD [] 13 | 14 | EXPOSE 8888 15 | -------------------------------------------------------------------------------- /Dockerfile-celery4: -------------------------------------------------------------------------------- 1 | FROM python:3.6-alpine 2 | MAINTAINER Horst Gutmann 3 | 4 | RUN mkdir -p /app/requirements 5 | ADD requirements/* /app/requirements/ 6 | WORKDIR /app 7 | 8 | ENV PYTHONUNBUFFERED 1 9 | RUN pip install -r requirements/promclient050.txt -r requirements/celery4.txt 10 | ADD celery_prometheus_exporter.py docker-entrypoint.sh /app/ 11 | ENTRYPOINT ["/bin/sh", "/app/docker-entrypoint.sh"] 12 | CMD [] 13 | 14 | EXPOSE 8888 15 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016, Horst Gutmann 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst celery_prometheus_exporter.py -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: celery_exporter-celery3.img celery_exporter-celery4.img 2 | 3 | celery_exporter-celery3.img: celery_prometheus_exporter.py Dockerfile-celery3 requirements/* 4 | docker build -f Dockerfile-celery3 -t celery_exporter:1-celery3 . 5 | docker save -o $@ celery_exporter:1-celery3 6 | 7 | celery_exporter-celery4.img: celery_prometheus_exporter.py Dockerfile-celery4 requirements/* 8 | docker build -f Dockerfile-celery4 -t celery_exporter:1-celery4 . 
9 | docker save -o $@ celery_exporter:1-celery4 10 | 11 | .PHONY: clean all 12 | clean: 13 | rm -rf celery_exporter.img *.egg-info build dist 14 | 15 | publish: all 16 | docker tag celery_exporter:1-celery3 zerok/celery_exporter:1-celery3 17 | docker tag celery_exporter:1-celery3 zerok/celery_exporter:1.3.0-celery3 18 | docker tag celery_exporter:1-celery4 zerok/celery_exporter:1-celery4 19 | docker tag celery_exporter:1-celery4 zerok/celery_exporter:1.3.0-celery4 20 | docker push zerok/celery_exporter:1-celery4 21 | docker push zerok/celery_exporter:1.3.0-celery4 22 | docker push zerok/celery_exporter:1-celery3 23 | docker push zerok/celery_exporter:1.3.0-celery3 24 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | celery-prometheus-exporter 3 | ========================== 4 | 5 | .. admonition:: info 6 | 7 | Sadly, for the last couple of months at the time of writing this 8 | (Sept 2019) I couldn't find the time to maintain this package 9 | anymore. I therefore decided to archive it. If you find this code 10 | useful, please fork it! 11 | 12 | A big "THANK YOU" goes to everyone who contributed to this project 13 | over the years! 14 | 15 | .. image:: https://img.shields.io/docker/automated/zerok/celery-prometheus-exporter.svg?maxAge=2592000 16 | :target: https://hub.docker.com/r/zerok/celery-prometheus-exporter/ 17 | 18 | celery-prometheus-exporter is a little exporter for Celery related metrics in 19 | order to get picked up by Prometheus. As with other exporters like 20 | mongodb\_exporter or node\_exporter this has been implemented as a 21 | standalone-service to make reuse easier across different frameworks. 
22 | 23 | So far it provides access to the following metrics: 24 | 25 | * ``celery_tasks`` exposes the number of tasks currently known to the queue 26 | grouped by ``state`` (RECEIVED, STARTED, ...). 27 | * ``celery_tasks_by_name`` exposes the number of tasks currently known to the queue 28 | grouped by ``name`` and ``state``. 29 | * ``celery_workers`` exposes the number of currently probably alive workers 30 | * ``celery_task_latency`` exposes a histogram of task latency, i.e. the time until 31 | tasks are picked up by a worker 32 | * ``celery_tasks_runtime_seconds`` tracks the number of seconds tasks take 33 | until completed as histogram 34 | 35 | 36 | How to use 37 | ========== 38 | 39 | There are multiple ways to install this. The obvious one is using ``pip install 40 | celery-prometheus-exporter`` and then using the ``celery-prometheus-exporter`` 41 | command:: 42 | 43 | $ celery-prometheus-exporter 44 | Starting HTTPD on 0.0.0.0:8888 45 | 46 | This package only depends on Celery directly, so you will have to install 47 | whatever other dependencies you will need for it to speak with your broker 🙂 48 | 49 | Celery workers have to be configured to send task-related events: 50 | http://docs.celeryproject.org/en/latest/userguide/configuration.html#worker-send-task-events. 51 | 52 | Running ``celery-prometheus-exporter`` with the ``--enable-events`` argument 53 | will periodically enable events on the workers. This is useful because it 54 | allows running celery workers with events disabled, until 55 | ``celery-prometheus-exporter`` is deployed, at which time events get enabled 56 | on the workers. 57 | 58 | Alternatively, you can use the bundle Makefile and Dockerfile to generate a 59 | Docker image. 60 | 61 | By default, the HTTPD will listen at ``0.0.0.0:8888``. If you want the HTTPD 62 | to listen to another port, use the ``--addr`` option or the environment variable 63 | ``DEFAULT_ADDR``. 
64 | 65 | By default, this will expect the broker to be available through 66 | ``redis://redis:6379/0``, although you can change via environment variable 67 | ``BROKER_URL``. If you're using AMQP or something else other than 68 | Redis, take a look at the Celery documentation and install the additioinal 69 | requirements 😊 Also use the ``--broker`` option to specify a different broker 70 | URL. 71 | 72 | If you need to pass additional options to your broker's transport use the 73 | ``--transport-options`` option. It tries to read a dict from a JSON object. 74 | E.g. to set your master name when using Redis Sentinel for broker discovery: 75 | ``--transport-options '{"master_name": "mymaster"}'`` 76 | 77 | Use ``--tz`` to specify the timezone the Celery app is using. Otherwise the 78 | systems local time will be used. 79 | 80 | By default, buckets for histograms are the same as default ones in the prometheus client: 81 | https://github.com/prometheus/client_python#histogram. 82 | It means they are intended to cover typical web/rpc requests from milliseconds to seconds, 83 | so you may want to customize them. 84 | It can be done via environment variable ``RUNTIME_HISTOGRAM_BUCKETS`` for tasks runtime and 85 | via environment variable ``LATENCY_HISTOGRAM_BUCKETS`` for tasks latency. 86 | Buckets should be passed as a list of float values separated by a comma. 87 | E.g. ``".005, .05, 0.1, 1.0, 2.5"``. 88 | 89 | Use ``--queue-list`` to specify the list of queues that will have its length 90 | monitored (Automatic Discovery of queues isn't supported right now, see limitations/ 91 | caveats. You can use the `QUEUE_LIST` environment variable as well. 
92 | 93 | If you then look at the exposed metrics, you should see something like this:: 94 | 95 | $ http get http://localhost:8888/metrics | grep celery_ 96 | # HELP celery_workers Number of alive workers 97 | # TYPE celery_workers gauge 98 | celery_workers 1.0 99 | # HELP celery_tasks Number of tasks per state 100 | # TYPE celery_tasks gauge 101 | celery_tasks{state="RECEIVED"} 3.0 102 | celery_tasks{state="PENDING"} 0.0 103 | celery_tasks{state="STARTED"} 1.0 104 | celery_tasks{state="RETRY"} 2.0 105 | celery_tasks{state="FAILURE"} 1.0 106 | celery_tasks{state="REVOKED"} 0.0 107 | celery_tasks{state="SUCCESS"} 8.0 108 | # HELP celery_tasks_by_name Number of tasks per state 109 | # TYPE celery_tasks_by_name gauge 110 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="RECEIVED"} 0.0 111 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="PENDING"} 0.0 112 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="STARTED"} 0.0 113 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="RETRY"} 0.0 114 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="FAILURE"} 0.0 115 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="REVOKED"} 0.0 116 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="SUCCESS"} 1.0 117 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="RECEIVED"} 3.0 118 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="PENDING"} 0.0 119 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="STARTED"} 1.0 120 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="RETRY"} 2.0 121 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="FAILURE"} 1.0 122 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="REVOKED"} 0.0 123 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="SUCCESS"} 7.0 124 | # HELP celery_task_latency Seconds between a task is received and 
started. 125 | # TYPE celery_task_latency histogram 126 | celery_task_latency_bucket{le="0.005"} 2.0 127 | celery_task_latency_bucket{le="0.01"} 3.0 128 | celery_task_latency_bucket{le="0.025"} 4.0 129 | celery_task_latency_bucket{le="0.05"} 4.0 130 | celery_task_latency_bucket{le="0.075"} 5.0 131 | celery_task_latency_bucket{le="0.1"} 5.0 132 | celery_task_latency_bucket{le="0.25"} 5.0 133 | celery_task_latency_bucket{le="0.5"} 5.0 134 | celery_task_latency_bucket{le="0.75"} 5.0 135 | celery_task_latency_bucket{le="1.0"} 5.0 136 | celery_task_latency_bucket{le="2.5"} 8.0 137 | celery_task_latency_bucket{le="5.0"} 11.0 138 | celery_task_latency_bucket{le="7.5"} 11.0 139 | celery_task_latency_bucket{le="10.0"} 11.0 140 | celery_task_latency_bucket{le="+Inf"} 11.0 141 | celery_task_latency_count 11.0 142 | celery_task_latency_sum 16.478713035583496 143 | celery_queue_length{queue_name="queue1"} 35.0 144 | celery_queue_length{queue_name="queue2"} 0.0 145 | 146 | Limitations 147 | =========== 148 | 149 | * Among tons of other features celery-prometheus-exporter doesn't support stats 150 | for multiple queues. As far as I can tell, only the routing key is exposed 151 | through the events API which might be enough to figure out the final queue, 152 | though. 153 | * This has only been tested with Redis so far. 154 | * At this point, you should specify the queues that will be monitored using an 155 | environment variable or an arg (`--queue-list`). 
# ---- /celery_prometheus_exporter.py ----
"""Prometheus metrics exporter for Celery task, worker and queue metrics."""
from __future__ import print_function
import argparse
import celery
import celery.states
import celery.events
import collections
from itertools import chain
import logging
import prometheus_client
import signal
import sys
import threading
import time
import json
import os
from celery.utils.objects import FallbackContext
import amqp.exceptions

__VERSION__ = (1, 2, 0, 'final', 0)


def decode_buckets(buckets_list):
    """Parse a comma-separated string (e.g. ".005,.05,1") into floats.

    Raises ValueError if any element is not a valid float literal.
    """
    return [float(x) for x in buckets_list.split(',')]


def get_histogram_buckets_from_evn(env_name):
    """Return histogram buckets configured via environment variable *env_name*.

    Falls back to the prometheus-client default buckets when the variable
    is unset.  (Name kept as-is — "evn" typo — because it is part of the
    public API exercised by the test suite.)
    """
    value = os.environ.get(env_name)
    if value is not None:
        buckets = decode_buckets(value)
    elif hasattr(prometheus_client.Histogram, 'DEFAULT_BUCKETS'):  # pragma: no cover
        buckets = prometheus_client.Histogram.DEFAULT_BUCKETS
    else:  # pragma: no cover
        # prometheus-client < 0.3.0 does not expose its default buckets,
        # so replicate them here.
        buckets = (.005, .01, .025, .05, .075, .1, .25, .5, .75,
                   1.0, 2.5, 5.0, 7.5, 10.0, float('inf'))
    return buckets


DEFAULT_BROKER = os.environ.get('BROKER_URL', 'redis://redis:6379/0')
DEFAULT_ADDR = os.environ.get('DEFAULT_ADDR', '0.0.0.0:8888')
DEFAULT_MAX_TASKS_IN_MEMORY = int(os.environ.get('DEFAULT_MAX_TASKS_IN_MEMORY',
                                                 '10000'))
RUNTIME_HISTOGRAM_BUCKETS = get_histogram_buckets_from_evn('RUNTIME_HISTOGRAM_BUCKETS')
LATENCY_HISTOGRAM_BUCKETS = get_histogram_buckets_from_evn('LATENCY_HISTOGRAM_BUCKETS')
# Either an empty list (unset) or a comma-separated string; main() handles
# both forms.
DEFAULT_QUEUE_LIST = os.environ.get('QUEUE_LIST', [])

LOG_FORMAT = '[%(asctime)s] %(name)s:%(levelname)s: %(message)s'

TASKS = prometheus_client.Gauge(
    'celery_tasks', 'Number of tasks per state', ['state'])
TASKS_NAME = prometheus_client.Gauge(
    'celery_tasks_by_name', 'Number of tasks per state and name',
    ['state', 'name'])
TASKS_RUNTIME = prometheus_client.Histogram(
    'celery_tasks_runtime_seconds', 'Task runtime (seconds)', ['name'],
    buckets=RUNTIME_HISTOGRAM_BUCKETS)
WORKERS = prometheus_client.Gauge(
    'celery_workers', 'Number of alive workers')
LATENCY = prometheus_client.Histogram(
    'celery_task_latency', 'Seconds between a task is received and started.',
    buckets=LATENCY_HISTOGRAM_BUCKETS)

QUEUE_LENGTH = prometheus_client.Gauge(
    'celery_queue_length', 'Number of tasks in the queue.',
    ['queue_name']
)


class MonitorThread(threading.Thread):
    """
    MonitorThread is the thread that will collect the data that is later
    exposed from Celery using its eventing system.
    """

    def __init__(self, app=None, *args, **kwargs):
        self._app = app
        self.log = logging.getLogger('monitor')
        self.log.info('Setting up monitor...')
        max_tasks_in_memory = kwargs.pop('max_tasks_in_memory',
                                         DEFAULT_MAX_TASKS_IN_MEMORY)
        # Celery's event State keeps a bounded in-memory cache of tasks.
        self._state = self._app.events.State(
            max_tasks_in_memory=max_tasks_in_memory)
        self._known_states = set()
        self._known_states_names = set()
        self._tasks_started = dict()
        super(MonitorThread, self).__init__(*args, **kwargs)

    def run(self):  # pragma: no cover
        self._monitor()

    def _process_event(self, evt):
        """Translate one raw Celery event into metric updates."""
        # Events might come in in parallel. Celery already has a lock
        # that deals with this exact situation so we'll use that for now.
        with self._state._mutex:
            if celery.events.group_from(evt['type']) == 'task':
                evt_state = evt['type'][5:]
                try:
                    # Celery 4
                    state = celery.events.state.TASK_EVENT_TO_STATE[evt_state]
                except AttributeError:  # pragma: no cover
                    # Celery 3
                    task = celery.events.state.Task()
                    task.event(evt_state)
                    state = task.state
                if state == celery.states.STARTED:
                    self._observe_latency(evt)
                self._collect_tasks(evt, state)

    def _observe_latency(self, evt):
        """Record received->started latency for a task, skipping retries."""
        try:
            prev_evt = self._state.tasks[evt['uuid']]
        except KeyError:  # pragma: no cover
            pass
        else:
            # ignore latency if it is a retry
            if prev_evt.state == celery.states.RECEIVED:
                LATENCY.observe(
                    evt['local_received'] - prev_evt.local_received)

    def _collect_tasks(self, evt, state):
        """Route the event to the ready/unready bookkeeping paths."""
        if state in celery.states.READY_STATES:
            self._incr_ready_task(evt, state)
        else:
            # add event to list of in-progress tasks
            self._state._event(evt)
        self._collect_unready_tasks()

    def _incr_ready_task(self, evt, state):
        """Count a task that reached a terminal state and drop it from cache."""
        TASKS.labels(state=state).inc()
        try:
            # remove event from list of in-progress tasks
            event = self._state.tasks.pop(evt['uuid'])
            TASKS_NAME.labels(state=state, name=event.name).inc()
            if 'runtime' in evt:
                TASKS_RUNTIME.labels(name=event.name) \
                    .observe(evt['runtime'])
        except (KeyError, AttributeError):  # pragma: no cover
            pass

    def _collect_unready_tasks(self):
        """Re-derive gauges for all not-yet-terminal tasks."""
        # count unready tasks by state
        cnt = collections.Counter(t.state for t in self._state.tasks.values())
        self._known_states.update(cnt.elements())
        # Iterate over every state ever seen so gauges drop back to 0
        # instead of keeping their last value.
        for task_state in self._known_states:
            TASKS.labels(state=task_state).set(cnt[task_state])

        # count unready tasks by state and name
        cnt = collections.Counter(
            (t.state, t.name) for t in self._state.tasks.values() if t.name)
        self._known_states_names.update(cnt.elements())
        for task_state in self._known_states_names:
            TASKS_NAME.labels(
                state=task_state[0],
                name=task_state[1],
            ).set(cnt[task_state])

    def _monitor(self):  # pragma: no cover
        """Consume events forever, reconnecting to the broker on failure."""
        while True:
            try:
                self.log.info('Connecting to broker...')
                with self._app.connection() as conn:
                    recv = self._app.events.Receiver(conn, handlers={
                        '*': self._process_event,
                    })
                    setup_metrics(self._app)
                    recv.capture(limit=None, timeout=None, wakeup=True)
                    self.log.info("Connected to broker")
            except Exception:
                self.log.exception("Queue connection failed")
                setup_metrics(self._app)
                time.sleep(5)


class WorkerMonitoringThread(threading.Thread):
    """Periodically pings workers and exposes the alive-count gauge."""

    celery_ping_timeout_seconds = 5
    periodicity_seconds = 5

    def __init__(self, app=None, *args, **kwargs):
        self._app = app
        self.log = logging.getLogger('workers-monitor')
        super(WorkerMonitoringThread, self).__init__(*args, **kwargs)

    def run(self):  # pragma: no cover
        while True:
            self.update_workers_count()
            time.sleep(self.periodicity_seconds)

    def update_workers_count(self):
        """Set WORKERS to the number of workers answering a broadcast ping."""
        try:
            WORKERS.set(len(self._app.control.ping(
                timeout=self.celery_ping_timeout_seconds)))
        except Exception:  # pragma: no cover
            self.log.exception("Error while pinging workers")


class EnableEventsThread(threading.Thread):
    """Periodically re-enables task events on all workers.

    Useful when workers start with events disabled: once this exporter is
    deployed, events get switched on without reconfiguring the workers.
    """

    periodicity_seconds = 5

    def __init__(self, app=None, *args, **kwargs):  # pragma: no cover
        self._app = app
        self.log = logging.getLogger('enable-events')
        super(EnableEventsThread, self).__init__(*args, **kwargs)

    def run(self):  # pragma: no cover
        while True:
            try:
                self.enable_events()
            except Exception:
                self.log.exception("Error while trying to enable events")
            time.sleep(self.periodicity_seconds)

    def enable_events(self):
        self._app.control.enable_events()


class QueueLengthMonitoringThread(threading.Thread):
    """Periodically measures the length of a fixed list of queues."""

    periodicity_seconds = 30

    def __init__(self, app, queue_list):
        # type: (celery.Celery, [str]) -> None
        self.celery_app = app
        self.queue_list = queue_list
        self.connection = self.celery_app.connection_or_acquire()

        # connection_or_acquire may hand back a lazy FallbackContext;
        # resolve it to a real connection up front.
        if isinstance(self.connection, FallbackContext):
            self.connection = self.connection.fallback()

        super(QueueLengthMonitoringThread, self).__init__()

    def measure_queues_length(self):
        """Passively declare each queue and record its message count."""
        for queue in self.queue_list:
            try:
                length = self.connection.default_channel.queue_declare(
                    queue=queue, passive=True).message_count
            except amqp.exceptions.ChannelError as e:
                # Passive declare raises when the queue does not exist yet;
                # report 0 rather than crashing the monitoring loop.
                logging.warning(
                    "Queue Not Found: {}. Setting its value to zero. "
                    "Error: {}".format(queue, str(e)))
                length = 0

            self.set_queue_length(queue, length)

    def set_queue_length(self, queue, length):
        QUEUE_LENGTH.labels(queue_name=queue).set(length)

    def run(self):  # pragma: no cover
        while True:
            self.measure_queues_length()
            time.sleep(self.periodicity_seconds)


def setup_metrics(app):
    """
    This initializes the available metrics with default values so that
    even before the first event is received, data can be exposed.
    """
    WORKERS.set(0)
    logging.info('Setting up metrics, trying to connect to broker...')
    try:
        registered_tasks = app.control.inspect().registered_tasks().values()
    except Exception:  # pragma: no cover
        # Broker unreachable: zero out every label combination that has
        # already been exposed so stale values are not reported.
        for metric in TASKS.collect():
            for sample in metric.samples:
                TASKS.labels(**sample[1]).set(0)
        for metric in TASKS_NAME.collect():
            for sample in metric.samples:
                TASKS_NAME.labels(**sample[1]).set(0)
    else:
        for state in celery.states.ALL_STATES:
            TASKS.labels(state=state).set(0)
            for task_name in set(chain.from_iterable(registered_tasks)):
                TASKS_NAME.labels(state=state, name=task_name).set(0)


def start_httpd(addr):  # pragma: no cover
    """
    Starts the exposing HTTPD using the addr provided in a separate
    thread.
    """
    host, port = addr.split(':')
    logging.info('Starting HTTPD on {}:{}'.format(host, port))
    prometheus_client.start_http_server(int(port), host)


def shutdown(signum, frame):  # pragma: no cover
    """
    Shutdown is called if the process receives a TERM signal. This way
    we try to prevent an ugly stacktrace being rendered to the user on
    a normal shutdown.
    """
    logging.info("Shutting down")
    sys.exit(0)


def main():  # pragma: no cover
    """Parse CLI options, start the monitoring threads and the HTTPD."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--broker', dest='broker', default=DEFAULT_BROKER,
        help="URL to the Celery broker. Defaults to {}".format(DEFAULT_BROKER))
    parser.add_argument(
        '--transport-options', dest='transport_options',
        help=("JSON object with additional options passed to the underlying "
              "transport."))
    parser.add_argument(
        '--addr', dest='addr', default=DEFAULT_ADDR,
        help="Address the HTTPD should listen on. Defaults to {}".format(
            DEFAULT_ADDR))
    parser.add_argument(
        '--enable-events', action='store_true',
        help="Periodically enable Celery events")
    parser.add_argument(
        '--tz', dest='tz',
        help="Timezone used by the celery app.")
    parser.add_argument(
        '--verbose', action='store_true', default=False,
        help="Enable verbose logging")
    parser.add_argument(
        '--max_tasks_in_memory', dest='max_tasks_in_memory',
        default=DEFAULT_MAX_TASKS_IN_MEMORY, type=int,
        help="Tasks cache size. Defaults to {}".format(
            DEFAULT_MAX_TASKS_IN_MEMORY))
    parser.add_argument(
        '--queue-list', dest='queue_list',
        default=DEFAULT_QUEUE_LIST, nargs='+',
        help="Queue List. Will be checked for its length."
    )
    parser.add_argument(
        '--version', action='version',
        version='.'.join([str(x) for x in __VERSION__]))
    opts = parser.parse_args()

    if opts.verbose:
        logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT)
    else:
        logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

    signal.signal(signal.SIGINT, shutdown)
    signal.signal(signal.SIGTERM, shutdown)

    if opts.tz:
        os.environ['TZ'] = opts.tz
        time.tzset()

    logging.info('Setting up celery for {}'.format(opts.broker))
    app = celery.Celery(broker=opts.broker)

    if opts.transport_options:
        try:
            transport_options = json.loads(opts.transport_options)
        except ValueError:
            print("Error parsing broker transport options from JSON '{}'"
                  .format(opts.transport_options), file=sys.stderr)
            sys.exit(1)
        else:
            app.conf.broker_transport_options = transport_options

    setup_metrics(app)

    t = MonitorThread(app=app, max_tasks_in_memory=opts.max_tasks_in_memory)
    t.daemon = True
    t.start()

    w = WorkerMonitoringThread(app=app)
    w.daemon = True
    w.start()

    if opts.queue_list:
        # QUEUE_LIST env var arrives as one comma-separated string, while
        # --queue-list produces a list via nargs='+'.
        if isinstance(opts.queue_list, str):
            queue_list = opts.queue_list.split(',')
        else:
            queue_list = opts.queue_list

        q = QueueLengthMonitoringThread(app=app, queue_list=queue_list)
        q.daemon = True
        q.start()

    e = None
    if opts.enable_events:
        e = EnableEventsThread(app=app)
        e.daemon = True
        e.start()
    start_httpd(opts.addr)
    t.join()
    w.join()
    if e is not None:
        e.join()


if __name__ == '__main__':  # pragma: no cover
    main()


# ---- /celeryapp.py ----
from celery import Celery
from kombu import Queue, Exchange

import os
import time

BROKER_URL = os.getenv("BROKER_URL")
RESULT_BACKEND_URL = os.getenv("RESULT_BACKEND_URL", None)

# Demo application used by docker-compose to generate traffic for the
# exporter: three no-op tasks routed to three distinct queues.
celery_app = Celery(
    broker=BROKER_URL,
)

if RESULT_BACKEND_URL:
    celery_app.conf.update(backend=RESULT_BACKEND_URL)

celery_app.conf.update(
    CELERY_DEFAULT_QUEUE="queue1",
    CELERY_QUEUES=(
        Queue('queue1', exchange=Exchange('queue1', type='direct'), routing_key='queue1'),
        Queue('queue2', exchange=Exchange('queue2', type='direct'), routing_key='queue2'),
        Queue('queue3', exchange=Exchange('queue3', type='direct'), routing_key='queue3'),
    ),
    CELERY_ROUTES={
        'task1': {'queue': 'queue1', 'routing_key': 'queue1'},
        'task2': {'queue': 'queue2', 'routing_key': 'queue2'},
        'task3': {'queue': 'queue3', 'routing_key': 'queue3'},
    }
)


@celery_app.task
def task1():
    time.sleep(20)


@celery_app.task
def task2():
    time.sleep(20)


@celery_app.task
def task3():
    time.sleep(20)
/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | 3 | services: 4 | app: 5 | image: celery-exporter:3 6 | build: 7 | context: . 8 | dockerfile: Dockerfile-celery3 9 | user: "65534" 10 | volumes: 11 | - ./:/app 12 | environment: 13 | - BROKER_URL=amqp://rabbit 14 | entrypoint: celery -A celeryapp worker 15 | 16 | exporter: 17 | image: celery-exporter:3 18 | build: 19 | context: . 20 | dockerfile: Dockerfile-celery3 21 | volumes: 22 | - ./:/app 23 | environment: 24 | - BROKER_URL=amqp://rabbit 25 | - QUEUE_LIST=queue1,queue2,queue3 26 | ports: 27 | - 8888:8888 28 | 29 | cache: 30 | image: redis:alpine 31 | 32 | rabbit: 33 | image: rabbitmq:alpine 34 | -------------------------------------------------------------------------------- /docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | exec python /app/celery_prometheus_exporter.py $@ 3 | -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- 1 | redis==2.10.6 2 | -------------------------------------------------------------------------------- /requirements/celery3.txt: -------------------------------------------------------------------------------- 1 | -r base.txt 2 | celery==3.1.25 3 | -------------------------------------------------------------------------------- /requirements/celery4.txt: -------------------------------------------------------------------------------- 1 | -r base.txt 2 | celery==4.2.0 3 | kombu==4.3.0 4 | -------------------------------------------------------------------------------- /requirements/promclient030.txt: -------------------------------------------------------------------------------- 1 | prometheus_client==0.3.0 2 | -------------------------------------------------------------------------------- 
/requirements/promclient050.txt: -------------------------------------------------------------------------------- 1 | prometheus_client==0.5.0 2 | -------------------------------------------------------------------------------- /requirements/test.txt: -------------------------------------------------------------------------------- 1 | -r base.txt 2 | pytest 3 | coverage -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | 3 | from setuptools import setup 4 | 5 | 6 | long_description = "See https://github.com/zerok/celery-prometheus-exporter" 7 | with io.open('README.rst', encoding='utf-8') as fp: 8 | long_description = fp.read() 9 | 10 | setup( 11 | name='celery-prometheus-exporter', 12 | description="Simple Prometheus metrics exporter for Celery", 13 | long_description=long_description, 14 | version='1.7.0', 15 | author='Horst Gutmann', 16 | license='MIT', 17 | author_email='horst@zerokspot.com', 18 | url='https://github.com/zerok/celery-prometheus-exporter', 19 | classifiers=[ 20 | 'Development Status :: 3 - Alpha', 21 | 'Environment :: Console', 22 | 'License :: OSI Approved :: MIT License', 23 | 'Programming Language :: Python :: 3.5', 24 | 'Programming Language :: Python :: 3 :: Only', 25 | ], 26 | py_modules=[ 27 | 'celery_prometheus_exporter', 28 | ], 29 | install_requires=[ 30 | 'celery>=3', 31 | 'prometheus_client>=0.0.20', 32 | ], 33 | entry_points={ 34 | 'console_scripts': [ 35 | 'celery-prometheus-exporter = celery_prometheus_exporter:main', 36 | ], 37 | } 38 | ) 39 | -------------------------------------------------------------------------------- /test/celery_test_utils.py: -------------------------------------------------------------------------------- 1 | import celery 2 | import time 3 | from kombu import Queue, Exchange 4 | 5 | 6 | def get_celery_app(queue=None): 7 | app = celery.Celery(broker='memory://', 
backend='cache+memory://') 8 | 9 | if queue: 10 | app.conf.update( 11 | CELERY_DEFAULT_QUEUE=queue, 12 | CELERY_QUEUES=( 13 | Queue(queue, exchange=Exchange(queue, type='direct'), routing_key=queue), 14 | ), 15 | CELERY_ROUTES={ 16 | 'task1': {'queue': queue, 'routing_key': queue}, 17 | } 18 | ) 19 | 20 | return app 21 | 22 | 23 | class SampleTask(celery.Task): 24 | name = 'sample-task' 25 | 26 | def run(self, *args, **kwargs): 27 | time.sleep(10) 28 | -------------------------------------------------------------------------------- /test/test_unit.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | 3 | import os 4 | import celery 5 | import celery.states 6 | import amqp.exceptions 7 | 8 | from celery.events import Event 9 | from celery.utils import uuid 10 | from prometheus_client import REGISTRY 11 | from unittest import TestCase 12 | try: 13 | from unittest.mock import patch 14 | except ImportError: 15 | from mock import patch 16 | 17 | from celery_prometheus_exporter import ( 18 | WorkerMonitoringThread, setup_metrics, MonitorThread, EnableEventsThread, 19 | TASKS, 20 | get_histogram_buckets_from_evn, 21 | QueueLengthMonitoringThread, QUEUE_LENGTH) 22 | 23 | from celery_test_utils import get_celery_app, SampleTask 24 | 25 | 26 | class TestBucketLoading(TestCase): 27 | def tearDown(self): 28 | if 'TEST_BUCKETS' in os.environ: 29 | del os.environ['TEST_BUCKETS'] 30 | 31 | def test_default_buckets(self): 32 | self.assertIsNotNone(get_histogram_buckets_from_evn('TEST_BUCKETS')) 33 | 34 | def test_from_env(self): 35 | os.environ['TEST_BUCKETS'] = '1,2,3' 36 | self.assertEqual([1.0, 2.0, 3.0], get_histogram_buckets_from_evn('TEST_BUCKETS')) 37 | 38 | class TestFallbackSetup(TestCase): 39 | def test_fallback(self): 40 | TASKS.labels(state='RUNNING').set(0) 41 | setup_metrics(None) 42 | 43 | 44 | class TestMockedCelery(TestCase): 45 | task = 'my_task' 46 | 47 | def setUp(self): 48 | self.app = 
get_celery_app() 49 | with patch('celery.task.control.inspect.registered_tasks') as tasks: 50 | tasks.return_value = {'worker1': [self.task]} 51 | setup_metrics(self.app) # reset metrics 52 | 53 | def test_initial_metric_values(self): 54 | self._assert_task_states(celery.states.ALL_STATES, 0) 55 | assert REGISTRY.get_sample_value('celery_workers') == 0 56 | assert REGISTRY.get_sample_value('celery_task_latency_count') == 0 57 | assert REGISTRY.get_sample_value('celery_task_latency_sum') == 0 58 | 59 | def test_workers_count(self): 60 | assert REGISTRY.get_sample_value('celery_workers') == 0 61 | 62 | with patch.object(self.app.control, 'ping') as mock_ping: 63 | w = WorkerMonitoringThread(app=self.app) 64 | 65 | mock_ping.return_value = [] 66 | w.update_workers_count() 67 | assert REGISTRY.get_sample_value('celery_workers') == 0 68 | 69 | mock_ping.return_value = [0] # 1 worker 70 | w.update_workers_count() 71 | assert REGISTRY.get_sample_value('celery_workers') == 1 72 | 73 | mock_ping.return_value = [0, 0] # 2 workers 74 | w.update_workers_count() 75 | assert REGISTRY.get_sample_value('celery_workers') == 2 76 | 77 | mock_ping.return_value = [] 78 | w.update_workers_count() 79 | assert REGISTRY.get_sample_value('celery_workers') == 0 80 | 81 | def test_tasks_events(self): 82 | task_uuid = uuid() 83 | hostname = 'myhost' 84 | local_received = time() 85 | latency_before_started = 123.45 86 | runtime = 234.5 87 | 88 | m = MonitorThread(app=self.app) 89 | 90 | self._assert_task_states(celery.states.ALL_STATES, 0) 91 | assert REGISTRY.get_sample_value('celery_task_latency_count') == 0 92 | assert REGISTRY.get_sample_value('celery_task_latency_sum') == 0 93 | 94 | m._process_event(Event( 95 | 'task-received', uuid=task_uuid, name=self.task, 96 | args='()', kwargs='{}', retries=0, eta=None, hostname=hostname, 97 | clock=0, 98 | local_received=local_received)) 99 | self._assert_all_states({celery.states.RECEIVED}) 100 | 101 | m._process_event(Event( 102 | 'task-started', 
uuid=task_uuid, hostname=hostname, 103 | clock=1, name=self.task, 104 | local_received=local_received + latency_before_started)) 105 | self._assert_all_states({celery.states.STARTED}) 106 | 107 | m._process_event(Event( 108 | 'task-succeeded', uuid=task_uuid, result='42', 109 | runtime=runtime, hostname=hostname, clock=2, 110 | local_received=local_received + latency_before_started + runtime)) 111 | self._assert_all_states({celery.states.SUCCESS}) 112 | 113 | assert REGISTRY.get_sample_value('celery_task_latency_count') == 1 114 | self.assertAlmostEqual(REGISTRY.get_sample_value( 115 | 'celery_task_latency_sum'), latency_before_started) 116 | assert REGISTRY.get_sample_value( 117 | 'celery_tasks_runtime_seconds_count', 118 | labels=dict(name=self.task)) == 1 119 | assert REGISTRY.get_sample_value( 120 | 'celery_tasks_runtime_seconds_sum', 121 | labels=dict(name=self.task)) == 234.5 122 | 123 | def test_enable_events(self): 124 | with patch.object( 125 | self.app.control, 'enable_events') as mock_enable_events: 126 | e = EnableEventsThread(app=self.app) 127 | e.enable_events() 128 | mock_enable_events.assert_called_once_with() 129 | 130 | def test_can_measure_queue_length(self): 131 | celery_app = get_celery_app(queue='realqueue') 132 | sample_task = SampleTask() 133 | sample_task.app = celery_app 134 | monitoring_thread_instance = QueueLengthMonitoringThread(celery_app, queue_list=['realqueue']) 135 | 136 | sample_task.delay() 137 | monitoring_thread_instance.measure_queues_length() 138 | sample = REGISTRY.get_sample_value('celery_queue_length', {'queue_name':'realqueue'}) 139 | 140 | self.assertEqual(1.0, sample) 141 | 142 | def test_set_zero_on_queue_length_when_an_channel_layer_error_occurs_during_queue_read(self): 143 | instance = QueueLengthMonitoringThread(app=self.app, queue_list=['noqueue']) 144 | 145 | instance.measure_queues_length() 146 | sample = REGISTRY.get_sample_value('celery_queue_length', {'queue_name':'noqueue'}) 147 | 148 | self.assertEqual(0.0, 
sample) 149 | 150 | def _assert_task_states(self, states, cnt): 151 | for state in states: 152 | assert REGISTRY.get_sample_value( 153 | 'celery_tasks', labels=dict(state=state)) == cnt 154 | task_by_name_label = dict(state=state, name=self.task) 155 | assert REGISTRY.get_sample_value( 156 | 'celery_tasks_by_name', labels=task_by_name_label) == cnt 157 | 158 | def _assert_all_states(self, exclude): 159 | self._assert_task_states(celery.states.ALL_STATES - exclude, 0) 160 | self._assert_task_states(exclude, 1) 161 | 162 | def _setup_task_with_celery_and_queue_support(self, queue_name, task, celery_app): 163 | task.app = celery_app 164 | 165 | return task 166 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{27,34,35,36}-celery{3,4}-promclient{030,050}, lint 3 | 4 | [testenv] 5 | deps = 6 | -rrequirements/test.txt 7 | py27: mock 8 | promclient030: -rrequirements/promclient030.txt 9 | promclient050: -rrequirements/promclient050.txt 10 | celery3: -rrequirements/celery3.txt 11 | celery4: -rrequirements/celery4.txt 12 | commands = 13 | coverage run -m py.test -s -v {toxinidir}/test/ 14 | coverage report 15 | 16 | [testenv:lint] 17 | basepython = python3 18 | deps = flake8>=3.3.0,<4 19 | commands = flake8 --max-complexity 15 celery_prometheus_exporter.py test 20 | --------------------------------------------------------------------------------