├── .coveragerc ├── .dockerignore ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── Dockerfile-celery3 ├── Dockerfile-celery4 ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.rst ├── celery_prometheus_exporter.py ├── celeryapp.py ├── docker-compose.yml ├── docker-entrypoint.sh ├── requirements ├── base.txt ├── celery3.txt ├── celery4.txt ├── promclient030.txt ├── promclient050.txt └── test.txt ├── setup.py ├── test ├── celery_test_utils.py └── test_unit.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = celery_prometheus_exporter 3 | 4 | [report] 5 | fail_under = 100 6 | show_missing = True 7 | 8 | [paths] 9 | source = celery_prometheus_exporter 10 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | *.img -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.img 2 | /dist 3 | /build 4 | /*.egg-info 5 | 6 | *.pyc 7 | __pycache__ 8 | .coverage 9 | .tox/ 10 | .cache/ 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | 4 | python: 5 | - "2.7" 6 | - "3.4" 7 | - "3.5" 8 | - "3.6" 9 | 10 | install: pip install tox-travis tox 11 | script: tox 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | The initial release of celery-prometheus-exporter was intended as a minimal 2 | solution that would cover what I personally needed at my own projects. 
That 3 | being said, you might need completely different kinds of metrics being 4 | exposed. If you do, please feel free to create tickets and pull requests 🙂 As 5 | such, the more details you can provide in your tickets the better. 6 | 7 | I will try to look into each issue but please note that I might not be available 8 | all the time and that timezones exist. Please be patient 😊 9 | -------------------------------------------------------------------------------- /Dockerfile-celery3: -------------------------------------------------------------------------------- 1 | FROM python:3.6-alpine 2 | MAINTAINER Horst Gutmann 3 | 4 | RUN mkdir -p /app/requirements 5 | ADD requirements/* /app/requirements/ 6 | WORKDIR /app 7 | 8 | ENV PYTHONUNBUFFERED 1 9 | RUN pip install -r requirements/promclient050.txt -r requirements/celery3.txt 10 | ADD celery_prometheus_exporter.py docker-entrypoint.sh /app/ 11 | ENTRYPOINT ["/bin/sh", "/app/docker-entrypoint.sh"] 12 | CMD [] 13 | 14 | EXPOSE 8888 15 | -------------------------------------------------------------------------------- /Dockerfile-celery4: -------------------------------------------------------------------------------- 1 | FROM python:3.6-alpine 2 | MAINTAINER Horst Gutmann 3 | 4 | RUN mkdir -p /app/requirements 5 | ADD requirements/* /app/requirements/ 6 | WORKDIR /app 7 | 8 | ENV PYTHONUNBUFFERED 1 9 | RUN pip install -r requirements/promclient050.txt -r requirements/celery4.txt 10 | ADD celery_prometheus_exporter.py docker-entrypoint.sh /app/ 11 | ENTRYPOINT ["/bin/sh", "/app/docker-entrypoint.sh"] 12 | CMD [] 13 | 14 | EXPOSE 8888 15 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016, Horst Gutmann 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst celery_prometheus_exporter.py -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: celery_exporter-celery3.img celery_exporter-celery4.img 2 | 3 | celery_exporter-celery3.img: celery_prometheus_exporter.py Dockerfile-celery3 requirements/* 4 | docker build -f Dockerfile-celery3 -t celery_exporter:1-celery3 . 5 | docker save -o $@ celery_exporter:1-celery3 6 | 7 | celery_exporter-celery4.img: celery_prometheus_exporter.py Dockerfile-celery4 requirements/* 8 | docker build -f Dockerfile-celery4 -t celery_exporter:1-celery4 . 
9 | docker save -o $@ celery_exporter:1-celery4 10 | 11 | .PHONY: clean all 12 | clean: 13 | rm -rf celery_exporter.img *.egg-info build dist 14 | 15 | publish: all 16 | docker tag celery_exporter:1-celery3 zerok/celery_exporter:1-celery3 17 | docker tag celery_exporter:1-celery3 zerok/celery_exporter:1.3.0-celery3 18 | docker tag celery_exporter:1-celery4 zerok/celery_exporter:1-celery4 19 | docker tag celery_exporter:1-celery4 zerok/celery_exporter:1.3.0-celery4 20 | docker push zerok/celery_exporter:1-celery4 21 | docker push zerok/celery_exporter:1.3.0-celery4 22 | docker push zerok/celery_exporter:1-celery3 23 | docker push zerok/celery_exporter:1.3.0-celery3 24 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | celery-prometheus-exporter 3 | ========================== 4 | 5 | .. admonition:: info 6 | 7 | Sadly, for the last couple of months at the time of writing this 8 | (Sept 2019) I couldn't find the time to maintain this package 9 | anymore. I therefore decided to archive it. If you find this code 10 | useful, please fork it! 11 | 12 | A big "THANK YOU" goes to everyone who contributed to this project 13 | over the years! 14 | 15 | .. image:: https://img.shields.io/docker/automated/zerok/celery-prometheus-exporter.svg?maxAge=2592000 16 | :target: https://hub.docker.com/r/zerok/celery-prometheus-exporter/ 17 | 18 | celery-prometheus-exporter is a little exporter for Celery related metrics in 19 | order to get picked up by Prometheus. As with other exporters like 20 | mongodb\_exporter or node\_exporter this has been implemented as a 21 | standalone-service to make reuse easier across different frameworks. 
22 | 23 | So far it provides access to the following metrics: 24 | 25 | * ``celery_tasks`` exposes the number of tasks currently known to the queue 26 | grouped by ``state`` (RECEIVED, STARTED, ...). 27 | * ``celery_tasks_by_name`` exposes the number of tasks currently known to the queue 28 | grouped by ``name`` and ``state``. 29 | * ``celery_workers`` exposes the number of currently probably alive workers 30 | * ``celery_task_latency`` exposes a histogram of task latency, i.e. the time until 31 | tasks are picked up by a worker 32 | * ``celery_tasks_runtime_seconds`` tracks the number of seconds tasks take 33 | until completed as histogram 34 | 35 | 36 | How to use 37 | ========== 38 | 39 | There are multiple ways to install this. The obvious one is using ``pip install 40 | celery-prometheus-exporter`` and then using the ``celery-prometheus-exporter`` 41 | command:: 42 | 43 | $ celery-prometheus-exporter 44 | Starting HTTPD on 0.0.0.0:8888 45 | 46 | This package only depends on Celery directly, so you will have to install 47 | whatever other dependencies you will need for it to speak with your broker 🙂 48 | 49 | Celery workers have to be configured to send task-related events: 50 | http://docs.celeryproject.org/en/latest/userguide/configuration.html#worker-send-task-events. 51 | 52 | Running ``celery-prometheus-exporter`` with the ``--enable-events`` argument 53 | will periodically enable events on the workers. This is useful because it 54 | allows running celery workers with events disabled, until 55 | ``celery-prometheus-exporter`` is deployed, at which time events get enabled 56 | on the workers. 57 | 58 | Alternatively, you can use the bundle Makefile and Dockerfile to generate a 59 | Docker image. 60 | 61 | By default, the HTTPD will listen at ``0.0.0.0:8888``. If you want the HTTPD 62 | to listen to another port, use the ``--addr`` option or the environment variable 63 | ``DEFAULT_ADDR``. 
64 | 65 | By default, this will expect the broker to be available through 66 | ``redis://redis:6379/0``, although you can change via environment variable 67 | ``BROKER_URL``. If you're using AMQP or something else other than 68 | Redis, take a look at the Celery documentation and install the additioinal 69 | requirements 😊 Also use the ``--broker`` option to specify a different broker 70 | URL. 71 | 72 | If you need to pass additional options to your broker's transport use the 73 | ``--transport-options`` option. It tries to read a dict from a JSON object. 74 | E.g. to set your master name when using Redis Sentinel for broker discovery: 75 | ``--transport-options '{"master_name": "mymaster"}'`` 76 | 77 | Use ``--tz`` to specify the timezone the Celery app is using. Otherwise the 78 | systems local time will be used. 79 | 80 | By default, buckets for histograms are the same as default ones in the prometheus client: 81 | https://github.com/prometheus/client_python#histogram. 82 | It means they are intended to cover typical web/rpc requests from milliseconds to seconds, 83 | so you may want to customize them. 84 | It can be done via environment variable ``RUNTIME_HISTOGRAM_BUCKETS`` for tasks runtime and 85 | via environment variable ``LATENCY_HISTOGRAM_BUCKETS`` for tasks latency. 86 | Buckets should be passed as a list of float values separated by a comma. 87 | E.g. ``".005, .05, 0.1, 1.0, 2.5"``. 88 | 89 | Use ``--queue-list`` to specify the list of queues that will have its length 90 | monitored (Automatic Discovery of queues isn't supported right now, see limitations/ 91 | caveats. You can use the `QUEUE_LIST` environment variable as well. 
92 | 93 | If you then look at the exposed metrics, you should see something like this:: 94 | 95 | $ http get http://localhost:8888/metrics | grep celery_ 96 | # HELP celery_workers Number of alive workers 97 | # TYPE celery_workers gauge 98 | celery_workers 1.0 99 | # HELP celery_tasks Number of tasks per state 100 | # TYPE celery_tasks gauge 101 | celery_tasks{state="RECEIVED"} 3.0 102 | celery_tasks{state="PENDING"} 0.0 103 | celery_tasks{state="STARTED"} 1.0 104 | celery_tasks{state="RETRY"} 2.0 105 | celery_tasks{state="FAILURE"} 1.0 106 | celery_tasks{state="REVOKED"} 0.0 107 | celery_tasks{state="SUCCESS"} 8.0 108 | # HELP celery_tasks_by_name Number of tasks per state 109 | # TYPE celery_tasks_by_name gauge 110 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="RECEIVED"} 0.0 111 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="PENDING"} 0.0 112 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="STARTED"} 0.0 113 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="RETRY"} 0.0 114 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="FAILURE"} 0.0 115 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="REVOKED"} 0.0 116 | celery_tasks_by_name{name="my_app.tasks.calculate_something",state="SUCCESS"} 1.0 117 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="RECEIVED"} 3.0 118 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="PENDING"} 0.0 119 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="STARTED"} 1.0 120 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="RETRY"} 2.0 121 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="FAILURE"} 1.0 122 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="REVOKED"} 0.0 123 | celery_tasks_by_name{name="my_app.tasks.fetch_some_data",state="SUCCESS"} 7.0 124 | # HELP celery_task_latency Seconds between a task is received and 
started. 125 | # TYPE celery_task_latency histogram 126 | celery_task_latency_bucket{le="0.005"} 2.0 127 | celery_task_latency_bucket{le="0.01"} 3.0 128 | celery_task_latency_bucket{le="0.025"} 4.0 129 | celery_task_latency_bucket{le="0.05"} 4.0 130 | celery_task_latency_bucket{le="0.075"} 5.0 131 | celery_task_latency_bucket{le="0.1"} 5.0 132 | celery_task_latency_bucket{le="0.25"} 5.0 133 | celery_task_latency_bucket{le="0.5"} 5.0 134 | celery_task_latency_bucket{le="0.75"} 5.0 135 | celery_task_latency_bucket{le="1.0"} 5.0 136 | celery_task_latency_bucket{le="2.5"} 8.0 137 | celery_task_latency_bucket{le="5.0"} 11.0 138 | celery_task_latency_bucket{le="7.5"} 11.0 139 | celery_task_latency_bucket{le="10.0"} 11.0 140 | celery_task_latency_bucket{le="+Inf"} 11.0 141 | celery_task_latency_count 11.0 142 | celery_task_latency_sum 16.478713035583496 143 | celery_queue_length{queue_name="queue1"} 35.0 144 | celery_queue_length{queue_name="queue2"} 0.0 145 | 146 | Limitations 147 | =========== 148 | 149 | * Among tons of other features celery-prometheus-exporter doesn't support stats 150 | for multiple queues. As far as I can tell, only the routing key is exposed 151 | through the events API which might be enough to figure out the final queue, 152 | though. 153 | * This has only been tested with Redis so far. 154 | * At this point, you should specify the queues that will be monitored using an 155 | environment variable or an arg (`--queue-list`). 
# ---- /celery_prometheus_exporter.py ----
"""Prometheus metrics exporter for Celery task, worker and queue metrics."""
from __future__ import print_function
import argparse
import celery
import celery.states
import celery.events
import collections
from itertools import chain
import logging
import prometheus_client
import signal
import sys
import threading
import time
import json
import os
from celery.utils.objects import FallbackContext
import amqp.exceptions

__VERSION__ = (1, 2, 0, 'final', 0)


def decode_buckets(buckets_list):
    """Parse a comma-separated string (e.g. ".005,.05,1") into floats.

    Raises ValueError if any element is not a valid float literal.
    """
    return [float(x) for x in buckets_list.split(',')]


def get_histogram_buckets_from_evn(env_name):
    """Return histogram buckets configured via environment variable *env_name*.

    Falls back to the prometheus-client default buckets when the variable
    is unset.  (Name kept as-is — "evn" typo — because it is part of the
    public API exercised by the test suite.)
    """
    value = os.environ.get(env_name)
    if value is not None:
        buckets = decode_buckets(value)
    elif hasattr(prometheus_client.Histogram, 'DEFAULT_BUCKETS'):  # pragma: no cover
        buckets = prometheus_client.Histogram.DEFAULT_BUCKETS
    else:  # pragma: no cover
        # prometheus-client < 0.3.0 does not expose its default buckets,
        # so replicate them here.
        buckets = (.005, .01, .025, .05, .075, .1, .25, .5, .75,
                   1.0, 2.5, 5.0, 7.5, 10.0, float('inf'))
    return buckets


DEFAULT_BROKER = os.environ.get('BROKER_URL', 'redis://redis:6379/0')
DEFAULT_ADDR = os.environ.get('DEFAULT_ADDR', '0.0.0.0:8888')
DEFAULT_MAX_TASKS_IN_MEMORY = int(os.environ.get('DEFAULT_MAX_TASKS_IN_MEMORY',
                                                 '10000'))
RUNTIME_HISTOGRAM_BUCKETS = get_histogram_buckets_from_evn('RUNTIME_HISTOGRAM_BUCKETS')
LATENCY_HISTOGRAM_BUCKETS = get_histogram_buckets_from_evn('LATENCY_HISTOGRAM_BUCKETS')
# Either an empty list (unset) or a comma-separated string; main() handles
# both forms.
DEFAULT_QUEUE_LIST = os.environ.get('QUEUE_LIST', [])

LOG_FORMAT = '[%(asctime)s] %(name)s:%(levelname)s: %(message)s'

TASKS = prometheus_client.Gauge(
    'celery_tasks', 'Number of tasks per state', ['state'])
TASKS_NAME = prometheus_client.Gauge(
    'celery_tasks_by_name', 'Number of tasks per state and name',
    ['state', 'name'])
TASKS_RUNTIME = prometheus_client.Histogram(
    'celery_tasks_runtime_seconds', 'Task runtime (seconds)', ['name'],
    buckets=RUNTIME_HISTOGRAM_BUCKETS)
WORKERS = prometheus_client.Gauge(
    'celery_workers', 'Number of alive workers')
LATENCY = prometheus_client.Histogram(
    'celery_task_latency', 'Seconds between a task is received and started.',
    buckets=LATENCY_HISTOGRAM_BUCKETS)

QUEUE_LENGTH = prometheus_client.Gauge(
    'celery_queue_length', 'Number of tasks in the queue.',
    ['queue_name']
)


class MonitorThread(threading.Thread):
    """
    MonitorThread is the thread that will collect the data that is later
    exposed from Celery using its eventing system.
    """

    def __init__(self, app=None, *args, **kwargs):
        self._app = app
        self.log = logging.getLogger('monitor')
        self.log.info('Setting up monitor...')
        max_tasks_in_memory = kwargs.pop('max_tasks_in_memory',
                                         DEFAULT_MAX_TASKS_IN_MEMORY)
        # Celery's event State keeps a bounded in-memory cache of tasks.
        self._state = self._app.events.State(
            max_tasks_in_memory=max_tasks_in_memory)
        self._known_states = set()
        self._known_states_names = set()
        self._tasks_started = dict()
        super(MonitorThread, self).__init__(*args, **kwargs)

    def run(self):  # pragma: no cover
        self._monitor()

    def _process_event(self, evt):
        """Translate one raw Celery event into metric updates."""
        # Events might come in in parallel. Celery already has a lock
        # that deals with this exact situation so we'll use that for now.
        with self._state._mutex:
            if celery.events.group_from(evt['type']) == 'task':
                evt_state = evt['type'][5:]
                try:
                    # Celery 4
                    state = celery.events.state.TASK_EVENT_TO_STATE[evt_state]
                except AttributeError:  # pragma: no cover
                    # Celery 3
                    task = celery.events.state.Task()
                    task.event(evt_state)
                    state = task.state
                if state == celery.states.STARTED:
                    self._observe_latency(evt)
                self._collect_tasks(evt, state)

    def _observe_latency(self, evt):
        """Record received->started latency for a task, skipping retries."""
        try:
            prev_evt = self._state.tasks[evt['uuid']]
        except KeyError:  # pragma: no cover
            pass
        else:
            # ignore latency if it is a retry
            if prev_evt.state == celery.states.RECEIVED:
                LATENCY.observe(
                    evt['local_received'] - prev_evt.local_received)

    def _collect_tasks(self, evt, state):
        """Route the event to the ready/unready bookkeeping paths."""
        if state in celery.states.READY_STATES:
            self._incr_ready_task(evt, state)
        else:
            # add event to list of in-progress tasks
            self._state._event(evt)
        self._collect_unready_tasks()

    def _incr_ready_task(self, evt, state):
        """Count a task that reached a terminal state and drop it from cache."""
        TASKS.labels(state=state).inc()
        try:
            # remove event from list of in-progress tasks
            event = self._state.tasks.pop(evt['uuid'])
            TASKS_NAME.labels(state=state, name=event.name).inc()
            if 'runtime' in evt:
                TASKS_RUNTIME.labels(name=event.name) \
                    .observe(evt['runtime'])
        except (KeyError, AttributeError):  # pragma: no cover
            pass

    def _collect_unready_tasks(self):
        """Re-derive gauges for all not-yet-terminal tasks."""
        # count unready tasks by state
        cnt = collections.Counter(t.state for t in self._state.tasks.values())
        self._known_states.update(cnt.elements())
        # Iterate over every state ever seen so gauges drop back to 0
        # instead of keeping their last value.
        for task_state in self._known_states:
            TASKS.labels(state=task_state).set(cnt[task_state])

        # count unready tasks by state and name
        cnt = collections.Counter(
            (t.state, t.name) for t in self._state.tasks.values() if t.name)
        self._known_states_names.update(cnt.elements())
        for task_state in self._known_states_names:
            TASKS_NAME.labels(
                state=task_state[0],
                name=task_state[1],
            ).set(cnt[task_state])

    def _monitor(self):  # pragma: no cover
        """Consume events forever, reconnecting to the broker on failure."""
        while True:
            try:
                self.log.info('Connecting to broker...')
                with self._app.connection() as conn:
                    recv = self._app.events.Receiver(conn, handlers={
                        '*': self._process_event,
                    })
                    setup_metrics(self._app)
                    recv.capture(limit=None, timeout=None, wakeup=True)
                    self.log.info("Connected to broker")
            except Exception:
                self.log.exception("Queue connection failed")
                setup_metrics(self._app)
                time.sleep(5)


class WorkerMonitoringThread(threading.Thread):
    """Periodically pings workers and exposes the alive-count gauge."""

    celery_ping_timeout_seconds = 5
    periodicity_seconds = 5

    def __init__(self, app=None, *args, **kwargs):
        self._app = app
        self.log = logging.getLogger('workers-monitor')
        super(WorkerMonitoringThread, self).__init__(*args, **kwargs)

    def run(self):  # pragma: no cover
        while True:
            self.update_workers_count()
            time.sleep(self.periodicity_seconds)

    def update_workers_count(self):
        """Set WORKERS to the number of workers answering a broadcast ping."""
        try:
            WORKERS.set(len(self._app.control.ping(
                timeout=self.celery_ping_timeout_seconds)))
        except Exception:  # pragma: no cover
            self.log.exception("Error while pinging workers")


class EnableEventsThread(threading.Thread):
    """Periodically re-enables task events on all workers.

    Useful when workers start with events disabled: once this exporter is
    deployed, events get switched on without reconfiguring the workers.
    """

    periodicity_seconds = 5

    def __init__(self, app=None, *args, **kwargs):  # pragma: no cover
        self._app = app
        self.log = logging.getLogger('enable-events')
        super(EnableEventsThread, self).__init__(*args, **kwargs)

    def run(self):  # pragma: no cover
        while True:
            try:
                self.enable_events()
            except Exception:
                self.log.exception("Error while trying to enable events")
            time.sleep(self.periodicity_seconds)

    def enable_events(self):
        self._app.control.enable_events()


class QueueLengthMonitoringThread(threading.Thread):
    """Periodically measures the length of a fixed list of queues."""

    periodicity_seconds = 30

    def __init__(self, app, queue_list):
        # type: (celery.Celery, [str]) -> None
        self.celery_app = app
        self.queue_list = queue_list
        self.connection = self.celery_app.connection_or_acquire()

        # connection_or_acquire may hand back a lazy FallbackContext;
        # resolve it to a real connection up front.
        if isinstance(self.connection, FallbackContext):
            self.connection = self.connection.fallback()

        super(QueueLengthMonitoringThread, self).__init__()

    def measure_queues_length(self):
        """Passively declare each queue and record its message count."""
        for queue in self.queue_list:
            try:
                length = self.connection.default_channel.queue_declare(
                    queue=queue, passive=True).message_count
            except amqp.exceptions.ChannelError as e:
                # Passive declare raises when the queue does not exist yet;
                # report 0 rather than crashing the monitoring loop.
                logging.warning(
                    "Queue Not Found: {}. Setting its value to zero. "
                    "Error: {}".format(queue, str(e)))
                length = 0

            self.set_queue_length(queue, length)

    def set_queue_length(self, queue, length):
        QUEUE_LENGTH.labels(queue_name=queue).set(length)

    def run(self):  # pragma: no cover
        while True:
            self.measure_queues_length()
            time.sleep(self.periodicity_seconds)


def setup_metrics(app):
    """
    This initializes the available metrics with default values so that
    even before the first event is received, data can be exposed.
    """
    WORKERS.set(0)
    logging.info('Setting up metrics, trying to connect to broker...')
    try:
        registered_tasks = app.control.inspect().registered_tasks().values()
    except Exception:  # pragma: no cover
        # Broker unreachable: zero out every label combination that has
        # already been exposed so stale values are not reported.
        for metric in TASKS.collect():
            for sample in metric.samples:
                TASKS.labels(**sample[1]).set(0)
        for metric in TASKS_NAME.collect():
            for sample in metric.samples:
                TASKS_NAME.labels(**sample[1]).set(0)
    else:
        for state in celery.states.ALL_STATES:
            TASKS.labels(state=state).set(0)
            for task_name in set(chain.from_iterable(registered_tasks)):
                TASKS_NAME.labels(state=state, name=task_name).set(0)


def start_httpd(addr):  # pragma: no cover
    """
    Starts the exposing HTTPD using the addr provided in a separate
    thread.
    """
    host, port = addr.split(':')
    logging.info('Starting HTTPD on {}:{}'.format(host, port))
    prometheus_client.start_http_server(int(port), host)


def shutdown(signum, frame):  # pragma: no cover
    """
    Shutdown is called if the process receives a TERM signal. This way
    we try to prevent an ugly stacktrace being rendered to the user on
    a normal shutdown.
    """
    logging.info("Shutting down")
    sys.exit(0)


def main():  # pragma: no cover
    """Parse CLI options, start the monitoring threads and the HTTPD."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--broker', dest='broker', default=DEFAULT_BROKER,
        help="URL to the Celery broker. Defaults to {}".format(DEFAULT_BROKER))
    parser.add_argument(
        '--transport-options', dest='transport_options',
        help=("JSON object with additional options passed to the underlying "
              "transport."))
    parser.add_argument(
        '--addr', dest='addr', default=DEFAULT_ADDR,
        help="Address the HTTPD should listen on. Defaults to {}".format(
            DEFAULT_ADDR))
    parser.add_argument(
        '--enable-events', action='store_true',
        help="Periodically enable Celery events")
    parser.add_argument(
        '--tz', dest='tz',
        help="Timezone used by the celery app.")
    parser.add_argument(
        '--verbose', action='store_true', default=False,
        help="Enable verbose logging")
    parser.add_argument(
        '--max_tasks_in_memory', dest='max_tasks_in_memory',
        default=DEFAULT_MAX_TASKS_IN_MEMORY, type=int,
        help="Tasks cache size. Defaults to {}".format(
            DEFAULT_MAX_TASKS_IN_MEMORY))
    parser.add_argument(
        '--queue-list', dest='queue_list',
        default=DEFAULT_QUEUE_LIST, nargs='+',
        help="Queue List. Will be checked for its length."
    )
    parser.add_argument(
        '--version', action='version',
        version='.'.join([str(x) for x in __VERSION__]))
    opts = parser.parse_args()

    if opts.verbose:
        logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT)
    else:
        logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

    signal.signal(signal.SIGINT, shutdown)
    signal.signal(signal.SIGTERM, shutdown)

    if opts.tz:
        os.environ['TZ'] = opts.tz
        time.tzset()

    logging.info('Setting up celery for {}'.format(opts.broker))
    app = celery.Celery(broker=opts.broker)

    if opts.transport_options:
        try:
            transport_options = json.loads(opts.transport_options)
        except ValueError:
            print("Error parsing broker transport options from JSON '{}'"
                  .format(opts.transport_options), file=sys.stderr)
            sys.exit(1)
        else:
            app.conf.broker_transport_options = transport_options

    setup_metrics(app)

    t = MonitorThread(app=app, max_tasks_in_memory=opts.max_tasks_in_memory)
    t.daemon = True
    t.start()

    w = WorkerMonitoringThread(app=app)
    w.daemon = True
    w.start()

    if opts.queue_list:
        # QUEUE_LIST env var arrives as one comma-separated string, while
        # --queue-list produces a list via nargs='+'.
        if isinstance(opts.queue_list, str):
            queue_list = opts.queue_list.split(',')
        else:
            queue_list = opts.queue_list

        q = QueueLengthMonitoringThread(app=app, queue_list=queue_list)
        q.daemon = True
        q.start()

    e = None
    if opts.enable_events:
        e = EnableEventsThread(app=app)
        e.daemon = True
        e.start()
    start_httpd(opts.addr)
    t.join()
    w.join()
    if e is not None:
        e.join()


if __name__ == '__main__':  # pragma: no cover
    main()


# ---- /celeryapp.py ----
from celery import Celery
from kombu import Queue, Exchange

import os
import time

BROKER_URL = os.getenv("BROKER_URL")
RESULT_BACKEND_URL = os.getenv("RESULT_BACKEND_URL", None)

# Demo application used by docker-compose to generate traffic for the
# exporter: three no-op tasks routed to three distinct queues.
celery_app = Celery(
    broker=BROKER_URL,
)

if RESULT_BACKEND_URL:
    celery_app.conf.update(backend=RESULT_BACKEND_URL)

celery_app.conf.update(
    CELERY_DEFAULT_QUEUE="queue1",
    CELERY_QUEUES=(
        Queue('queue1', exchange=Exchange('queue1', type='direct'), routing_key='queue1'),
        Queue('queue2', exchange=Exchange('queue2', type='direct'), routing_key='queue2'),
        Queue('queue3', exchange=Exchange('queue3', type='direct'), routing_key='queue3'),
    ),
    CELERY_ROUTES={
        'task1': {'queue': 'queue1', 'routing_key': 'queue1'},
        'task2': {'queue': 'queue2', 'routing_key': 'queue2'},
        'task3': {'queue': 'queue3', 'routing_key': 'queue3'},
    }
)


@celery_app.task
def task1():
    time.sleep(20)


@celery_app.task
def task2():
    time.sleep(20)


@celery_app.task
def task3():
    time.sleep(20)
/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | 3 | services: 4 | app: 5 | image: celery-exporter:3 6 | build: 7 | context: . 8 | dockerfile: Dockerfile-celery3 9 | user: "65534" 10 | volumes: 11 | - ./:/app 12 | environment: 13 | - BROKER_URL=amqp://rabbit 14 | entrypoint: celery -A celeryapp worker 15 | 16 | exporter: 17 | image: celery-exporter:3 18 | build: 19 | context: . 20 | dockerfile: Dockerfile-celery3 21 | volumes: 22 | - ./:/app 23 | environment: 24 | - BROKER_URL=amqp://rabbit 25 | - QUEUE_LIST=queue1,queue2,queue3 26 | ports: 27 | - 8888:8888 28 | 29 | cache: 30 | image: redis:alpine 31 | 32 | rabbit: 33 | image: rabbitmq:alpine 34 | -------------------------------------------------------------------------------- /docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | exec python /app/celery_prometheus_exporter.py $@ 3 | -------------------------------------------------------------------------------- /requirements/base.txt: -------------------------------------------------------------------------------- 1 | redis==2.10.6 2 | -------------------------------------------------------------------------------- /requirements/celery3.txt: -------------------------------------------------------------------------------- 1 | -r base.txt 2 | celery==3.1.25 3 | -------------------------------------------------------------------------------- /requirements/celery4.txt: -------------------------------------------------------------------------------- 1 | -r base.txt 2 | celery==4.2.0 3 | kombu==4.3.0 4 | -------------------------------------------------------------------------------- /requirements/promclient030.txt: -------------------------------------------------------------------------------- 1 | prometheus_client==0.3.0 2 | -------------------------------------------------------------------------------- 
/requirements/promclient050.txt: -------------------------------------------------------------------------------- 1 | prometheus_client==0.5.0 2 | -------------------------------------------------------------------------------- /requirements/test.txt: -------------------------------------------------------------------------------- 1 | -r base.txt 2 | pytest 3 | coverage -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | 3 | from setuptools import setup 4 | 5 | 6 | long_description = "See https://github.com/zerok/celery-prometheus-exporter" 7 | with io.open('README.rst', encoding='utf-8') as fp: 8 | long_description = fp.read() 9 | 10 | setup( 11 | name='celery-prometheus-exporter', 12 | description="Simple Prometheus metrics exporter for Celery", 13 | long_description=long_description, 14 | version='1.7.0', 15 | author='Horst Gutmann', 16 | license='MIT', 17 | author_email='horst@zerokspot.com', 18 | url='https://github.com/zerok/celery-prometheus-exporter', 19 | classifiers=[ 20 | 'Development Status :: 3 - Alpha', 21 | 'Environment :: Console', 22 | 'License :: OSI Approved :: MIT License', 23 | 'Programming Language :: Python :: 3.5', 24 | 'Programming Language :: Python :: 3 :: Only', 25 | ], 26 | py_modules=[ 27 | 'celery_prometheus_exporter', 28 | ], 29 | install_requires=[ 30 | 'celery>=3', 31 | 'prometheus_client>=0.0.20', 32 | ], 33 | entry_points={ 34 | 'console_scripts': [ 35 | 'celery-prometheus-exporter = celery_prometheus_exporter:main', 36 | ], 37 | } 38 | ) 39 | -------------------------------------------------------------------------------- /test/celery_test_utils.py: -------------------------------------------------------------------------------- 1 | import celery 2 | import time 3 | from kombu import Queue, Exchange 4 | 5 | 6 | def get_celery_app(queue=None): 7 | app = celery.Celery(broker='memory://', 
backend='cache+memory://') 8 | 9 | if queue: 10 | app.conf.update( 11 | CELERY_DEFAULT_QUEUE=queue, 12 | CELERY_QUEUES=( 13 | Queue(queue, exchange=Exchange(queue, type='direct'), routing_key=queue), 14 | ), 15 | CELERY_ROUTES={ 16 | 'task1': {'queue': queue, 'routing_key': queue}, 17 | } 18 | ) 19 | 20 | return app 21 | 22 | 23 | class SampleTask(celery.Task): 24 | name = 'sample-task' 25 | 26 | def run(self, *args, **kwargs): 27 | time.sleep(10) 28 | -------------------------------------------------------------------------------- /test/test_unit.py: -------------------------------------------------------------------------------- 1 | from time import time 2 | 3 | import os 4 | import celery 5 | import celery.states 6 | import amqp.exceptions 7 | 8 | from celery.events import Event 9 | from celery.utils import uuid 10 | from prometheus_client import REGISTRY 11 | from unittest import TestCase 12 | try: 13 | from unittest.mock import patch 14 | except ImportError: 15 | from mock import patch 16 | 17 | from celery_prometheus_exporter import ( 18 | WorkerMonitoringThread, setup_metrics, MonitorThread, EnableEventsThread, 19 | TASKS, 20 | get_histogram_buckets_from_evn, 21 | QueueLengthMonitoringThread, QUEUE_LENGTH) 22 | 23 | from celery_test_utils import get_celery_app, SampleTask 24 | 25 | 26 | class TestBucketLoading(TestCase): 27 | def tearDown(self): 28 | if 'TEST_BUCKETS' in os.environ: 29 | del os.environ['TEST_BUCKETS'] 30 | 31 | def test_default_buckets(self): 32 | self.assertIsNotNone(get_histogram_buckets_from_evn('TEST_BUCKETS')) 33 | 34 | def test_from_env(self): 35 | os.environ['TEST_BUCKETS'] = '1,2,3' 36 | self.assertEqual([1.0, 2.0, 3.0], get_histogram_buckets_from_evn('TEST_BUCKETS')) 37 | 38 | class TestFallbackSetup(TestCase): 39 | def test_fallback(self): 40 | TASKS.labels(state='RUNNING').set(0) 41 | setup_metrics(None) 42 | 43 | 44 | class TestMockedCelery(TestCase): 45 | task = 'my_task' 46 | 47 | def setUp(self): 48 | self.app = 
get_celery_app() 49 | with patch('celery.task.control.inspect.registered_tasks') as tasks: 50 | tasks.return_value = {'worker1': [self.task]} 51 | setup_metrics(self.app) # reset metrics 52 | 53 | def test_initial_metric_values(self): 54 | self._assert_task_states(celery.states.ALL_STATES, 0) 55 | assert REGISTRY.get_sample_value('celery_workers') == 0 56 | assert REGISTRY.get_sample_value('celery_task_latency_count') == 0 57 | assert REGISTRY.get_sample_value('celery_task_latency_sum') == 0 58 | 59 | def test_workers_count(self): 60 | assert REGISTRY.get_sample_value('celery_workers') == 0 61 | 62 | with patch.object(self.app.control, 'ping') as mock_ping: 63 | w = WorkerMonitoringThread(app=self.app) 64 | 65 | mock_ping.return_value = [] 66 | w.update_workers_count() 67 | assert REGISTRY.get_sample_value('celery_workers') == 0 68 | 69 | mock_ping.return_value = [0] # 1 worker 70 | w.update_workers_count() 71 | assert REGISTRY.get_sample_value('celery_workers') == 1 72 | 73 | mock_ping.return_value = [0, 0] # 2 workers 74 | w.update_workers_count() 75 | assert REGISTRY.get_sample_value('celery_workers') == 2 76 | 77 | mock_ping.return_value = [] 78 | w.update_workers_count() 79 | assert REGISTRY.get_sample_value('celery_workers') == 0 80 | 81 | def test_tasks_events(self): 82 | task_uuid = uuid() 83 | hostname = 'myhost' 84 | local_received = time() 85 | latency_before_started = 123.45 86 | runtime = 234.5 87 | 88 | m = MonitorThread(app=self.app) 89 | 90 | self._assert_task_states(celery.states.ALL_STATES, 0) 91 | assert REGISTRY.get_sample_value('celery_task_latency_count') == 0 92 | assert REGISTRY.get_sample_value('celery_task_latency_sum') == 0 93 | 94 | m._process_event(Event( 95 | 'task-received', uuid=task_uuid, name=self.task, 96 | args='()', kwargs='{}', retries=0, eta=None, hostname=hostname, 97 | clock=0, 98 | local_received=local_received)) 99 | self._assert_all_states({celery.states.RECEIVED}) 100 | 101 | m._process_event(Event( 102 | 'task-started', 
uuid=task_uuid, hostname=hostname, 103 | clock=1, name=self.task, 104 | local_received=local_received + latency_before_started)) 105 | self._assert_all_states({celery.states.STARTED}) 106 | 107 | m._process_event(Event( 108 | 'task-succeeded', uuid=task_uuid, result='42', 109 | runtime=runtime, hostname=hostname, clock=2, 110 | local_received=local_received + latency_before_started + runtime)) 111 | self._assert_all_states({celery.states.SUCCESS}) 112 | 113 | assert REGISTRY.get_sample_value('celery_task_latency_count') == 1 114 | self.assertAlmostEqual(REGISTRY.get_sample_value( 115 | 'celery_task_latency_sum'), latency_before_started) 116 | assert REGISTRY.get_sample_value( 117 | 'celery_tasks_runtime_seconds_count', 118 | labels=dict(name=self.task)) == 1 119 | assert REGISTRY.get_sample_value( 120 | 'celery_tasks_runtime_seconds_sum', 121 | labels=dict(name=self.task)) == 234.5 122 | 123 | def test_enable_events(self): 124 | with patch.object( 125 | self.app.control, 'enable_events') as mock_enable_events: 126 | e = EnableEventsThread(app=self.app) 127 | e.enable_events() 128 | mock_enable_events.assert_called_once_with() 129 | 130 | def test_can_measure_queue_length(self): 131 | celery_app = get_celery_app(queue='realqueue') 132 | sample_task = SampleTask() 133 | sample_task.app = celery_app 134 | monitoring_thread_instance = QueueLengthMonitoringThread(celery_app, queue_list=['realqueue']) 135 | 136 | sample_task.delay() 137 | monitoring_thread_instance.measure_queues_length() 138 | sample = REGISTRY.get_sample_value('celery_queue_length', {'queue_name':'realqueue'}) 139 | 140 | self.assertEqual(1.0, sample) 141 | 142 | def test_set_zero_on_queue_length_when_an_channel_layer_error_occurs_during_queue_read(self): 143 | instance = QueueLengthMonitoringThread(app=self.app, queue_list=['noqueue']) 144 | 145 | instance.measure_queues_length() 146 | sample = REGISTRY.get_sample_value('celery_queue_length', {'queue_name':'noqueue'}) 147 | 148 | self.assertEqual(0.0, 
sample) 149 | 150 | def _assert_task_states(self, states, cnt): 151 | for state in states: 152 | assert REGISTRY.get_sample_value( 153 | 'celery_tasks', labels=dict(state=state)) == cnt 154 | task_by_name_label = dict(state=state, name=self.task) 155 | assert REGISTRY.get_sample_value( 156 | 'celery_tasks_by_name', labels=task_by_name_label) == cnt 157 | 158 | def _assert_all_states(self, exclude): 159 | self._assert_task_states(celery.states.ALL_STATES - exclude, 0) 160 | self._assert_task_states(exclude, 1) 161 | 162 | def _setup_task_with_celery_and_queue_support(self, queue_name, task, celery_app): 163 | task.app = celery_app 164 | 165 | return task 166 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{27,34,35,36}-celery{3,4}-promclient{030,050}, lint 3 | 4 | [testenv] 5 | deps = 6 | -rrequirements/test.txt 7 | py27: mock 8 | promclient030: -rrequirements/promclient030.txt 9 | promclient050: -rrequirements/promclient050.txt 10 | celery3: -rrequirements/celery3.txt 11 | celery4: -rrequirements/celery4.txt 12 | commands = 13 | coverage run -m py.test -s -v {toxinidir}/test/ 14 | coverage report 15 | 16 | [testenv:lint] 17 | basepython = python3 18 | deps = flake8>=3.3.0,<4 19 | commands = flake8 --max-complexity 15 celery_prometheus_exporter.py test 20 | --------------------------------------------------------------------------------