├── .gitignore ├── requirements.txt ├── examples └── k8s │ └── k8s │ ├── object-store-user.yaml │ ├── service.yaml │ ├── service-monitor.yaml │ └── deployment.yaml ├── Dockerfile ├── LICENSE ├── .github └── workflows │ └── docker-images.yaml ├── README.md └── radosgw_usage_exporter.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/launch.json 2 | __pycache__/ 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | prometheus-client==0.21.0 2 | requests==2.32.4 3 | boto==2.49.0 4 | requests-aws==0.1.8 5 | -------------------------------------------------------------------------------- /examples/k8s/k8s/object-store-user.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: ceph.rook.io/v1 2 | kind: CephObjectStoreUser 3 | metadata: 4 | name: buckets-usage-exporter 5 | spec: 6 | store: us-east-1 7 | displayName: buckets-usage-exporter 8 | capabilities: 9 | bucket: read 10 | metadata: read 11 | usage: read 12 | user: read 13 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim 2 | 3 | RUN mkdir -p /usr/src/app 4 | WORKDIR /usr/src/app 5 | 6 | COPY requirements.txt /usr/src/app 7 | RUN pip install --no-cache-dir -r requirements.txt 8 | 9 | COPY radosgw_usage_exporter.py /usr/src/app 10 | 11 | EXPOSE 9242 12 | 13 | ENTRYPOINT [ "python", "-u", "./radosgw_usage_exporter.py" ] 14 | CMD [] 15 | -------------------------------------------------------------------------------- /examples/k8s/k8s/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: prometheus-buckets-usage-exporter 5 | labels: 
6 | app.kubernetes.io/name: prometheus-buckets-usage-exporter 7 | spec: 8 | selector: 9 | app.kubernetes.io/name: prometheus-buckets-usage-exporter 10 | ports: 11 | - name: http 12 | port: 9242 13 | protocol: TCP 14 | targetPort: 9242 15 | 16 | 17 | -------------------------------------------------------------------------------- /examples/k8s/k8s/service-monitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: prometheus-buckets-usage-exporter 5 | labels: 6 | app.kubernetes.io/name: prometheus-buckets-usage-exporter 7 | spec: 8 | selector: 9 | matchLabels: 10 | app.kubernetes.io/name: prometheus-buckets-usage-exporter 11 | endpoints: 12 | - honorLabels: true 13 | interval: 90s 14 | path: /metrics 15 | port: http 16 | scheme: http 17 | scrapeTimeout: 60s 18 | jobLabel: prometheus-buckets-usage-exporter 19 | 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/docker-images.yaml: -------------------------------------------------------------------------------- 1 | name: Multi-Arch Docker Build and Push to GHCR 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | tags: 8 | - v* 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v2 16 | 17 | - name: Login to GHCR 18 | run: echo ${{ secrets.GITHUB_TOKEN }} | docker login ghcr.io -u ${{ github.repository_owner }} --password-stdin 19 | 20 | - name: Set up QEMU 21 | uses: docker/setup-qemu-action@v3 22 | 23 | - name: Set up Docker Buildx for multiarch images 24 | uses: docker/setup-buildx-action@v3 25 | 26 | - name: Build and push Docker image to GHCR 27 | run: | 28 | # Strip git ref prefix from version 29 | VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') 30 | 31 | # Strip "v" prefix from tag name 32 | [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | sed -e 's/^v//') 33 | 34 | # Use Docker `latest` tag convention 35 | [ "$VERSION" == "master" ] && VERSION=latest 36 | docker buildx build --platform linux/amd64,linux/arm64,linux/arm/v7 \ 37 | --push \ 38 | -t ghcr.io/${{ github.repository }}:${VERSION} \ 39 | . 
40 | -------------------------------------------------------------------------------- /examples/k8s/k8s/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: prometheus-buckets-usage-exporter 5 | labels: 6 | app.kubernetes.io/name: prometheus-buckets-usage-exporter 7 | spec: 8 | replicas: 1 9 | revisionHistoryLimit: 3 10 | selector: 11 | matchLabels: 12 | app.kubernetes.io/name: prometheus-buckets-usage-exporter 13 | template: 14 | metadata: 15 | labels: 16 | app.kubernetes.io/name: prometheus-buckets-usage-exporter 17 | spec: 18 | containers: 19 | - image: ghcr.io/pando85/radosgw_usage_exporter:latest 20 | env: 21 | - name: ACCESS_KEY 22 | valueFrom: 23 | secretKeyRef: 24 | key: AccessKey 25 | name: rook-ceph-object-user-us-east-1-buckets-usage-exporter 26 | - name: SECRET_KEY 27 | valueFrom: 28 | secretKeyRef: 29 | key: SecretKey 30 | name: rook-ceph-object-user-us-east-1-buckets-usage-exporter 31 | - name: RADOSGW_SERVER 32 | valueFrom: 33 | secretKeyRef: 34 | key: Endpoint 35 | name: rook-ceph-object-user-us-east-1-buckets-usage-exporter 36 | - name: VIRTUAL_PORT 37 | value: "9242" 38 | - name: STORE 39 | value: eu-central-1a 40 | - name: LOG_LEVEL 41 | value: INFO 42 | - name: TIMEOUT 43 | value: "60" 44 | args: 45 | - --insecure 46 | name: exporter 47 | ports: 48 | - containerPort: 9242 49 | name: http 50 | protocol: TCP 51 | resources: 52 | limits: 53 | cpu: 500m 54 | memory: 512Mi 55 | requests: 56 | cpu: 10m 57 | memory: 40Mi 58 | livenessProbe: 59 | tcpSocket: 60 | port: http 61 | readinessProbe: 62 | tcpSocket: 63 | port: http 64 | securityContext: 65 | allowPrivilegeEscalation: false 66 | capabilities: 67 | drop: 68 | - ALL 69 | readOnlyRootFilesystem: true 70 | securityContext: 71 | runAsNonRoot: true 72 | runAsUser: 1000 73 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Ceph RADOSGW Usage Exporter 2 | 3 | [Prometheus](https://prometheus.io/) exporter that scrapes [Ceph](http://ceph.com/) RADOSGW usage 4 | information (operations and buckets). This information is gathered from a RADOSGW using the 5 | [Admin Operations API](http://docs.ceph.com/docs/master/radosgw/adminops/). 6 | 7 | This exporter was based on both 8 | (https://www.robustperception.io/writing-a-jenkins-exporter-in-python/) and the more elaborate 9 | Jenkins exporter here (https://github.com/lovoo/jenkins_exporter). 10 | 11 | ## Requirements 12 | 13 | - Working Ceph Cluster with Object Gateways setup. 14 | - Ceph RADOSGWs must be configured to gather usage information as this is not on by default. The 15 | minimum is to enable it via `ceph.conf` as below. There are however other options that are 16 | available and should be considered [here](http://docs.ceph.com/docs/master/radosgw/config-ref/). 17 | If you don't configure thresholds, intervals, and shards you may end up having too large objects 18 | in the usage namespace of the log pool. The values below are just examples. Check the 19 | documentation which ones would be the best ones for your setup. 20 | 21 | ``` 22 | rgw enable usage log = true 23 | rgw usage log flush threshold = 1024 24 | rgw usage log tick interval = 30 25 | rgw usage max shards = 32 26 | rgw usage max user shards = 8 27 | 28 | ``` 29 | 30 | - Configure admin entry point (default is 'admin'): 31 | 32 | ``` 33 | rgw admin entry = "admin" 34 | ``` 35 | 36 | - Enable admin API (default is enabled): 37 | 38 | ``` 39 | rgw enable apis = "s3, admin" 40 | ``` 41 | 42 | - This exporter requires a user that has the following capability, see the Admin Guide 43 | [here](http://docs.ceph.com/docs/master/radosgw/admin/#add-remove-admin-capabilities) for more 44 | details.
45 | 46 | ``` 47 | "caps": [ 48 | { 49 | "type": "buckets", 50 | "perm": "read" 51 | }, 52 | { 53 | "type": "metadata", 54 | "perm": "read" 55 | }, 56 | { 57 | "type": "usage", 58 | "perm": "read" 59 | }, 60 | { 61 | "type": "users", 62 | "perm": "read" 63 | } 64 | ``` 65 | 66 | **Note:** If using a loadbalancer in front of your RADOSGWs, please make sure your timeouts are set 67 | appropriately as clusters with a large number of buckets, or large number of users+buckets could 68 | cause the usage query to exceed the loadbalancer timeout. 69 | 70 | For haproxy the timeout in question is `timeout server` 71 | 72 | ## Local Installation 73 | 74 | ```bash 75 | git clone git@github.com:blemmenes/radosgw_usage_exporter.git 76 | cd radosgw_usage_exporter 77 | pip install -r requirements.txt 78 | ``` 79 | 80 | ## Config 81 | 82 | | _Arg_              | _Env_            | _Description_                                                             | _Default_           | 83 | | ------------------ | ---------------- | ----------------------------------------------------------------------- | ------------------- | 84 | | `-H --host`        | `RADOSGW_SERVER` | Server URL for the RADOSGW api (example: http://objects.dreamhost.com/)  | `http://radosgw:80` | 85 | | `-e --admin-entry` | `ADMIN_ENTRY`    | The entry point for an admin request URL                                  | `admin`             | 86 | | `-a --access-key`  | `ACCESS_KEY`     | S3 access key                                                             | `NA`                | 87 | | `-s --secret-key`  | `SECRET_KEY`     | S3 secret key                                                             | `NA`                | 88 | | `-k --insecure`    |                  | Allow insecure server connections when using SSL                          | `false`             | 89 | | `-p --port`        | VIRTUAL_PORT     | Port to listen                                                            | `9242`              | 90 | | `-S --store`       | STORE            | Store name added to metrics                                               | `us-east-1`         | 91 | | `-t --timeout`     | TIMEOUT          | Timeout when getting metrics                                              | `60`                | 92 | | `-l --log-level`   | LOG_LEVEL        | Provide logging level: DEBUG, INFO, WARNING, ERROR or CRITICAL            | `INFO`              | 93 | | `-T --tag-list`    | TAG_LIST         | Add bucket tags as label (example: 'tag1,tag2,tag3')                      | ``                  | 94 | 95 | ### Example 96 | 97 | ```bash 98 | ./check_ceph_rgw_api -H https://objects.dreamhost.com/ -a 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Prometheus exporter for Ceph RADOSGW usage statistics.

Collects per-bucket usage counters, bucket statistics and user
information from the RADOSGW Admin Operations API and exposes them
over HTTP in the Prometheus exposition format.
"""

import time
import requests
import warnings
import logging
import json
import argparse
import os
from awsauth import S3Auth
from prometheus_client import start_http_server
from collections import defaultdict, Counter
from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily, REGISTRY


class RADOSGWCollector(object):
    """RADOSGWCollector gathers bucket level usage data for all buckets from
    the specified RADOSGW and presents it in a format suitable for pulling via
    a Prometheus server.

    NOTE: By default RADOSGW Servers do not gather usage data and it must be
    enabled by 'rgw enable usage log = true' in the appropriate section
    of ceph.conf see Ceph documentation for details"""

    def __init__(
        self, host, admin_entry, access_key, secret_key, store, insecure, timeout, tag_list
    ):
        """
        :param host: RADOSGW base URL; scheme and trailing slash are added
            when missing.
        :param admin_entry: entry point of the admin API (usually 'admin').
        :param access_key: S3 access key of a user with admin read caps.
        :param secret_key: matching S3 secret key.
        :param insecure: despite the name, this is passed straight through
            to requests' ``verify`` argument (the CLI flag uses
            ``action="store_false"``), so False means "do not verify TLS
            certificates".
        :param store: store name attached to every metric as a label.
        :param timeout: per-request timeout in seconds (string or number).
        :param tag_list: comma separated bucket tag keys to expose as
            additional labels ('' disables tag labels).
        """
        super(RADOSGWCollector, self).__init__()
        self.host = host
        self.access_key = access_key
        self.secret_key = secret_key
        self.store = store
        self.insecure = insecure
        # Convert once here so an invalid TIMEOUT fails fast instead of on
        # every scrape.
        self.timeout = float(timeout)
        self.tag_list = tag_list
        # BUGFIX: "".split(",") yields [''], which would declare an invalid
        # empty-string Prometheus label name; only split when tags were given.
        self.tag_labels = [tag for tag in tag_list.split(",") if tag] if tag_list else []

        # helpers for default schema
        if not self.host.startswith("http"):
            self.host = "http://{0}".format(self.host)
        # and for request_uri
        if not self.host.endswith("/"):
            self.host = "{0}/".format(self.host)

        self.url = "{0}{1}/".format(self.host, admin_entry)
        # Prepare Requests Session
        self._session()

    def collect(self):
        """
        Yield all metric families for one scrape.

        * Collect 'usage' data:
          http://docs.ceph.com/docs/master/radosgw/adminops/#get-usage
        * Collect 'bucket' data:
          http://docs.ceph.com/docs/master/radosgw/adminops/#get-bucket-info
        """
        start = time.time()
        # setup empty prometheus metrics
        self._setup_empty_prometheus_metrics()

        # setup dict for aggregating bucket usage across "bins"
        self.usage_dict = defaultdict(dict)

        rgw_usage = self._request_data(query="usage", args="show-summary=False")
        rgw_bucket = self._request_data(query="bucket", args="stats=True")
        rgw_users = self._get_rgw_users()

        # populate metrics with data
        if rgw_usage:
            for entry in rgw_usage["entries"]:
                self._get_usage(entry)
            self._update_usage_metrics()

        if rgw_bucket:
            for bucket in rgw_bucket:
                self._get_bucket_usage(bucket)

        if rgw_users:
            for user in rgw_users:
                self._get_user_info(user)

        duration = time.time() - start
        self._prometheus_metrics["scrape_duration_seconds"].add_metric([], duration)

        for metric in self._prometheus_metrics.values():
            yield metric

    def _session(self):
        """
        Setup Requests connection settings (pooled HTTP adapter).
        """
        self.session = requests.Session()
        self.session_adapter = requests.adapters.HTTPAdapter(
            pool_connections=10, pool_maxsize=10
        )
        self.session.mount("http://", self.session_adapter)
        self.session.mount("https://", self.session_adapter)

        # self.insecure doubles as requests' `verify` flag: when it is False
        # (i.e. '--insecure' was given on the CLI) certificate verification
        # is disabled, so silence the resulting urllib3 warning.
        if not self.insecure:
            warnings.filterwarnings("ignore", message="Unverified HTTPS request")
            logging.debug("Performing insecure requests")

    def _request_data(self, query, args):
        """
        Requests data from RGW. If admin entry and caps is fine - return
        JSON data, otherwise return NoneType.
        """
        url = "{0}{1}/?format=json&{2}".format(self.url, query, args)

        try:
            response = self.session.get(
                url,
                verify=self.insecure,  # False disables TLS verification
                timeout=self.timeout,
                auth=S3Auth(self.access_key, self.secret_key, self.host),
            )

            if response.status_code == requests.codes.ok:
                logging.debug(response)
                return response.json()

            # Usage caps absent or wrong admin entry
            logging.error(
                "Request error [%s]: %s",
                response.status_code,
                response.content.decode("utf-8"),
            )
            return None

        # DNS, connection errors, etc
        except requests.exceptions.RequestException as e:
            logging.info("Request error: %s", e)
            return None

    def _setup_empty_prometheus_metrics(self):
        """
        (Re)create the metric families exported on each scrape.

        Bucket-level metrics share the label set ``b_labels``; user-level
        metrics use only ``["user", "store"]``.
        """
        # Configured tag keys become extra labels on every bucket metric.
        b_labels = ["bucket", "owner", "category", "store"] + self.tag_labels

        self._prometheus_metrics = {
            "ops": CounterMetricFamily(
                "radosgw_usage_ops_total",
                "Number of operations",
                labels=b_labels,
            ),
            "successful_ops": CounterMetricFamily(
                "radosgw_usage_successful_ops_total",
                "Number of successful operations",
                labels=b_labels,
            ),
            "bytes_sent": CounterMetricFamily(
                "radosgw_usage_sent_bytes_total",
                "Bytes sent by the RADOSGW",
                labels=b_labels,
            ),
            "bytes_received": CounterMetricFamily(
                "radosgw_usage_received_bytes_total",
                "Bytes received by the RADOSGW",
                labels=b_labels,
            ),
            "bucket_usage_bytes": GaugeMetricFamily(
                "radosgw_usage_bucket_bytes",
                "Bucket used bytes",
                labels=b_labels,
            ),
            "bucket_utilized_bytes": GaugeMetricFamily(
                "radosgw_usage_bucket_utilized_bytes",
                "Bucket utilized bytes",
                labels=b_labels,
            ),
            "bucket_usage_objects": GaugeMetricFamily(
                "radosgw_usage_bucket_objects",
                "Number of objects in bucket",
                labels=b_labels,
            ),
            "bucket_quota_enabled": GaugeMetricFamily(
                "radosgw_usage_bucket_quota_enabled",
                "Quota enabled for bucket",
                labels=b_labels,
            ),
            "bucket_quota_max_size": GaugeMetricFamily(
                "radosgw_usage_bucket_quota_size",
                "Maximum allowed bucket size",
                labels=b_labels,
            ),
            "bucket_quota_max_size_bytes": GaugeMetricFamily(
                "radosgw_usage_bucket_quota_size_bytes",
                "Maximum allowed bucket size in bytes",
                labels=b_labels,
            ),
            "bucket_quota_max_objects": GaugeMetricFamily(
                "radosgw_usage_bucket_quota_size_objects",
                "Maximum allowed bucket size in number of objects",
                labels=b_labels,
            ),
            "bucket_shards": GaugeMetricFamily(
                "radosgw_usage_bucket_shards",
                # BUGFIX: typo in HELP text ("Number ob shards").
                "Number of shards in bucket",
                labels=b_labels,
            ),
            "user_metadata": GaugeMetricFamily(
                "radosgw_user_metadata",
                "User metadata",
                labels=["user", "display_name", "email", "storage_class", "store"],
            ),
            "user_quota_enabled": GaugeMetricFamily(
                "radosgw_usage_user_quota_enabled",
                "User quota enabled",
                labels=["user", "store"],
            ),
            "user_quota_max_size": GaugeMetricFamily(
                "radosgw_usage_user_quota_size",
                "Maximum allowed size for user",
                labels=["user", "store"],
            ),
            "user_quota_max_size_bytes": GaugeMetricFamily(
                "radosgw_usage_user_quota_size_bytes",
                "Maximum allowed size in bytes for user",
                labels=["user", "store"],
            ),
            "user_quota_max_objects": GaugeMetricFamily(
                "radosgw_usage_user_quota_size_objects",
                "Maximum allowed number of objects across all user buckets",
                labels=["user", "store"],
            ),
            "user_bucket_quota_enabled": GaugeMetricFamily(
                "radosgw_usage_user_bucket_quota_enabled",
                "User per-bucket-quota enabled",
                labels=["user", "store"],
            ),
            "user_bucket_quota_max_size": GaugeMetricFamily(
                "radosgw_usage_user_bucket_quota_size",
                "Maximum allowed size for each bucket of user",
                labels=["user", "store"],
            ),
            "user_bucket_quota_max_size_bytes": GaugeMetricFamily(
                "radosgw_usage_user_bucket_quota_size_bytes",
                # BUGFIX: garbled HELP text ("size bytes size").
                "Maximum allowed size in bytes for each bucket of user",
                labels=["user", "store"],
            ),
            "user_bucket_quota_max_objects": GaugeMetricFamily(
                "radosgw_usage_user_bucket_quota_size_objects",
                "Maximum allowed number of objects in each user bucket",
                labels=["user", "store"],
            ),
            "user_total_objects": GaugeMetricFamily(
                "radosgw_usage_user_total_objects",
                "Usage of objects by user",
                labels=["user", "store"],
            ),
            "user_total_bytes": GaugeMetricFamily(
                "radosgw_usage_user_total_bytes",
                "Usage of bytes by user",
                labels=["user", "store"],
            ),
            "scrape_duration_seconds": GaugeMetricFamily(
                "radosgw_usage_scrape_duration_seconds",
                # BUGFIX: typo in HELP text ("Ammount").
                "Amount of time each scrape takes",
                labels=[],
            ),
        }

    def _get_usage(self, entry):
        """
        Receives JSON object 'entry' that contains all the buckets relating
        to a given RGW UID. Builds a dictionary of metric data in order to
        handle UIDs where the usage data is truncated into multiple 1000
        entry bins.
        """
        if "owner" in entry:
            bucket_owner = entry["owner"]
        # Luminous
        elif "user" in entry:
            bucket_owner = entry["user"]
        else:
            # BUGFIX: previously fell through with bucket_owner unbound and
            # raised NameError; skip entries that carry neither key.
            logging.warning("Skipping usage entry without owner/user: %s", entry)
            return

        if bucket_owner not in self.usage_dict:
            self.usage_dict[bucket_owner] = defaultdict(dict)

        for bucket in entry["buckets"]:
            logging.debug(json.dumps(bucket, indent=4, sort_keys=True))

            # An empty bucket name denotes account-level (root) operations.
            bucket_name = bucket["bucket"] if bucket["bucket"] else "bucket_root"

            if bucket_name not in self.usage_dict[bucket_owner]:
                self.usage_dict[bucket_owner][bucket_name] = defaultdict(dict)

            for category in bucket["categories"]:
                category_name = category["category"]
                if category_name not in self.usage_dict[bucket_owner][bucket_name]:
                    self.usage_dict[bucket_owner][bucket_name][category_name] = Counter()
                # Counter.update() sums values across truncated bins.
                c = self.usage_dict[bucket_owner][bucket_name][category_name]
                c.update(
                    {
                        "ops": category["ops"],
                        "successful_ops": category["successful_ops"],
                        "bytes_sent": category["bytes_sent"],
                        "bytes_received": category["bytes_received"],
                    }
                )

    def _update_usage_metrics(self):
        """
        Update prometheus metrics with the aggregated bucket usage data.
        """
        # Usage entries carry no bucket tags; pad with empty values so the
        # number of label values always matches the declared label names
        # (previously the tag labels were silently dropped from samples).
        tag_padding = [""] * len(self.tag_labels)

        for bucket_owner, buckets in self.usage_dict.items():
            for bucket_name, categories in buckets.items():
                for category, data_dict in categories.items():
                    labels = [bucket_name, bucket_owner, category, self.store] + tag_padding
                    for key in ("ops", "successful_ops", "bytes_sent", "bytes_received"):
                        self._prometheus_metrics[key].add_metric(labels, data_dict[key])

    def _get_bucket_usage(self, bucket):
        """
        Method get actual bucket usage (in bytes).
        Some skips and adjustments for various Ceph releases.
        """
        logging.debug(json.dumps(bucket, indent=4, sort_keys=True))

        if not isinstance(bucket, dict):
            # Hammer junk, just skip it
            return

        bucket_name = bucket["bucket"]
        bucket_owner = bucket["owner"]
        bucket_shards = bucket["num_shards"]
        bucket_usage_bytes = 0
        bucket_utilized_bytes = 0
        bucket_usage_objects = 0

        if bucket["usage"] and "rgw.main" in bucket["usage"]:
            rgw_main = bucket["usage"]["rgw.main"]
            # Prefer bytes, instead kbytes
            if "size_actual" in rgw_main:
                bucket_usage_bytes = rgw_main["size_actual"]
            # Hammer don't have bytes field
            elif "size_kb_actual" in rgw_main:
                bucket_usage_bytes = rgw_main["size_kb_actual"] * 1024

            # Compressed buckets, since Kraken
            if "size_utilized" in rgw_main:
                bucket_utilized_bytes = rgw_main["size_utilized"]

            # Get number of objects in bucket
            if "num_objects" in rgw_main:
                bucket_usage_objects = rgw_main["num_objects"]

        # Hammer has no zonegroup field
        bucket_zonegroup = bucket.get("zonegroup", "0")

        # BUGFIX: emit exactly one tag value per configured tag key ('' when
        # the bucket does not carry that tag), so the label value count
        # always matches the declared label names.
        bucket_tagset = bucket.get("tagset", {})
        taglist = [bucket_tagset.get(key, "") for key in self.tag_labels]

        # NOTE: the third label is declared as "category" but carries the
        # bucket's zonegroup here, matching the exporter's historic output.
        b_metrics = [bucket_name, bucket_owner, bucket_zonegroup, self.store] + taglist

        self._prometheus_metrics["bucket_usage_bytes"].add_metric(
            b_metrics, bucket_usage_bytes
        )
        self._prometheus_metrics["bucket_utilized_bytes"].add_metric(
            b_metrics, bucket_utilized_bytes
        )
        self._prometheus_metrics["bucket_usage_objects"].add_metric(
            b_metrics, bucket_usage_objects
        )

        if "bucket_quota" in bucket:
            quota = bucket["bucket_quota"]
            self._prometheus_metrics["bucket_quota_enabled"].add_metric(
                b_metrics, quota["enabled"]
            )
            self._prometheus_metrics["bucket_quota_max_size"].add_metric(
                b_metrics, quota["max_size"]
            )
            self._prometheus_metrics["bucket_quota_max_size_bytes"].add_metric(
                b_metrics, quota["max_size_kb"] * 1024
            )
            self._prometheus_metrics["bucket_quota_max_objects"].add_metric(
                b_metrics, quota["max_objects"]
            )

        self._prometheus_metrics["bucket_shards"].add_metric(b_metrics, bucket_shards)

    def _get_rgw_users(self):
        """
        API request to get the list of user ids.
        """
        rgw_users = self._request_data(query="user", args="list")

        if rgw_users and "keys" in rgw_users:
            return rgw_users["keys"]
        # Compat with old Ceph versions (pre 12.2.13/13.2.9).
        # (BUGFIX: removed an unreachable trailing `return`.)
        return self._request_data(query="metadata/user", args="")

    def _get_user_info(self, user):
        """
        Method to get the info on a specific user(s).
        """
        user_info = self._request_data(
            query="user", args="uid={0}&stats=True".format(user)
        )
        # BUGFIX: a failed request returns None; previously `"..." in None`
        # raised TypeError and aborted the whole scrape.
        if not user_info:
            return
        logging.debug(json.dumps(user_info, indent=4, sort_keys=True))

        user_display_name = user_info.get("display_name", "")
        user_email = user_info.get("email", "")
        # Nautilus+
        user_storage_class = user_info.get("default_storage_class", "")

        self._prometheus_metrics["user_metadata"].add_metric(
            [user, user_display_name, user_email, user_storage_class, self.store], 1
        )

        if "stats" in user_info:
            self._prometheus_metrics["user_total_bytes"].add_metric(
                [user, self.store], user_info["stats"]["size_actual"]
            )
            self._prometheus_metrics["user_total_objects"].add_metric(
                [user, self.store], user_info["stats"]["num_objects"]
            )

        if "user_quota" in user_info:
            quota = user_info["user_quota"]
            self._prometheus_metrics["user_quota_enabled"].add_metric(
                [user, self.store], quota["enabled"]
            )
            self._prometheus_metrics["user_quota_max_size"].add_metric(
                [user, self.store], quota["max_size"]
            )
            self._prometheus_metrics["user_quota_max_size_bytes"].add_metric(
                [user, self.store], quota["max_size_kb"] * 1024
            )
            self._prometheus_metrics["user_quota_max_objects"].add_metric(
                [user, self.store], quota["max_objects"]
            )

        if "bucket_quota" in user_info:
            quota = user_info["bucket_quota"]
            self._prometheus_metrics["user_bucket_quota_enabled"].add_metric(
                [user, self.store], quota["enabled"]
            )
            self._prometheus_metrics["user_bucket_quota_max_size"].add_metric(
                [user, self.store], quota["max_size"]
            )
            self._prometheus_metrics["user_bucket_quota_max_size_bytes"].add_metric(
                [user, self.store], quota["max_size_kb"] * 1024
            )
            self._prometheus_metrics["user_bucket_quota_max_objects"].add_metric(
                [user, self.store], quota["max_objects"]
            )


def parse_args():
    """Build the CLI, falling back to environment variables for defaults."""
    parser = argparse.ArgumentParser(
        description="RADOSGW address and local binding port as well as \
        S3 access_key and secret_key"
    )
    parser.add_argument(
        "-H",
        "--host",
        required=False,
        help="Server URL for the RADOSGW api (example: http://objects.dreamhost.com/)",
        default=os.environ.get("RADOSGW_SERVER", "http://radosgw:80"),
    )
    parser.add_argument(
        "-e",
        "--admin-entry",
        required=False,
        help="The entry point for an admin request URL [default is '%(default)s']",
        default=os.environ.get("ADMIN_ENTRY", "admin"),
    )
    parser.add_argument(
        "-a",
        "--access-key",
        required=False,
        help="S3 access key",
        default=os.environ.get("ACCESS_KEY", "NA"),
    )
    parser.add_argument(
        "-s",
        "--secret-key",
        required=False,
        help="S3 secret key",
        default=os.environ.get("SECRET_KEY", "NA"),
    )
    parser.add_argument(
        "-k",
        "--insecure",
        help="Allow insecure server connections when using SSL",
        # store_false: args.insecure defaults to True (verify TLS) and
        # becomes False when the flag is given; it is fed to requests'
        # `verify` argument as-is.
        action="store_false",
    )
    parser.add_argument(
        "-p",
        "--port",
        required=False,
        type=int,
        help="Port to listen",
        default=int(os.environ.get("VIRTUAL_PORT", "9242")),
    )
    parser.add_argument(
        "-S",
        "--store",
        required=False,
        help="Store name added to metrics",
        default=os.environ.get("STORE", "us-east-1"),
    )
    parser.add_argument(
        "-t",
        "--timeout",
        required=False,
        help="Timeout when getting metrics",
        default=os.environ.get("TIMEOUT", "60"),
    )
    parser.add_argument(
        "-l",
        "--log-level",
        required=False,
        help="Provide logging level: DEBUG, INFO, WARNING, ERROR or CRITICAL",
        default=os.environ.get("LOG_LEVEL", "INFO"),
    )
    parser.add_argument(
        "-T",
        "--tag-list",
        required=False,
        help="Add bucket tags as label (example: 'tag1,tag2,tag3') ",
        default=os.environ.get("TAG_LIST", ""),
    )

    return parser.parse_args()


def main():
    """Register the collector and serve metrics until interrupted."""
    try:
        args = parse_args()
        logging.basicConfig(level=args.log_level.upper())
        REGISTRY.register(
            RADOSGWCollector(
                args.host,
                args.admin_entry,
                args.access_key,
                args.secret_key,
                args.store,
                args.insecure,
                args.timeout,
                args.tag_list,
            )
        )
        # "::" binds both IPv6 and (on dual-stack hosts) IPv4.
        start_http_server(args.port, addr="::")
        logging.info("Polling %s. Serving at port: %s", args.host, args.port)
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        logging.info("\nInterrupted")
        exit(0)


if __name__ == "__main__":
    main()