├── .dockerignore ├── .gitignore ├── .my-env ├── .vscode └── settings.json ├── Dockerfile ├── LICENSE ├── README.md ├── deployables └── assets │ ├── chargeback_handler │ └── config │ │ └── config_internal.yaml │ ├── grafana │ └── provisioning │ │ ├── dashboards │ │ ├── ccloud_chargeback.json │ │ └── dashboard.yml │ │ └── datasources │ │ └── datasource.yml │ └── prometheus_for_chargeback │ ├── collector.sh │ └── prometheus_chargeback.yml ├── docker-compose.yml ├── image.png ├── requirements.txt └── src ├── ccloud ├── ccloud_api │ ├── api_keys.py │ ├── clusters.py │ ├── connectors.py │ ├── environments.py │ ├── ksqldb_clusters.py │ ├── service_accounts.py │ └── user_accounts.py ├── connections.py └── org.py ├── data_processing └── data_handlers │ ├── billing_api_handler.py │ ├── ccloud_api_handler.py │ ├── chargeback_handler.py │ ├── prom_fetch_stats_handler.py │ ├── prom_metrics_api_handler.py │ └── types.py ├── helpers.py ├── internal_data_probe.py ├── main.py ├── prometheus_processing ├── custom_collector.py └── notifier.py ├── storage_mgmt.py └── workflow_runner.py /.dockerignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | **/.DS_Store 3 | **/*.pyc 4 | **/.vscode 5 | **/datastore 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/launch.json 2 | .ipynb_checkpoints 3 | output/ 4 | __pycache__/ 5 | .DS_Store 6 | testing/ 7 | src/to_be_deleted/ 8 | deployables/datastore/ 9 | .*-my-env 10 | docker-compose.local.yml -------------------------------------------------------------------------------- /.my-env: -------------------------------------------------------------------------------- 1 | # The Codebase when started will start at today - {CCLOUD_LOOKBACK_DAYS} for the chargeback calculation. 2 | # You can change this value as per your needs. For demo purposes - this can be turned to as short as you like. 3 | # Just remember that the chargeback calculation stops about 2 days before the current date to allow for CCloud 4 | # billing data to be finalized and Metrics data to be available. 5 | CCLOUD_LOOKBACK_DAYS=200 6 | 7 | CCLOUD_BILLING_API_KEY= 8 | # CCLOUD_BILLING_API_KEY=1234567890 9 | 10 | CCLOUD_BILLING_API_SECRET= 11 | # CCLOUD_BILLING_API_SECRET=asdfghjklpoiuytrewq1234567890 12 | 13 | #Do NOT change this if you will be using the provided docker-compose file 14 | CHARGEBACK_SERVER_URL=http://prometheus_for_chargeback:9090 15 | 16 | # If you do not have a Metrics API Prometheus instance, copy value from CHARGEBACK_SERVER_URL below. 17 | # This will ensure that Metrics API server is found , but no data is available. 18 | # Code will try to work around that. 19 | METRICS_API_SERVER_URL= 20 | # METRICS_API_SERVER_URL=http://localhost:9090 21 | 22 | METRICS_API_SERVER_ENABLE_AUTH= 23 | # METRICS_API_SERVER_ENABLE_AUTH=True 24 | 25 | #HTTPBasic is the only supported auth type as of now. 
Do NOT change this 26 | METRICS_API_SERVER_AUTH_TYPE=HTTPBasicAuth 27 | 28 | METRICS_API_SERVER_AUTH_USERNAME= 29 | # METRICS_API_SERVER_AUTH_USERNAME=admin 30 | 31 | METRICS_API_SERVER_AUTH_PASSWORD= 32 | # METRICS_API_SERVER_AUTH_PASSWORD=password 33 | 34 | # Log Level for the codebase 35 | # Available Options - ERROR, WARNING, INFO, DEBUG 36 | LOG_LEVEL="INFO" 37 | # Enable method breadcrumbs - Allows you to see which method was the last one executed before an error 38 | # Available Options - True, False 39 | ENABLE_METHOD_BREADCRUMBS=False -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "addl", 4 | "basepath", 5 | "CCME", 6 | "CHARGEBACK", 7 | "CKUS", 8 | "cname", 9 | "CSUS", 10 | "dataframe", 11 | "dataframes", 12 | "dateutil", 13 | "inplace", 14 | "isin", 15 | "isoparse", 16 | "itertuples", 17 | "Ksqldb", 18 | "ksqldbcm", 19 | "kwagrs", 20 | "levelname", 21 | "lksql", 22 | "loglevel", 23 | "lookback", 24 | "metricsapi", 25 | "mgmt", 26 | "millis", 27 | "pname", 28 | "psutil", 29 | "ptype", 30 | "pydatetime", 31 | "Threadable", 32 | "timerange", 33 | "timeslot" 34 | ], 35 | "python.analysis.typeCheckingMode": "off" 36 | } 37 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | WORKDIR /app 4 | 5 | COPY requirements.txt . 6 | COPY src/ ./ 7 | COPY deployables/assets/chargeback_handler/config/config_internal.yaml ./config/config.yaml 8 | RUN pip install --no-cache-dir -r ./requirements.txt 9 | 10 | ENTRYPOINT [ "python", "main.py" ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Abhishek Walia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Confluent Cloud Chargeback Helper/Calculator 2 | 3 | Confluent Cloud has a lot of objects and a very detailed API model. There are 2 core API's available with Confluent Cloud: 4 | 5 | 1. 
[Confluent Cloud Core Objects API](https://docs.confluent.io/cloud/current/api.html): This API provides the details of the objects currently present in the Confluent Cloud ecosystem. 6 | 2. [Confluent Cloud Metrics API](https://api.telemetry.confluent.cloud/docs/descriptors/datasets/cloud): This API provides access to the plethora of metrics available from the various systems in Confluent Cloud. 7 | 8 | Billing data is also available out of the box via the Confluent Cloud API and is pretty detailed by itself. So what does this code do, then? This codebase aggregates data from the various API endpoints available with Confluent Cloud, correlates them, and produces meaningful, detailed datasets. 9 | 10 | The Billing API data is only granular up to 1 day at a time. This code divides those datasets into per-hour chunks and correlates them with data for the same interval from the Metrics API (indirectly - more on that later). The correlated data is then matched to the corresponding objects (Service Accounts, Users, API Keys, Kafka Clusters, etc.) and an enriched dataset is generated. 11 | 12 | This enriched dataset is exposed in an OpenMetrics-compatible format via an endpoint, so that systems which understand that format can scrape the data from there. The code senses that a scrape has been performed and refreshes the exposed data for the next hour. It exposes data one hour at a time to keep each scrape optimized. 13 | 14 | Eventually the data looks something like this in a Grafana dashboard: 15 | 16 | ![](image.png) 17 | 18 | Before getting into more details about the architecture, let's go through a quickstart first. 19 | 20 | ## Quickstart 21 | 22 | ### Pre-requisites 23 | 24 | 1. Ensure that you have a Prometheus instance that is already gathering the Metrics API dataset. This will be needed later while configuring the chargeback handler. 25 | 26 | 2. Create a new Service Account. We will be using this account to create an API Key which will be used by the Chargeback handler. 27 | 28 | ``` 29 | confluent login 30 | confluent iam sa create chargeback_handler --description "Chargeback handler user." 31 | ``` 32 | 33 | This will provide you with the Service Account ID, which has the format `sa-*******`. Note this ID as you will need it for the next set of commands. 34 | 35 | 3. Assign this Service Account the role bindings below: 36 | 37 | ``` 38 | confluent iam rbac role-binding create --principal User:<service_account_id> --role MetricsViewer 39 | confluent iam rbac role-binding create --principal User:<service_account_id> --role OrganizationAdmin 40 | ``` 41 | 42 | The MetricsViewer role is required for this account to be able to pull the Objects API data. 43 | The [OrganizationAdmin permission](https://docs.confluent.io/cloud/current/billing/overview.html#retrieve-invoices-and-costs) is required for this account to be able to pull the Billing API data. 44 | 45 | 4. Create an API Key associated with this account: 46 | ``` 47 | confluent api-key create --resource cloud --service-account <service_account_id> 48 | ``` 49 | 50 | ### Chargeback Handler Configuration 51 | 52 | 1. Clone the chargeback repo. 53 | 54 | ``` 55 | git clone https://github.com/waliaabhishek/ccloud-chargeback-helper.git 56 | ``` 57 | 58 | 2. Open the repo in your preferred code editor, as we'll need to add some properties. I will be using VS Code as that is my preferred code editor. 59 | 60 | ``` 61 | code ccloud-chargeback-helper 62 | ``` 63 | 64 | 3. Open the file called `.my-env` in the root folder and change all the necessary values; an illustrative example follows below.
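For reference, a minimal filled-in `.my-env` might look like the sketch below. All values are illustrative placeholders (the key, secret, and Metrics API URL are not real); the variable names are the ones shipped in the repo's `.my-env`.

```
CCLOUD_LOOKBACK_DAYS=30
CCLOUD_BILLING_API_KEY=ABCDEFGHIJKLMNOP
CCLOUD_BILLING_API_SECRET=replace-with-your-api-secret
# Do NOT change this if you are using the provided docker-compose file
CHARGEBACK_SERVER_URL=http://prometheus_for_chargeback:9090
METRICS_API_SERVER_URL=http://your-metrics-api-prometheus:9090
METRICS_API_SERVER_ENABLE_AUTH=False
METRICS_API_SERVER_AUTH_TYPE=HTTPBasicAuth
METRICS_API_SERVER_AUTH_USERNAME=
METRICS_API_SERVER_AUTH_PASSWORD=
LOG_LEVEL="INFO"
ENABLE_METHOD_BREADCRUMBS=False
```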
65 | 66 | > **`Note`** 67 | > If you dont have a Metrics API server, Copy Paste the URL from CHARGEBACK_SERVER_URL variable and change the METRICS_API_SERVER_ENABLE_AUTH value to `False`. Auth values dont matter if this switch is turned off. 68 | 69 | > **`Warning`** 70 | > If you used the Chargeback Server URL for the Metrics API server, the code will function but will be approximate and may assign cost to non Service Account/ User Account entities. This is because the Metrics API data is necessary to compute certain types of chargebacks. If it not available, the code turns into a best effort path for calculating chargebacks instead of dropping those values. 71 | 72 | 4. Open a terminal window and go the directory where the codebase is checked out. 73 | 5. Run the following command 74 | 75 | ``` 76 | docker compose --env-file .my-env up -d --remove-orphans 77 | ``` 78 | 79 | > **`Note`** 80 | > If you are an advanced user who needs docker-copose Overrides as well; the following command will work 81 | 82 | ``` 83 | docker compose --env-file .my-env -f docker-compose.yml -f docker-compose.local.yml up -d --remove-orphans 84 | ``` 85 | 86 | 6. The chargeback code should be now running. 87 | 88 | 7. These are the following details that you should know 89 | 90 | 1. Grafana URL: `http://localhost:3000` 91 | 2. Grafana Username `admin` 92 | 3. Grafana Password `password` 93 | 4. Chargeback Prometheus URL `http://localhost:9091` (No user/pass required) 94 | 95 | 8. Enjoy! 96 | 97 | # Architecture 98 | 99 | Details coming soon !! 100 | -------------------------------------------------------------------------------- /deployables/assets/chargeback_handler/config/config_internal.yaml: -------------------------------------------------------------------------------- 1 | config: 2 | system: 3 | days_in_memory: 7 4 | output_dir_name: "output" 5 | log_level: env::LOG_LEVEL 6 | enable_method_breadcrumbs: env:ENABLE_METHOD_BREADCRUMBS 7 | org_details: 8 | - id: CCloud Org 1 9 | ccloud_details: 10 | ccloud_api: 11 | api_key: env::CCLOUD_BILLING_API_KEY 12 | api_secret: env::CCLOUD_BILLING_API_SECRET 13 | billing_api: 14 | api_key: env::CCLOUD_BILLING_API_KEY 15 | api_secret: env::CCLOUD_BILLING_API_SECRET 16 | metrics_api: 17 | api_key: env::CCLOUD_BILLING_API_KEY 18 | api_secret: env::CCLOUD_BILLING_API_SECRET 19 | total_lookback_days: env::CCLOUD_LOOKBACK_DAYS 20 | prometheus_details: 21 | metrics_api_datastore: 22 | prometheus_url: env::METRICS_API_SERVER_URL 23 | auth: 24 | enable_auth: env::METRICS_API_SERVER_ENABLE_AUTH 25 | auth_type: env::METRICS_API_SERVER_AUTH_TYPE 26 | auth_args: 27 | username: env::METRICS_API_SERVER_AUTH_USERNAME 28 | password: env::METRICS_API_SERVER_AUTH_PASSWORD 29 | connection_params: 30 | verify: False 31 | chargeback_datastore: 32 | prometheus_url: env::CHARGEBACK_SERVER_URL 33 | -------------------------------------------------------------------------------- /deployables/assets/grafana/provisioning/dashboards/ccloud_chargeback.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "target": { 12 | "limit": 100, 13 | "matchAny": false, 14 | "tags": [], 15 | "type": "dashboard" 16 | }, 17 | "type": "dashboard" 18 | } 19 | ] 20 | }, 21 | "editable": true, 22 | "gnetId": null, 23 | "graphTooltip": 0, 24 | "id": 1, 25 | "links": 
[], 26 | "panels": [ 27 | { 28 | "datasource": null, 29 | "fieldConfig": { 30 | "defaults": { 31 | "color": { 32 | "mode": "palette-classic" 33 | }, 34 | "custom": { 35 | "axisLabel": "", 36 | "axisPlacement": "hidden", 37 | "barAlignment": 0, 38 | "drawStyle": "line", 39 | "fillOpacity": 0, 40 | "gradientMode": "none", 41 | "hideFrom": { 42 | "legend": false, 43 | "tooltip": false, 44 | "viz": false 45 | }, 46 | "lineInterpolation": "linear", 47 | "lineWidth": 1, 48 | "pointSize": 5, 49 | "scaleDistribution": { 50 | "type": "linear" 51 | }, 52 | "showPoints": "always", 53 | "spanNulls": false, 54 | "stacking": { 55 | "group": "A", 56 | "mode": "none" 57 | }, 58 | "thresholdsStyle": { 59 | "mode": "off" 60 | } 61 | }, 62 | "mappings": [ 63 | { 64 | "options": { 65 | "1": { 66 | "index": 0, 67 | "text": "Data Available" 68 | } 69 | }, 70 | "type": "value" 71 | } 72 | ], 73 | "thresholds": { 74 | "mode": "absolute", 75 | "steps": [ 76 | { 77 | "color": "green", 78 | "value": null 79 | } 80 | ] 81 | } 82 | }, 83 | "overrides": [] 84 | }, 85 | "gridPos": { 86 | "h": 5, 87 | "w": 11, 88 | "x": 0, 89 | "y": 0 90 | }, 91 | "id": 19, 92 | "options": { 93 | "legend": { 94 | "calcs": [], 95 | "displayMode": "list", 96 | "placement": "bottom" 97 | }, 98 | "tooltip": { 99 | "mode": "single" 100 | } 101 | }, 102 | "targets": [ 103 | { 104 | "exemplar": false, 105 | "expr": "confluent_cloud_custom_scrape_status{object_type=\"billing_chargeback\"}", 106 | "instant": false, 107 | "interval": "3600", 108 | "legendFormat": "Billing Chargeback Status", 109 | "refId": "A" 110 | } 111 | ], 112 | "title": "Chargeback Data Available until", 113 | "type": "timeseries" 114 | }, 115 | { 116 | "datasource": null, 117 | "fieldConfig": { 118 | "defaults": { 119 | "color": { 120 | "mode": "thresholds" 121 | }, 122 | "mappings": [], 123 | "thresholds": { 124 | "mode": "absolute", 125 | "steps": [ 126 | { 127 | "color": "green", 128 | "value": null 129 | } 130 | ] 131 | }, 132 | "unit": "currencyUSD" 133 | }, 134 | "overrides": [] 135 | }, 136 | "gridPos": { 137 | "h": 5, 138 | "w": 13, 139 | "x": 11, 140 | "y": 0 141 | }, 142 | "id": 14, 143 | "interval": "3600", 144 | "options": { 145 | "colorMode": "value", 146 | "graphMode": "area", 147 | "justifyMode": "auto", 148 | "orientation": "auto", 149 | "reduceOptions": { 150 | "calcs": [ 151 | "sum" 152 | ], 153 | "fields": "", 154 | "values": false 155 | }, 156 | "text": {}, 157 | "textMode": "auto" 158 | }, 159 | "pluginVersion": "8.1.3", 160 | "targets": [ 161 | { 162 | "exemplar": true, 163 | "expr": "sum (confluent_cloud_chargeback_details)", 164 | "interval": "3600", 165 | "legendFormat": "Total Cost", 166 | "refId": "A" 167 | }, 168 | { 169 | "exemplar": true, 170 | "expr": "sum by (cost_type) (confluent_cloud_chargeback_details{cost_type=~\"UsageCost\"})", 171 | "hide": false, 172 | "interval": "3600", 173 | "legendFormat": "Usage Cost", 174 | "refId": "B" 175 | }, 176 | { 177 | "exemplar": true, 178 | "expr": "sum by (cost_type) (confluent_cloud_chargeback_details{cost_type=~\"SharedCost\"})", 179 | "hide": false, 180 | "interval": "3600", 181 | "legendFormat": "Shared Cost", 182 | "refId": "C" 183 | } 184 | ], 185 | "timeFrom": null, 186 | "timeShift": null, 187 | "title": "Overall Cost Breakdown", 188 | "type": "stat" 189 | }, 190 | { 191 | "collapsed": false, 192 | "datasource": null, 193 | "gridPos": { 194 | "h": 1, 195 | "w": 24, 196 | "x": 0, 197 | "y": 5 198 | }, 199 | "id": 31, 200 | "panels": [], 201 | "title": "Cost breakdown summary", 202 | "type": "row" 
203 | }, 204 | { 205 | "datasource": null, 206 | "fieldConfig": { 207 | "defaults": { 208 | "color": { 209 | "mode": "palette-classic" 210 | }, 211 | "custom": { 212 | "hideFrom": { 213 | "legend": false, 214 | "tooltip": false, 215 | "viz": false 216 | } 217 | }, 218 | "mappings": [], 219 | "unit": "currencyUSD" 220 | }, 221 | "overrides": [] 222 | }, 223 | "gridPos": { 224 | "h": 13, 225 | "w": 6, 226 | "x": 0, 227 | "y": 6 228 | }, 229 | "id": 22, 230 | "options": { 231 | "displayLabels": [ 232 | "name" 233 | ], 234 | "legend": { 235 | "displayMode": "table", 236 | "placement": "bottom", 237 | "values": [ 238 | "value" 239 | ] 240 | }, 241 | "pieType": "pie", 242 | "reduceOptions": { 243 | "calcs": [ 244 | "sum" 245 | ], 246 | "fields": "", 247 | "values": false 248 | }, 249 | "tooltip": { 250 | "mode": "single" 251 | } 252 | }, 253 | "pluginVersion": "8.1.3", 254 | "repeat": null, 255 | "targets": [ 256 | { 257 | "exemplar": true, 258 | "expr": "sum by (env_id) (confluent_cloud_billing_details)", 259 | "hide": false, 260 | "interval": "3600", 261 | "legendFormat": "{{env_id}}", 262 | "refId": "A" 263 | } 264 | ], 265 | "title": "Cost breakdown per Environment", 266 | "type": "piechart" 267 | }, 268 | { 269 | "datasource": null, 270 | "fieldConfig": { 271 | "defaults": { 272 | "color": { 273 | "mode": "palette-classic" 274 | }, 275 | "custom": { 276 | "hideFrom": { 277 | "legend": false, 278 | "tooltip": false, 279 | "viz": false 280 | } 281 | }, 282 | "mappings": [], 283 | "unit": "currencyUSD" 284 | }, 285 | "overrides": [] 286 | }, 287 | "gridPos": { 288 | "h": 13, 289 | "w": 6, 290 | "x": 6, 291 | "y": 6 292 | }, 293 | "id": 38, 294 | "options": { 295 | "displayLabels": [ 296 | "name" 297 | ], 298 | "legend": { 299 | "displayMode": "table", 300 | "placement": "bottom", 301 | "values": [ 302 | "value" 303 | ] 304 | }, 305 | "pieType": "pie", 306 | "reduceOptions": { 307 | "calcs": [ 308 | "sum" 309 | ], 310 | "fields": "", 311 | "values": false 312 | }, 313 | "tooltip": { 314 | "mode": "single" 315 | } 316 | }, 317 | "pluginVersion": "8.1.3", 318 | "targets": [ 319 | { 320 | "exemplar": true, 321 | "expr": "sum by (kafka_cluster_id) (confluent_cloud_billing_details)", 322 | "hide": false, 323 | "interval": "3600", 324 | "legendFormat": "{{kafka_cluster_id}}", 325 | "refId": "A" 326 | } 327 | ], 328 | "title": "Cost breakdown per Kafka Cluster", 329 | "type": "piechart" 330 | }, 331 | { 332 | "datasource": null, 333 | "fieldConfig": { 334 | "defaults": { 335 | "color": { 336 | "mode": "palette-classic" 337 | }, 338 | "custom": { 339 | "hideFrom": { 340 | "legend": false, 341 | "tooltip": false, 342 | "viz": false 343 | } 344 | }, 345 | "mappings": [], 346 | "unit": "currencyUSD" 347 | }, 348 | "overrides": [] 349 | }, 350 | "gridPos": { 351 | "h": 13, 352 | "w": 6, 353 | "x": 12, 354 | "y": 6 355 | }, 356 | "id": 40, 357 | "options": { 358 | "displayLabels": [ 359 | "name" 360 | ], 361 | "legend": { 362 | "displayMode": "table", 363 | "placement": "bottom", 364 | "values": [ 365 | "value" 366 | ] 367 | }, 368 | "pieType": "pie", 369 | "reduceOptions": { 370 | "calcs": [ 371 | "sum" 372 | ], 373 | "fields": "", 374 | "values": false 375 | }, 376 | "tooltip": { 377 | "mode": "single" 378 | } 379 | }, 380 | "pluginVersion": "8.1.3", 381 | "targets": [ 382 | { 383 | "exemplar": true, 384 | "expr": "sum by (product_name) (confluent_cloud_billing_details)", 385 | "hide": false, 386 | "interval": "3600", 387 | "legendFormat": "{{product_name}}", 388 | "refId": "A" 389 | } 390 | ], 391 | 
"title": "Cost breakdown per Product Group", 392 | "type": "piechart" 393 | }, 394 | { 395 | "datasource": null, 396 | "fieldConfig": { 397 | "defaults": { 398 | "color": { 399 | "mode": "palette-classic" 400 | }, 401 | "custom": { 402 | "hideFrom": { 403 | "legend": false, 404 | "tooltip": false, 405 | "viz": false 406 | } 407 | }, 408 | "mappings": [], 409 | "unit": "currencyUSD" 410 | }, 411 | "overrides": [] 412 | }, 413 | "gridPos": { 414 | "h": 13, 415 | "w": 6, 416 | "x": 18, 417 | "y": 6 418 | }, 419 | "id": 39, 420 | "options": { 421 | "displayLabels": [ 422 | "name" 423 | ], 424 | "legend": { 425 | "displayMode": "table", 426 | "placement": "bottom", 427 | "values": [ 428 | "value" 429 | ] 430 | }, 431 | "pieType": "pie", 432 | "reduceOptions": { 433 | "calcs": [ 434 | "sum" 435 | ], 436 | "fields": "", 437 | "values": false 438 | }, 439 | "tooltip": { 440 | "mode": "single" 441 | } 442 | }, 443 | "pluginVersion": "8.1.3", 444 | "targets": [ 445 | { 446 | "exemplar": true, 447 | "expr": "sum by (product_line_type) (confluent_cloud_billing_details)", 448 | "hide": false, 449 | "interval": "3600", 450 | "legendFormat": "{{product_line_type}}", 451 | "refId": "A" 452 | } 453 | ], 454 | "title": "Cost breakdown per Product Line", 455 | "type": "piechart" 456 | }, 457 | { 458 | "datasource": null, 459 | "fieldConfig": { 460 | "defaults": { 461 | "color": { 462 | "mode": "thresholds" 463 | }, 464 | "mappings": [], 465 | "thresholds": { 466 | "mode": "absolute", 467 | "steps": [ 468 | { 469 | "color": "green", 470 | "value": null 471 | } 472 | ] 473 | }, 474 | "unit": "currencyUSD" 475 | }, 476 | "overrides": [] 477 | }, 478 | "gridPos": { 479 | "h": 12, 480 | "w": 24, 481 | "x": 0, 482 | "y": 19 483 | }, 484 | "id": 17, 485 | "options": { 486 | "orientation": "auto", 487 | "reduceOptions": { 488 | "calcs": [ 489 | "sum" 490 | ], 491 | "fields": "", 492 | "values": false 493 | }, 494 | "showThresholdLabels": false, 495 | "showThresholdMarkers": false, 496 | "text": {} 497 | }, 498 | "pluginVersion": "8.1.3", 499 | "targets": [ 500 | { 501 | "exemplar": true, 502 | "expr": "sum (confluent_cloud_chargeback_details)", 503 | "hide": false, 504 | "interval": "3600", 505 | "legendFormat": "Total Chargeback", 506 | "refId": "C" 507 | }, 508 | { 509 | "exemplar": true, 510 | "expr": "topk(15, sum(confluent_cloud_chargeback_details) by (principal))", 511 | "hide": true, 512 | "interval": "3600", 513 | "legendFormat": "{{principal}}", 514 | "refId": "A" 515 | }, 516 | { 517 | "exemplar": true, 518 | "expr": "sum by (principal) (confluent_cloud_chargeback_details)", 519 | "hide": false, 520 | "interval": "3600", 521 | "legendFormat": "{{principal}}", 522 | "refId": "B" 523 | } 524 | ], 525 | "timeFrom": null, 526 | "timeShift": null, 527 | "title": "Cost breakdown by Principal", 528 | "type": "gauge" 529 | }, 530 | { 531 | "collapsed": true, 532 | "datasource": null, 533 | "gridPos": { 534 | "h": 1, 535 | "w": 24, 536 | "x": 0, 537 | "y": 31 538 | }, 539 | "id": 29, 540 | "panels": [ 541 | { 542 | "datasource": null, 543 | "fieldConfig": { 544 | "defaults": { 545 | "color": { 546 | "mode": "thresholds" 547 | }, 548 | "mappings": [], 549 | "thresholds": { 550 | "mode": "absolute", 551 | "steps": [ 552 | { 553 | "color": "green", 554 | "value": null 555 | } 556 | ] 557 | }, 558 | "unit": "currencyUSD" 559 | }, 560 | "overrides": [] 561 | }, 562 | "gridPos": { 563 | "h": 12, 564 | "w": 24, 565 | "x": 0, 566 | "y": 32 567 | }, 568 | "id": 24, 569 | "options": { 570 | "colorMode": "value", 571 | 
"graphMode": "area", 572 | "justifyMode": "auto", 573 | "orientation": "auto", 574 | "reduceOptions": { 575 | "calcs": [ 576 | "sum" 577 | ], 578 | "fields": "", 579 | "values": false 580 | }, 581 | "text": {}, 582 | "textMode": "auto" 583 | }, 584 | "pluginVersion": "8.1.3", 585 | "targets": [ 586 | { 587 | "exemplar": true, 588 | "expr": "sum (confluent_cloud_billing_details)", 589 | "interval": "3600", 590 | "legendFormat": "Total Cost", 591 | "refId": "A" 592 | }, 593 | { 594 | "exemplar": true, 595 | "expr": "sum by (env_id) (confluent_cloud_billing_details)", 596 | "hide": false, 597 | "interval": "3600", 598 | "legendFormat": "{{env_id}}", 599 | "refId": "B" 600 | } 601 | ], 602 | "title": "Cost Split per Environment via Billing API", 603 | "type": "stat" 604 | } 605 | ], 606 | "title": "Cost breakdown per Environment (Details)", 607 | "type": "row" 608 | }, 609 | { 610 | "collapsed": true, 611 | "datasource": null, 612 | "gridPos": { 613 | "h": 1, 614 | "w": 24, 615 | "x": 0, 616 | "y": 32 617 | }, 618 | "id": 33, 619 | "panels": [ 620 | { 621 | "datasource": null, 622 | "fieldConfig": { 623 | "defaults": { 624 | "color": { 625 | "mode": "thresholds" 626 | }, 627 | "mappings": [], 628 | "thresholds": { 629 | "mode": "absolute", 630 | "steps": [ 631 | { 632 | "color": "green", 633 | "value": null 634 | } 635 | ] 636 | }, 637 | "unit": "currencyUSD" 638 | }, 639 | "overrides": [] 640 | }, 641 | "gridPos": { 642 | "h": 12, 643 | "w": 24, 644 | "x": 0, 645 | "y": 45 646 | }, 647 | "id": 25, 648 | "options": { 649 | "colorMode": "value", 650 | "graphMode": "area", 651 | "justifyMode": "auto", 652 | "orientation": "auto", 653 | "reduceOptions": { 654 | "calcs": [ 655 | "sum" 656 | ], 657 | "fields": "", 658 | "values": false 659 | }, 660 | "text": {}, 661 | "textMode": "auto" 662 | }, 663 | "pluginVersion": "8.1.3", 664 | "targets": [ 665 | { 666 | "exemplar": true, 667 | "expr": "sum (confluent_cloud_billing_details)", 668 | "hide": false, 669 | "interval": "3600", 670 | "legendFormat": "Total Cost", 671 | "refId": "B" 672 | }, 673 | { 674 | "exemplar": true, 675 | "expr": "sum by (kafka_cluster_id) (confluent_cloud_billing_details)", 676 | "interval": "3600", 677 | "legendFormat": "{{kafka_cluster_id}}", 678 | "refId": "A" 679 | } 680 | ], 681 | "title": "Cost Split per Kafka Cluster via Billing API", 682 | "type": "stat" 683 | } 684 | ], 685 | "title": "Cost breakdown Per Kafka Cluster (Details)", 686 | "type": "row" 687 | }, 688 | { 689 | "collapsed": true, 690 | "datasource": null, 691 | "gridPos": { 692 | "h": 1, 693 | "w": 24, 694 | "x": 0, 695 | "y": 33 696 | }, 697 | "id": 35, 698 | "panels": [ 699 | { 700 | "datasource": null, 701 | "fieldConfig": { 702 | "defaults": { 703 | "color": { 704 | "mode": "thresholds" 705 | }, 706 | "mappings": [], 707 | "thresholds": { 708 | "mode": "absolute", 709 | "steps": [ 710 | { 711 | "color": "green", 712 | "value": null 713 | } 714 | ] 715 | }, 716 | "unit": "currencyUSD" 717 | }, 718 | "overrides": [] 719 | }, 720 | "gridPos": { 721 | "h": 13, 722 | "w": 24, 723 | "x": 0, 724 | "y": 58 725 | }, 726 | "id": 26, 727 | "options": { 728 | "colorMode": "value", 729 | "graphMode": "area", 730 | "justifyMode": "auto", 731 | "orientation": "auto", 732 | "reduceOptions": { 733 | "calcs": [ 734 | "sum" 735 | ], 736 | "fields": "", 737 | "values": false 738 | }, 739 | "text": {}, 740 | "textMode": "auto" 741 | }, 742 | "pluginVersion": "8.1.3", 743 | "targets": [ 744 | { 745 | "exemplar": true, 746 | "expr": "sum (confluent_cloud_billing_details)", 
747 | "interval": "3600", 748 | "legendFormat": "Total Cost", 749 | "refId": "A" 750 | }, 751 | { 752 | "exemplar": true, 753 | "expr": "sum by (resource_id) (confluent_cloud_billing_details)", 754 | "hide": false, 755 | "interval": "3600", 756 | "legendFormat": "{{resource_id}}", 757 | "refId": "B" 758 | } 759 | ], 760 | "title": "Cost Split per Resource via Billing API", 761 | "type": "stat" 762 | } 763 | ], 764 | "title": "Cost breakdown Per Resource (Details)", 765 | "type": "row" 766 | }, 767 | { 768 | "collapsed": true, 769 | "datasource": null, 770 | "gridPos": { 771 | "h": 1, 772 | "w": 24, 773 | "x": 0, 774 | "y": 34 775 | }, 776 | "id": 37, 777 | "panels": [ 778 | { 779 | "datasource": null, 780 | "fieldConfig": { 781 | "defaults": { 782 | "color": { 783 | "mode": "thresholds" 784 | }, 785 | "mappings": [], 786 | "thresholds": { 787 | "mode": "absolute", 788 | "steps": [ 789 | { 790 | "color": "green", 791 | "value": null 792 | } 793 | ] 794 | }, 795 | "unit": "currencyUSD" 796 | }, 797 | "overrides": [] 798 | }, 799 | "gridPos": { 800 | "h": 13, 801 | "w": 24, 802 | "x": 0, 803 | "y": 72 804 | }, 805 | "id": 27, 806 | "options": { 807 | "colorMode": "value", 808 | "graphMode": "area", 809 | "justifyMode": "auto", 810 | "orientation": "auto", 811 | "reduceOptions": { 812 | "calcs": [ 813 | "sum" 814 | ], 815 | "fields": "", 816 | "values": false 817 | }, 818 | "text": {}, 819 | "textMode": "auto" 820 | }, 821 | "pluginVersion": "8.1.3", 822 | "targets": [ 823 | { 824 | "exemplar": true, 825 | "expr": "sum (confluent_cloud_billing_details)", 826 | "hide": false, 827 | "interval": "3600", 828 | "legendFormat": "Total Cost", 829 | "refId": "B" 830 | }, 831 | { 832 | "exemplar": true, 833 | "expr": "sum by (product_line_type) (confluent_cloud_billing_details)", 834 | "interval": "3600", 835 | "legendFormat": "{{product_line_type}}", 836 | "refId": "A" 837 | } 838 | ], 839 | "title": "Cost Split per Product Line via Billing API", 840 | "type": "stat" 841 | } 842 | ], 843 | "title": "Cost Split per Product Line (Details)", 844 | "type": "row" 845 | }, 846 | { 847 | "collapsed": true, 848 | "datasource": null, 849 | "gridPos": { 850 | "h": 1, 851 | "w": 24, 852 | "x": 0, 853 | "y": 35 854 | }, 855 | "id": 42, 856 | "panels": [ 857 | { 858 | "datasource": null, 859 | "fieldConfig": { 860 | "defaults": { 861 | "color": { 862 | "mode": "thresholds" 863 | }, 864 | "mappings": [], 865 | "thresholds": { 866 | "mode": "absolute", 867 | "steps": [ 868 | { 869 | "color": "green", 870 | "value": null 871 | }, 872 | { 873 | "color": "red", 874 | "value": 80 875 | } 876 | ] 877 | } 878 | }, 879 | "overrides": [] 880 | }, 881 | "gridPos": { 882 | "h": 6, 883 | "w": 3, 884 | "x": 1, 885 | "y": 43 886 | }, 887 | "id": 4, 888 | "options": { 889 | "colorMode": "value", 890 | "graphMode": "area", 891 | "justifyMode": "auto", 892 | "orientation": "auto", 893 | "reduceOptions": { 894 | "calcs": [ 895 | "distinctCount" 896 | ], 897 | "fields": "", 898 | "values": false 899 | }, 900 | "text": {}, 901 | "textMode": "auto" 902 | }, 903 | "pluginVersion": "8.1.3", 904 | "targets": [ 905 | { 906 | "exemplar": true, 907 | "expr": "count(confluent_cloud_api_key)", 908 | "interval": "3600", 909 | "legendFormat": "", 910 | "refId": "A" 911 | } 912 | ], 913 | "title": "API Keys Count", 914 | "type": "stat" 915 | }, 916 | { 917 | "datasource": null, 918 | "fieldConfig": { 919 | "defaults": { 920 | "color": { 921 | "mode": "thresholds" 922 | }, 923 | "mappings": [], 924 | "thresholds": { 925 | "mode": "absolute", 926 | 
"steps": [ 927 | { 928 | "color": "green", 929 | "value": null 930 | }, 931 | { 932 | "color": "red", 933 | "value": 80 934 | } 935 | ] 936 | } 937 | }, 938 | "overrides": [] 939 | }, 940 | "gridPos": { 941 | "h": 6, 942 | "w": 3, 943 | "x": 4, 944 | "y": 43 945 | }, 946 | "id": 5, 947 | "options": { 948 | "colorMode": "value", 949 | "graphMode": "area", 950 | "justifyMode": "auto", 951 | "orientation": "auto", 952 | "reduceOptions": { 953 | "calcs": [ 954 | "distinctCount" 955 | ], 956 | "fields": "", 957 | "values": false 958 | }, 959 | "text": {}, 960 | "textMode": "auto" 961 | }, 962 | "pluginVersion": "8.1.3", 963 | "targets": [ 964 | { 965 | "exemplar": true, 966 | "expr": "count(confluent_cloud_sa)", 967 | "interval": "3600", 968 | "legendFormat": "", 969 | "refId": "A" 970 | } 971 | ], 972 | "title": "Service Accounts Count", 973 | "type": "stat" 974 | }, 975 | { 976 | "datasource": null, 977 | "fieldConfig": { 978 | "defaults": { 979 | "color": { 980 | "mode": "thresholds" 981 | }, 982 | "mappings": [], 983 | "thresholds": { 984 | "mode": "absolute", 985 | "steps": [ 986 | { 987 | "color": "green", 988 | "value": null 989 | }, 990 | { 991 | "color": "red", 992 | "value": 80 993 | } 994 | ] 995 | } 996 | }, 997 | "overrides": [] 998 | }, 999 | "gridPos": { 1000 | "h": 6, 1001 | "w": 3, 1002 | "x": 7, 1003 | "y": 43 1004 | }, 1005 | "id": 6, 1006 | "options": { 1007 | "colorMode": "value", 1008 | "graphMode": "area", 1009 | "justifyMode": "auto", 1010 | "orientation": "auto", 1011 | "reduceOptions": { 1012 | "calcs": [ 1013 | "distinctCount" 1014 | ], 1015 | "fields": "", 1016 | "values": false 1017 | }, 1018 | "text": {}, 1019 | "textMode": "auto" 1020 | }, 1021 | "pluginVersion": "8.1.3", 1022 | "targets": [ 1023 | { 1024 | "exemplar": true, 1025 | "expr": "count(confluent_cloud_user)", 1026 | "interval": "3600", 1027 | "legendFormat": "", 1028 | "refId": "A" 1029 | } 1030 | ], 1031 | "title": "User Accounts Count", 1032 | "type": "stat" 1033 | }, 1034 | { 1035 | "datasource": null, 1036 | "fieldConfig": { 1037 | "defaults": { 1038 | "color": { 1039 | "mode": "thresholds" 1040 | }, 1041 | "mappings": [], 1042 | "thresholds": { 1043 | "mode": "absolute", 1044 | "steps": [ 1045 | { 1046 | "color": "green", 1047 | "value": null 1048 | }, 1049 | { 1050 | "color": "red", 1051 | "value": 80 1052 | } 1053 | ] 1054 | } 1055 | }, 1056 | "overrides": [] 1057 | }, 1058 | "gridPos": { 1059 | "h": 6, 1060 | "w": 3, 1061 | "x": 10, 1062 | "y": 43 1063 | }, 1064 | "id": 7, 1065 | "options": { 1066 | "colorMode": "value", 1067 | "graphMode": "area", 1068 | "justifyMode": "auto", 1069 | "orientation": "auto", 1070 | "reduceOptions": { 1071 | "calcs": [ 1072 | "distinctCount" 1073 | ], 1074 | "fields": "", 1075 | "values": false 1076 | }, 1077 | "text": {}, 1078 | "textMode": "auto" 1079 | }, 1080 | "pluginVersion": "8.1.3", 1081 | "targets": [ 1082 | { 1083 | "exemplar": true, 1084 | "expr": "count (confluent_cloud_kafka_cluster)", 1085 | "interval": "3600", 1086 | "legendFormat": "", 1087 | "refId": "A" 1088 | } 1089 | ], 1090 | "title": "Kafka Cluster Count", 1091 | "type": "stat" 1092 | }, 1093 | { 1094 | "datasource": null, 1095 | "fieldConfig": { 1096 | "defaults": { 1097 | "color": { 1098 | "mode": "thresholds" 1099 | }, 1100 | "mappings": [], 1101 | "thresholds": { 1102 | "mode": "absolute", 1103 | "steps": [ 1104 | { 1105 | "color": "green", 1106 | "value": null 1107 | }, 1108 | { 1109 | "color": "red", 1110 | "value": 80 1111 | } 1112 | ] 1113 | } 1114 | }, 1115 | "overrides": [] 1116 | 
}, 1117 | "gridPos": { 1118 | "h": 6, 1119 | "w": 3, 1120 | "x": 13, 1121 | "y": 43 1122 | }, 1123 | "id": 8, 1124 | "options": { 1125 | "colorMode": "value", 1126 | "graphMode": "area", 1127 | "justifyMode": "auto", 1128 | "orientation": "auto", 1129 | "reduceOptions": { 1130 | "calcs": [ 1131 | "distinctCount" 1132 | ], 1133 | "fields": "", 1134 | "values": false 1135 | }, 1136 | "text": {}, 1137 | "textMode": "auto" 1138 | }, 1139 | "pluginVersion": "8.1.3", 1140 | "targets": [ 1141 | { 1142 | "exemplar": true, 1143 | "expr": "count (confluent_cloud_environment)", 1144 | "interval": "", 1145 | "legendFormat": "", 1146 | "refId": "A" 1147 | } 1148 | ], 1149 | "title": "Environment Count", 1150 | "type": "stat" 1151 | }, 1152 | { 1153 | "datasource": null, 1154 | "fieldConfig": { 1155 | "defaults": { 1156 | "color": { 1157 | "mode": "thresholds" 1158 | }, 1159 | "mappings": [], 1160 | "thresholds": { 1161 | "mode": "absolute", 1162 | "steps": [ 1163 | { 1164 | "color": "green", 1165 | "value": null 1166 | }, 1167 | { 1168 | "color": "red", 1169 | "value": 80 1170 | } 1171 | ] 1172 | } 1173 | }, 1174 | "overrides": [] 1175 | }, 1176 | "gridPos": { 1177 | "h": 6, 1178 | "w": 3, 1179 | "x": 16, 1180 | "y": 43 1181 | }, 1182 | "id": 9, 1183 | "options": { 1184 | "colorMode": "value", 1185 | "graphMode": "area", 1186 | "justifyMode": "auto", 1187 | "orientation": "auto", 1188 | "reduceOptions": { 1189 | "calcs": [ 1190 | "distinctCount" 1191 | ], 1192 | "fields": "", 1193 | "values": false 1194 | }, 1195 | "text": {}, 1196 | "textMode": "auto" 1197 | }, 1198 | "pluginVersion": "8.1.3", 1199 | "targets": [ 1200 | { 1201 | "exemplar": true, 1202 | "expr": "count (confluent_cloud_connector)", 1203 | "interval": "", 1204 | "legendFormat": "", 1205 | "refId": "A" 1206 | } 1207 | ], 1208 | "title": "Connector Count", 1209 | "type": "stat" 1210 | }, 1211 | { 1212 | "datasource": null, 1213 | "fieldConfig": { 1214 | "defaults": { 1215 | "color": { 1216 | "mode": "thresholds" 1217 | }, 1218 | "mappings": [], 1219 | "thresholds": { 1220 | "mode": "absolute", 1221 | "steps": [ 1222 | { 1223 | "color": "green", 1224 | "value": null 1225 | }, 1226 | { 1227 | "color": "red", 1228 | "value": 80 1229 | } 1230 | ] 1231 | } 1232 | }, 1233 | "overrides": [] 1234 | }, 1235 | "gridPos": { 1236 | "h": 6, 1237 | "w": 3, 1238 | "x": 19, 1239 | "y": 43 1240 | }, 1241 | "id": 10, 1242 | "options": { 1243 | "colorMode": "value", 1244 | "graphMode": "area", 1245 | "justifyMode": "auto", 1246 | "orientation": "auto", 1247 | "reduceOptions": { 1248 | "calcs": [ 1249 | "distinctCount" 1250 | ], 1251 | "fields": "", 1252 | "values": false 1253 | }, 1254 | "text": {}, 1255 | "textMode": "auto" 1256 | }, 1257 | "pluginVersion": "8.1.3", 1258 | "targets": [ 1259 | { 1260 | "exemplar": true, 1261 | "expr": "count (confluent_cloud_ksqldb_cluster)", 1262 | "interval": "", 1263 | "legendFormat": "", 1264 | "refId": "A" 1265 | } 1266 | ], 1267 | "title": "ksqlDB Cluster Count", 1268 | "type": "stat" 1269 | } 1270 | ], 1271 | "title": "CCloud Object Details", 1272 | "type": "row" 1273 | } 1274 | ], 1275 | "refresh": "5s", 1276 | "schemaVersion": 30, 1277 | "style": "dark", 1278 | "tags": [], 1279 | "templating": { 1280 | "list": [] 1281 | }, 1282 | "time": { 1283 | "from": "now-1M/M", 1284 | "to": "now-1M/M" 1285 | }, 1286 | "timepicker": {}, 1287 | "timezone": "utc", 1288 | "title": "Confluent Cloud Chargeback", 1289 | "uid": "EE-Vwi_4z", 1290 | "version": 23 1291 | } 
-------------------------------------------------------------------------------- /deployables/assets/grafana/provisioning/dashboards/dashboard.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: "Prometheus" 5 | orgId: 1 6 | folder: "" 7 | type: file 8 | disableDeletion: false 9 | allowUiUpdates: true 10 | editable: true 11 | options: 12 | path: /etc/grafana/provisioning/dashboards 13 | -------------------------------------------------------------------------------- /deployables/assets/grafana/provisioning/datasources/datasource.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | datasources: 3 | - name: Prometheus 4 | type: prometheus 5 | url: http://prometheus_for_chargeback:9090 6 | isDefault: true 7 | access: proxy 8 | editable: true 9 | -------------------------------------------------------------------------------- /deployables/assets/prometheus_for_chargeback/collector.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #set -x 3 | #echo on 4 | 5 | READINESS_PROBE="/is_ready" 6 | CURRENT_TS_PROBE="/current_timestamp" 7 | 8 | READINESS_URL="${CHARGEBACK_READINESS_PROBE_URL}${READINESS_PROBE}" 9 | TS_URL="${CHARGEBACK_READINESS_PROBE_URL}${CURRENT_TS_PROBE}" 10 | 11 | SCRAPE_URL="${CHARGEBACK_METRICS_URL}" 12 | 13 | check_readiness () { 14 | # This function checks if the readiness probe is True 15 | # If it is not, it will wait 5 seconds and try again 16 | test=`wget -O - -q ${READINESS_URL} 2>&1 | cut -d ' ' -f 1` 17 | echo "Readiness probe is ${test}" 18 | while [ ${test} != "True" ] 19 | do 20 | test=`wget -O - -q ${READINESS_URL} 2>&1 | cut -d ' ' -f 1` 21 | echo "Readiness probe is ${test}" 22 | echo "Waiting for readiness probe to be True" 23 | sleep 3 24 | done 25 | } 26 | 27 | check_ts_vicinity () { 28 | # This function checks if the scrape timestamp is getting close to the current time 29 | # If it is, it will increase the scrape interval to 10 minutes 30 | # If it is not, it will set the scrape interval to 0.1 seconds 31 | TS_VALUE=`wget -O - -q ${TS_URL} 2>&1 | cut -d ' ' -f 1` 32 | VICINITY_CUTOFF=$(( `date '+%s'` - $(( 24 * 60 * 60 * 5 )) )) 33 | if [ ${TS_VALUE} -gt ${VICINITY_CUTOFF} ] 34 | then 35 | echo 600 36 | else 37 | echo 1 38 | fi 39 | } 40 | 41 | # Main loop 42 | # This loop will check if the readiness probe is True 43 | # If it is not, it will wait 5 seconds and try again 44 | # If it is, it will check if the scrape timestamp is getting close to the current time 45 | # If it is, it will increase the scrape interval to 10 minutes 46 | # If it is not, it will set the scrape interval to 0.1 seconds 47 | # It will then scrape the Chargeback API and create a new block 48 | # It will then wait for the scrape interval and repeat 49 | # Don't we just love the Auto generated Comments :) 50 | while true 51 | do 52 | check_readiness 53 | SCRAPE_INTERVAL=`check_ts_vicinity` 54 | echo "Scraping Interval set to ${SCRAPE_INTERVAL}" 55 | rm -f index.html index2.html 56 | wget -T 60 ${SCRAPE_URL} 57 | tail +37 index.html > index2.html 58 | echo "# EOF" >> index2.html 59 | promtool tsdb create-blocks-from openmetrics index2.html . 
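# Clarifying note on the pre-processing above, based on the docker-compose comments: the scraped
# exposition is saved to index.html; `tail +37` drops its leading lines (presumably the default
# python/process collector samples), and the appended "# EOF" terminator is required by the
# OpenMetrics format that `promtool tsdb create-blocks-from openmetrics` consumes. The block that
# promtool creates is written into the working directory, which per the docker-compose volume
# mounts is the shared /prometheus datastore, where the main Prometheus server compacts and serves it.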
60 | # rm -f index.html index2.html 61 | echo "Sleeping for ${SCRAPE_INTERVAL} seconds" 62 | sleep ${SCRAPE_INTERVAL} 63 | done 64 | 65 | -------------------------------------------------------------------------------- /deployables/assets/prometheus_for_chargeback/prometheus_chargeback.yml: -------------------------------------------------------------------------------- 1 | scrape_configs: 2 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | ccloud_chargeback_handler: 4 | image: abhiwalia/ccloud_chargeback_handler:1.0.4 5 | container_name: ccloud_chargeback_handler 6 | # cpus: 1 7 | restart: "no" 8 | volumes: 9 | # This is the folder which you can use to override the pre-coded config file. The internally available file still needs the environment variables to be configured with the API Keys for access. 10 | # No credentials are hard coded into the code at all, so if you do not provide any access credentials, the code will not work. 11 | - ./deployables/assets/chargeback_handler/config/config_internal.yaml:/user_config/config.yaml 12 | command: 13 | # The below command switch will alllow you to specify your custom config file. 14 | # The /user_config/config.yaml file is the default config file which is used if no config file is specified. 15 | - "--config-file" 16 | - "/user_config/config.yaml" 17 | environment: 18 | # The below environment variables are required for the code to work. 19 | # Lookback days + Billing API Key & Secret Injection 20 | - CCLOUD_LOOKBACK_DAYS=${CCLOUD_LOOKBACK_DAYS} 21 | - CCLOUD_BILLING_API_KEY=${CCLOUD_BILLING_API_KEY} 22 | - CCLOUD_BILLING_API_SECRET=${CCLOUD_BILLING_API_SECRET} 23 | # Metrics API server 24 | - METRICS_API_SERVER_URL=${METRICS_API_SERVER_URL} 25 | - METRICS_API_SERVER_ENABLE_AUTH=${METRICS_API_SERVER_ENABLE_AUTH} 26 | - METRICS_API_SERVER_AUTH_TYPE=${METRICS_API_SERVER_AUTH_TYPE} 27 | - METRICS_API_SERVER_AUTH_USERNAME=${METRICS_API_SERVER_AUTH_USERNAME} 28 | - METRICS_API_SERVER_AUTH_PASSWORD=${METRICS_API_SERVER_AUTH_PASSWORD} 29 | - CHARGEBACK_SERVER_URL=http://prometheus_for_chargeback:9090 30 | - LOG_LEVEL=${LOG_LEVEL} 31 | - ENABLE_METHOD_BREADCRUMBS=${ENABLE_METHOD_BREADCRUMBS} 32 | ports: 33 | # Exposed_port:Internal_port 34 | - 8000:8000 35 | - 8001:8001 36 | depends_on: 37 | - prometheus_for_chargeback 38 | prometheus_for_chargeback: 39 | image: prom/prometheus:v2.44.0 40 | container_name: prometheus_for_chargeback 41 | command: 42 | - "--config.file=/etc/prometheus/prometheus.yml" 43 | - "--storage.tsdb.path=/prometheus" 44 | - "--storage.tsdb.retention.time=2y" 45 | - "--storage.tsdb.retention.size=1TB" 46 | # # Enable debug for prometheus pod 47 | # # - "--log.level=debug" 48 | ports: 49 | # Exposed_port:Internal_port 50 | - 9091:9090 51 | volumes: 52 | # Prom Config file (The file is empty as the scrape is executed via promtool) 53 | - ./deployables/assets/prometheus_for_chargeback/prometheus_chargeback.yml:/etc/prometheus/prometheus.yml 54 | # This is the datastore location so that you do not lose data when the container is restarted. 
55 | - ./deployables/datastore/prometheus_for_chargeback:/prometheus 56 | # - ./deployables/assets/prometheus_for_chargeback/collector.sh:/collector/collector.sh 57 | prometheus_feeder: 58 | image: prom/prometheus:v2.44.0 59 | container_name: prometheus_feeder 60 | entrypoint: /bin/sh 61 | # tty: true 62 | command: 63 | - -c 64 | - "chmod +x /collector/collector.sh && /collector/collector.sh" 65 | environment: 66 | - CHARGEBACK_METRICS_URL=http://ccloud_chargeback_handler:8000 67 | - CHARGEBACK_READINESS_PROBE_URL=http://ccloud_chargeback_handler:8001 68 | volumes: 69 | # Promtool executable to collect old timestamped metrics and feed them to the prometheus server for compaction and aggregation. 70 | # This needs the Prometheus server datastore to be attached to this container so that it can create the chunks in there. 71 | # The chunks are then read by the Prometheus server and aggregated. 72 | - ./deployables/assets/prometheus_for_chargeback/collector.sh:/collector/collector.sh 73 | # This is the datastore location for the above prometheus server so that the aggregated dataset can be stored there. 74 | - ./deployables/datastore/prometheus_for_chargeback:/prometheus 75 | depends_on: 76 | - grafana 77 | grafana: 78 | image: grafana/grafana:10.0.1 79 | container_name: grafana 80 | environment: 81 | - "GF_SECURITY_ADMIN_USER=admin" 82 | - "GF_SECURITY_ADMIN_PASSWORD=password" 83 | - "GF_USERS_ALLOW_SIGN_UP=false" 84 | ports: 85 | # Exposed_port:Internal_port 86 | - 3000:3000 87 | volumes: 88 | # Grafana dashboards location 89 | - ./deployables/assets/grafana/provisioning/:/etc/grafana/provisioning 90 | depends_on: 91 | - ccloud_chargeback_handler 92 | -------------------------------------------------------------------------------- /image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/waliaabhishek/ccloud-chargeback-helper/bbf66450368aeae8e6c83ac9bd9184645c3c1d9b/image.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==2.2.5 2 | pandas==2.0.3 3 | prometheus_client==0.17.1 4 | psutil==5.9.5 5 | python-dateutil==2.8.2 6 | PyYAML==6.0 7 | Requests==2.31.0 8 | -------------------------------------------------------------------------------- /src/ccloud/ccloud_api/api_keys.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import pprint 4 | from dataclasses import InitVar, dataclass, field 5 | from typing import Dict, List 6 | 7 | from dateutil import parser 8 | 9 | from ccloud.connections import CCloudBase 10 | from helpers import logged_method 11 | from prometheus_processing.custom_collector import TimestampedCollector 12 | 13 | pp = pprint.PrettyPrinter(indent=2) 14 | LOGGER = logging.getLogger(__name__) 15 | 16 | 17 | @dataclass 18 | class CCloudAPIKey: 19 | api_key: str 20 | api_secret: str 21 | api_key_description: str 22 | owner_id: str 23 | cluster_id: str 24 | created_at: str 25 | 26 | 27 | api_key_prom_metrics = TimestampedCollector( 28 | "confluent_cloud_api_key", 29 | "API Key details for every API Key created within CCloud", 30 | ["api_key", "owner_id", "resource_id"], 31 | in_begin_timestamp=datetime.datetime.now(), 32 | ) 33 | # api_key_prom_status_metrics = TimestampedCollector( 34 | # "confluent_cloud_api_key_scrape_status", 35 | # "CCloud API Keys scrape status", 36 | # 
in_begin_timestamp=datetime.datetime.now(), 37 | # ) 38 | 39 | 40 | @dataclass 41 | class CCloudAPIKeyList(CCloudBase): 42 | exposed_timestamp: InitVar[datetime.datetime] = field(init=True) 43 | 44 | # ccloud_sa: service_account.CCloudServiceAccountList 45 | api_keys: Dict[str, CCloudAPIKey] = field(default_factory=dict, init=False) 46 | 47 | # This init function will initiate the base object and then check CCloud 48 | # for all the active API Keys. All API Keys that are listed in CCloud are 49 | # the added to a cache. 50 | @logged_method 51 | def __post_init__(self, exposed_timestamp: datetime.datetime) -> None: 52 | super().__post_init__() 53 | self.url = self.in_ccloud_connection.get_endpoint_url(key=self.in_ccloud_connection.uri.api_keys) 54 | LOGGER.debug(f"API Keys URL: {self.url}") 55 | self.read_all() 56 | LOGGER.debug("Exposing Prometheus Metrics for API Keys") 57 | self.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 58 | LOGGER.info("CCloud API Keys initialized successfully") 59 | 60 | def expose_prometheus_metrics(self, exposed_timestamp: datetime.datetime): 61 | LOGGER.debug("Exposing Prometheus Metrics for API Keys for timestamp: " + str(exposed_timestamp)) 62 | self.force_clear_prom_metrics() 63 | api_key_prom_metrics.set_timestamp(curr_timestamp=exposed_timestamp) 64 | for _, v in self.api_keys.items(): 65 | if v.created_at >= exposed_timestamp: 66 | api_key_prom_metrics.labels(v.api_key, v.owner_id, v.cluster_id).set(1) 67 | # api_key_prom_status_metrics.set_timestamp(curr_timestamp=exposed_timestamp).set(1) 68 | 69 | @logged_method 70 | def force_clear_prom_metrics(self): 71 | api_key_prom_metrics.clear() 72 | 73 | # This method will help reading all the API Keys that are already provisioned. 74 | # Please note that the API Secrets cannot be read back again, so if you do not have 75 | # access to the secret , you will need to generate new api key/secret pair. 76 | @logged_method 77 | def read_all(self, params={"page_size": 100}): 78 | LOGGER.debug("Reading all API Keys from Confluent Cloud") 79 | for item in self.read_from_api(params=params): 80 | self.__add_to_cache( 81 | CCloudAPIKey( 82 | api_key=item["id"], 83 | api_secret=None, 84 | api_key_description=item["spec"]["description"], 85 | owner_id=item["spec"]["owner"]["id"], 86 | cluster_id=item["spec"]["resource"]["id"], 87 | created_at=parser.isoparse(item["metadata"]["created_at"]), 88 | ) 89 | ) 90 | LOGGER.debug("Found API Key " + item["id"] + " with owner " + item["spec"]["owner"]["id"]) 91 | 92 | # resp = requests.get(url=self.url, auth=self.http_connection, params=params) 93 | # if resp.status_code == 200: 94 | # out_json = resp.json() 95 | # if out_json is not None and out_json["data"] is not None: 96 | # for item in out_json["data"]: 97 | # print("Found API Key " + item["id"] + " with owner " + item["spec"]["owner"]["id"]) 98 | # self.__add_to_cache( 99 | # CCloudAPIKey( 100 | # api_key=item["id"], 101 | # api_secret=None, 102 | # api_key_description=item["spec"]["description"], 103 | # owner_id=item["spec"]["owner"]["id"], 104 | # cluster_id=item["spec"]["resource"]["id"], 105 | # created_at=parser.isoparse(item["metadata"]["created_at"]), 106 | # ) 107 | # ) 108 | # if "next" in out_json["metadata"]: 109 | # query_params = parse.parse_qs(parse.urlsplit(out_json["metadata"]["next"]).query) 110 | # params["page_token"] = str(query_params["page_token"][0]) 111 | # self.read_all(params) 112 | # elif resp.status_code == 429: 113 | # print(f"CCloud API Per-Minute Limit exceeded. 
Sleeping for 45 seconds. Error stack: {resp.text}") 114 | # sleep(45) 115 | # print("Timer up. Resuming CCloud API scrape.") 116 | # else: 117 | # raise Exception("Could not connect to Confluent Cloud. Please check your settings. " + resp.text) 118 | 119 | @logged_method 120 | def __add_to_cache(self, api_key: CCloudAPIKey) -> None: 121 | self.api_keys[api_key.api_key] = api_key 122 | 123 | @logged_method 124 | def find_keys_with_sa(self, sa_id: str) -> List[CCloudAPIKey]: 125 | output = [] 126 | for item in self.api_keys.values(): 127 | if sa_id == item.owner_id: 128 | output.append(item) 129 | return output 130 | 131 | @logged_method 132 | def find_sa_count_for_clusters(self, cluster_id: str) -> Dict[str, int]: 133 | out = {} 134 | for item in self.api_keys.values(): 135 | if item.cluster_id == cluster_id: 136 | count = out.get(item.owner_id, int(0)) 137 | out[item.owner_id] = count + 1 138 | return out 139 | 140 | @logged_method 141 | def find_keys_with_sa_and_cluster(self, sa_id: str, cluster_id: str) -> List[CCloudAPIKey]: 142 | output = [] 143 | for item in self.api_keys.values(): 144 | if cluster_id == item.cluster_id and sa_id == item.owner_id: 145 | output.append(item) 146 | return output 147 | -------------------------------------------------------------------------------- /src/ccloud/ccloud_api/clusters.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from dataclasses import InitVar, dataclass, field 4 | from typing import Dict 5 | 6 | from ccloud.ccloud_api.environments import CCloudEnvironmentList 7 | from ccloud.connections import CCloudBase 8 | from helpers import logged_method 9 | from prometheus_processing.custom_collector import TimestampedCollector 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | 13 | 14 | @dataclass 15 | class CCloudCluster: 16 | env_id: str 17 | cluster_id: str 18 | cluster_name: str 19 | cloud: str 20 | availability: str 21 | region: str 22 | bootstrap_url: str 23 | 24 | 25 | kafka_cluster_prom_metrics = TimestampedCollector( 26 | "confluent_cloud_kafka_cluster", 27 | "Cluster Details for every Kafka Cluster created within CCloud", 28 | ["cluster_id", "env_id", "display_name"], 29 | in_begin_timestamp=datetime.datetime.now(), 30 | ) 31 | # kafka_cluster_prom_status_metrics = TimestampedCollector( 32 | # "confluent_cloud_kafka_cluster_scrape_status", 33 | # "CCloud Kafka Cluster scrape status", 34 | # in_begin_timestamp=datetime.datetime.now(), 35 | # ) 36 | 37 | 38 | @dataclass 39 | class CCloudClusterList(CCloudBase): 40 | ccloud_envs: CCloudEnvironmentList 41 | exposed_timestamp: InitVar[datetime.datetime] = field(init=True) 42 | 43 | clusters: Dict[str, CCloudCluster] = field(default_factory=dict, init=False) 44 | 45 | def __post_init__(self, exposed_timestamp: datetime.datetime) -> None: 46 | super().__post_init__() 47 | self.url = self.in_ccloud_connection.get_endpoint_url(key=self.in_ccloud_connection.uri.clusters) 48 | LOGGER.debug(f"Kafka Cluster URL: {self.url}") 49 | self.read_all(params={"page_size": 50}) 50 | LOGGER.debug("Exposing Prometheus Metrics for Kafka Clusters") 51 | self.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 52 | LOGGER.info("CCloud Kafka Clusters initialized successfully") 53 | 54 | @logged_method 55 | def expose_prometheus_metrics(self, exposed_timestamp: datetime.datetime): 56 | LOGGER.debug("Exposing Prometheus Metrics for Kafka Clusters for timestamp: " + str(exposed_timestamp)) 57 | self.force_clear_prom_metrics() 58 | 
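        # Re-stamp the freshly cleared collector with the hour currently being exposed, so that when the
        # collector.sh/promtool pipeline scrapes this exposition the samples can be backfilled at that
        # historical timestamp rather than at scrape time.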
kafka_cluster_prom_metrics.set_timestamp(curr_timestamp=exposed_timestamp) 59 | for _, v in self.clusters.items(): 60 | # TODO: created datetime is missing from cluster creation date. 61 | kafka_cluster_prom_metrics.labels(v.cluster_id, v.env_id, v.cluster_name).set(1) 62 | # kafka_cluster_prom_status_metrics.set_timestamp(curr_timestamp=exposed_timestamp).set(1) 63 | 64 | @logged_method 65 | def force_clear_prom_metrics(self): 66 | kafka_cluster_prom_metrics.clear() 67 | 68 | @logged_method 69 | def __str__(self): 70 | for v in self.clusters.values(): 71 | print( 72 | "{:<15} {:<15} {:<25} {:<10} {:<25} {:<50}".format( 73 | v.env_id, v.cluster_id, v.cluster_name, v.cloud, v.availability, v.bootstrap_url 74 | ) 75 | ) 76 | 77 | @logged_method 78 | def read_all(self, params={"page_size": 100}): 79 | LOGGER.debug("Reading all Kafka Clusters from Confluent Cloud") 80 | for env_item in self.ccloud_envs.env.values(): 81 | LOGGER.info("Checking CCloud Environment " + env_item.env_id + " for any provisioned Kafka Clusters.") 82 | params["environment"] = env_item.env_id 83 | for item in self.read_from_api(params=params): 84 | self.__add_to_cache( 85 | CCloudCluster( 86 | env_id=env_item.env_id, 87 | cluster_id=item["id"], 88 | cluster_name=item["spec"]["display_name"], 89 | cloud=item["spec"]["cloud"], 90 | availability=item["spec"]["availability"], 91 | region=item["spec"]["region"], 92 | bootstrap_url=item["spec"]["kafka_bootstrap_endpoint"], 93 | ) 94 | ) 95 | LOGGER.debug("Found cluster " + item["id"] + " with name " + item["spec"]["display_name"]) 96 | 97 | # params["environment"] = env_id 98 | # resp = requests.get(url=self.url, auth=self.http_connection, params=params) 99 | # if resp.status_code == 200: 100 | # out_json = resp.json() 101 | # if out_json is not None and out_json["data"] is not None: 102 | # for item in out_json["data"]: 103 | # print("Found cluster " + item["id"] + " with name " + item["spec"]["display_name"]) 104 | # self.__add_to_cache( 105 | # CCloudCluster( 106 | # env_id=env_id, 107 | # cluster_id=item["id"], 108 | # cluster_name=item["spec"]["display_name"], 109 | # cloud=item["spec"]["cloud"], 110 | # availability=item["spec"]["availability"], 111 | ## region=item["spec"]["region"], 112 | # bootstrap_url=item["spec"]["kafka_bootstrap_endpoint"], 113 | # ) 114 | # ) 115 | # if "next" in out_json["metadata"]: 116 | # query_params = parse.parse_qs(parse.urlsplit(out_json["metadata"]["next"]).query) 117 | # params["page_token"] = str(query_params["page_token"][0]) 118 | # self.read_all(env_id, params) 119 | # elif resp.status_code == 429: 120 | # print(f"CCloud API Per-Minute Limit exceeded. Sleeping for 45 seconds. Error stack: {resp.text}") 121 | # sleep(45) 122 | # print("Timer up. Resuming CCloud API scrape.") 123 | # else: 124 | # raise Exception("Could not connect to Confluent Cloud. Please check your settings. 
" + resp.text) 125 | 126 | @logged_method 127 | def __add_to_cache(self, ccloud_cluster: CCloudCluster) -> None: 128 | self.clusters[ccloud_cluster.cluster_id] = ccloud_cluster 129 | 130 | # Read/Find one Cluster from the cache 131 | @logged_method 132 | def find_cluster(self, cluster_id): 133 | return self.clusters[cluster_id] 134 | -------------------------------------------------------------------------------- /src/ccloud/ccloud_api/connectors.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from dataclasses import InitVar, dataclass, field 4 | from time import sleep 5 | from typing import Dict 6 | 7 | import requests 8 | 9 | from ccloud.ccloud_api.api_keys import CCloudAPIKeyList 10 | from ccloud.ccloud_api.clusters import CCloudCluster, CCloudClusterList 11 | from ccloud.ccloud_api.service_accounts import CCloudServiceAccountList 12 | from ccloud.ccloud_api.user_accounts import CCloudUserAccountList 13 | from ccloud.connections import CCloudBase 14 | from helpers import logged_method 15 | from prometheus_processing.custom_collector import TimestampedCollector 16 | 17 | LOGGER = logging.getLogger(__name__) 18 | 19 | 20 | @dataclass 21 | class CCloudConnector: 22 | env_id: str 23 | cluster_id: str 24 | connector_id: str 25 | connector_name: str 26 | connector_class: str 27 | owner_id: str 28 | 29 | 30 | kafka_connectors_prom_metrics = TimestampedCollector( 31 | "confluent_cloud_connector", 32 | "Connector Details for every Fully Managed Connector created within CCloud", 33 | ["connector_id", "cluster_id", "env_id"], 34 | in_begin_timestamp=datetime.datetime.now(), 35 | ) 36 | # kafka_connectors_prom_status_metrics = TimestampedCollector( 37 | # "confluent_cloud_kafka_connector_scrape_status", 38 | # "CCloud Kafka Connectors scrape status", 39 | # in_begin_timestamp=datetime.datetime.now(), 40 | # ) 41 | 42 | 43 | @dataclass 44 | class CCloudConnectorList(CCloudBase): 45 | ccloud_kafka_clusters: CCloudClusterList 46 | ccloud_service_accounts: CCloudServiceAccountList 47 | ccloud_users: CCloudUserAccountList 48 | ccloud_api_keys: CCloudAPIKeyList 49 | exposed_timestamp: InitVar[datetime.datetime] = field(init=True) 50 | 51 | connectors: Dict[str, CCloudConnector] = field(default_factory=dict, init=False) 52 | url_get_connector_config: str = field(init=False) 53 | 54 | def __post_init__(self, exposed_timestamp: datetime.datetime) -> None: 55 | super().__post_init__() 56 | self.url = self.in_ccloud_connection.get_endpoint_url(key=self.in_ccloud_connection.uri.list_connector_names) 57 | LOGGER.debug(f"Kafka Connector URL: {self.url}") 58 | self.url_get_connector_config = self.in_ccloud_connection.get_endpoint_url( 59 | key=self.in_ccloud_connection.uri.get_connector_config 60 | ) 61 | LOGGER.debug(f"Kafka Get Connector Config URL: {self.url}") 62 | self.read_all() 63 | LOGGER.debug("Exposing Prometheus Metrics for Kafka Connector") 64 | self.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 65 | LOGGER.info("CCloud Kafka Connectors initialized successfully") 66 | 67 | @logged_method 68 | def expose_prometheus_metrics(self, exposed_timestamp: datetime.datetime): 69 | LOGGER.debug("Exposing Prometheus Metrics for Kafka Connector for timestamp: " + str(exposed_timestamp)) 70 | self.force_clear_prom_metrics() 71 | kafka_connectors_prom_metrics.set_timestamp(curr_timestamp=exposed_timestamp) 72 | for _, v in self.connectors.items(): 73 | # TODO: created datetime is missing from connector creation date. 
74 | kafka_connectors_prom_metrics.labels(v.connector_id, v.cluster_id, v.env_id).set(1) 75 | 76 | @logged_method 77 | def force_clear_prom_metrics(self): 78 | kafka_connectors_prom_metrics.clear() 79 | 80 | @logged_method 81 | def __str__(self): 82 | for v in self.cluster.values(): 83 | print( 84 | "{:<15} {:<15} {:<25} {:<10} {:<25} {:<50}".format( 85 | v.env_id, v.cluster_id, v.cluster_name, v.cloud, v.availability, v.bootstrap_url 86 | ) 87 | ) 88 | 89 | @logged_method 90 | def read_all(self): 91 | LOGGER.debug("Reading all Kafka Connector from Confluent Cloud") 92 | for kafka_cluster in self.ccloud_kafka_clusters.clusters.values(): 93 | LOGGER.info("Checking Environment " + kafka_cluster.env_id + " for any provisioned connectors.") 94 | for connector_item in self.read_all_connector_details(kafka_cluster=kafka_cluster): 95 | LOGGER.debug( 96 | f'Found Connector {connector_item["status"]["name"]} linked to Kafka Cluster ID {kafka_cluster.cluster_id} with Cluster Name {kafka_cluster.cluster_name}' 97 | ) 98 | self.read_connector_config(kafka_cluster=kafka_cluster, connector_details=connector_item) 99 | 100 | def read_all_connector_details(self, kafka_cluster: CCloudCluster, params={}): 101 | temp_url = self.url.format(environment_id=kafka_cluster.env_id, kafka_cluster_id=kafka_cluster.cluster_id) 102 | LOGGER.debug(f"Reading from CCloud API: {temp_url}") 103 | resp = requests.get(url=temp_url, auth=self.http_connection, timeout=10, params=params) 104 | if resp.status_code == 200: 105 | LOGGER.debug(f"Successfully fetched the Connector details for Kafka Cluster {kafka_cluster.cluster_id}") 106 | out_json = resp.json() 107 | # if out_json is not None and out_json["data"] is not None: 108 | for item in out_json.values(): 109 | yield item 110 | elif resp.status_code == 429: 111 | LOGGER.info(f"CCloud API Per-Minute Limit exceeded. Sleeping for 45 seconds. Error stack: {resp.text}") 112 | sleep(45) 113 | LOGGER.info("Timer up. Resuming CCloud API scrape.") 114 | elif resp.status_code >= 400: 115 | LOGGER.error( 116 | f"Cannot fetch the Connector details. API Error Code: {resp.status_code} API Error Message: {resp.text}" 117 | ) 118 | 119 | @logged_method 120 | def read_connector_config(self, kafka_cluster: dict, connector_details:dict): 121 | connector_id = str(connector_details["id"]["id"]).strip().replace(" ", "") 122 | connector_config = connector_details["info"]["config"] 123 | connector_name=str(connector_config["name"]).strip().replace(" ", "") 124 | LOGGER.debug("Found connector config for connector " + connector_config["name"]) 125 | owner_id = None 126 | api_key_prop = "kafka.api.key" 127 | sa_prop = "kafka.service.account.id" 128 | auth_mode = connector_config.get("kafka.auth.mode", "NOT_FOUND") 129 | if auth_mode == "NOT_FOUND": 130 | LOGGER.warn(f"Connector {connector_config['name']} has no authentication mode set. Trying to find a matching auth mode.") 131 | if connector_config.get(api_key_prop, None) is not None: 132 | auth_mode = "KAFKA_API_KEY" 133 | elif connector_config.get(sa_prop, None) is not None: 134 | auth_mode = "SERVICE_ACCOUNT" 135 | else: 136 | err_str = f"Connector {connector_config['name']} has no authentication mode set and does not have '{api_key_prop}' or '{sa_prop}' property set. Code cannot proceed." 
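                # Without either property the connector cannot be tied back to an API key owner or a
                # service account, so its ownership (and therefore its chargeback attribution) cannot be
                # derived; the code fails fast below instead of silently mis-attributing the cost.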
137 | LOGGER.debug(f"Connector response received: \n{connector_details}") 138 | raise Exception(err_str) 139 | match auth_mode: 140 | case "KAFKA_API_KEY": 141 | api_key = connector_config[api_key_prop] 142 | # Check if all the API_KEY value is protected or not 143 | if not all([ch == "*" for ch in api_key]): 144 | # Locate the API Key details 145 | if self.ccloud_api_keys.api_keys.get(api_key) is not None: 146 | owner_id = self.ccloud_api_keys.api_keys[api_key].owner_id 147 | else: 148 | LOGGER.warn(f"Connector API Key Not found in the Active API Keys. Connector {connector_config['name']} returned {api_key} as the executioner.") 149 | owner_id = "connector_api_key_cannot_be_mapped" 150 | LOGGER.debug( 151 | f"API Key is unavailable for Mapping Connector {connector_config['name']} to its corresponding holder. Connector Ownership defaulted to {owner_id}." 152 | ) 153 | else: 154 | LOGGER.warn(f"Connector API Key Masked. Found API Key {api_key} for Connector {connector_config['name']}.") 155 | owner_id = "connector_api_key_masked" 156 | LOGGER.debug( 157 | f"API Key is unavailable for Mapping Connector {connector_config['name']} to its corresponding Service Account. Connector Ownership defaulted to {owner_id}." 158 | ) 159 | case "SERVICE_ACCOUNT": 160 | owner_id = connector_config[sa_prop] 161 | self.__add_to_cache( 162 | CCloudConnector( 163 | env_id=kafka_cluster.env_id, 164 | cluster_id=kafka_cluster.cluster_id, 165 | connector_id=connector_id, 166 | connector_name=connector_name, 167 | connector_class=connector_config["connector.class"], 168 | owner_id=owner_id, 169 | ) 170 | ) 171 | 172 | @logged_method 173 | def __add_to_cache(self, connector: CCloudConnector) -> None: 174 | self.connectors[f"{connector.connector_id}"] = connector 175 | 176 | # def locate_api_key_owner(self, api_key: str) -> CCloudUserAccount | CCloudServiceAccount: 177 | # key = self.ccloud_api_keys.api_keys[api_key] 178 | # if key.owner_id in self.ccloud_service_accounts.sa.keys(): 179 | # return self.ccloud_service_accounts.sa[key.owner_id] 180 | # elif key.owner_id in self.ccloud_users.users.keys(): 181 | # return self.ccloud_users.users[key.owner_id] 182 | 183 | # Read/Find one Cluster from the cache 184 | @logged_method 185 | def find_cluster(self, cluster_id): 186 | return self.cluster[cluster_id] 187 | -------------------------------------------------------------------------------- /src/ccloud/ccloud_api/environments.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from dataclasses import InitVar, dataclass, field 4 | from typing import Dict 5 | 6 | from dateutil import parser 7 | 8 | from ccloud.connections import CCloudBase 9 | from helpers import logged_method 10 | from prometheus_processing.custom_collector import TimestampedCollector 11 | 12 | LOGGER = logging.getLogger(__name__) 13 | 14 | 15 | @dataclass 16 | class CCloudEnvironment: 17 | env_id: str 18 | display_name: str 19 | created_at: str 20 | 21 | 22 | env_prom_metrics = TimestampedCollector( 23 | "confluent_cloud_environment", 24 | "Environment Details for every Environment created within CCloud", 25 | ["env_id", "display_name"], 26 | in_begin_timestamp=datetime.datetime.now(), 27 | ) 28 | # env_prom_status_metrics = TimestampedCollector( 29 | # "confluent_cloud_env_scrape_status", 30 | # "CCloud Environments scrape status", 31 | # in_begin_timestamp=datetime.datetime.now(), 32 | # ) 33 | 34 | 35 | @dataclass(kw_only=True) 36 | class CCloudEnvironmentList(CCloudBase): 
37 | env: Dict[str, CCloudEnvironment] = field(default_factory=dict, init=False) 38 | exposed_timestamp: InitVar[datetime.datetime] = field(init=True) 39 | 40 | def __post_init__(self, exposed_timestamp: datetime.datetime) -> None: 41 | super().__post_init__() 42 | self.url = self.in_ccloud_connection.get_endpoint_url(key=self.in_ccloud_connection.uri.environments) 43 | LOGGER.debug(f"Environment List URL: {self.url}") 44 | self.read_all() 45 | LOGGER.debug("Exposing Prometheus Metrics for Environment List") 46 | self.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 47 | LOGGER.info("CCloud Environment List initialized successfully") 48 | 49 | @logged_method 50 | def expose_prometheus_metrics(self, exposed_timestamp: datetime.datetime): 51 | LOGGER.debug("Exposing Prometheus Metrics for Environment List for timestamp: " + str(exposed_timestamp)) 52 | self.force_clear_prom_metrics() 53 | env_prom_metrics.set_timestamp(curr_timestamp=exposed_timestamp) 54 | for _, v in self.env.items(): 55 | if v.created_at >= exposed_timestamp: 56 | env_prom_metrics.labels(v.env_id, v.display_name).set(1) 57 | # env_prom_status_metrics.set_timestamp(curr_timestamp=exposed_timestamp).set(1) 58 | 59 | @logged_method 60 | def force_clear_prom_metrics(self): 61 | env_prom_metrics.clear() 62 | 63 | def __str__(self): 64 | LOGGER.debug("Found " + str(len(self.env)) + " environments.") 65 | for v in self.env.values(): 66 | print("{:<15} {:<40}".format(v.env_id, v.display_name)) 67 | 68 | @logged_method 69 | def read_all(self, params={"page_size": 100}): 70 | LOGGER.debug("Reading all Environment List from Confluent Cloud") 71 | for item in self.read_from_api(params=params): 72 | self.__add_env_to_cache( 73 | CCloudEnvironment( 74 | env_id=item["id"], 75 | display_name=item["display_name"], 76 | created_at=parser.isoparse(item["metadata"]["created_at"]), 77 | ) 78 | ) 79 | LOGGER.debug("Found environment " + item["id"] + " with name " + item["display_name"]) 80 | # resp = requests.get(url=self.url, auth=self.http_connection, params=params) 81 | # if resp.status_code == 200: 82 | # out_json = resp.json() 83 | # if out_json is not None and out_json["data"] is not None: 84 | # for item in out_json["data"]: 85 | # self.__add_env_to_cache( 86 | # CCloudEnvironment( 87 | # env_id=item["id"], 88 | # display_name=item["display_name"], 89 | # created_at=parser.isoparse(item["metadata"]["created_at"]), 90 | # ) 91 | # ) 92 | # print("Found environment " + item["id"] + " with name " + item["display_name"]) 93 | # if "next" in out_json["metadata"]: 94 | # query_params = parse.parse_qs(parse.urlsplit(out_json["metadata"]["next"]).query) 95 | # params["page_token"] = str(query_params["page_token"][0]) 96 | # self.read_all(params) 97 | # elif resp.status_code == 429: 98 | # print(f"CCloud API Per-Minute Limit exceeded. Sleeping for 45 seconds. Error stack: {resp.text}") 99 | # sleep(45) 100 | # print("Timer up. Resuming CCloud API scrape.") 101 | # else: 102 | # raise Exception("Could not connect to Confluent Cloud. Please check your settings. 
" + resp.text) 103 | 104 | @logged_method 105 | def __add_env_to_cache(self, ccloud_env: CCloudEnvironment) -> None: 106 | self.env[ccloud_env.env_id] = ccloud_env 107 | 108 | # Read/Find one Cluster from the cache 109 | @logged_method 110 | def find_environment(self, env_id): 111 | return self.env[env_id] 112 | -------------------------------------------------------------------------------- /src/ccloud/ccloud_api/ksqldb_clusters.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import pprint 4 | from dataclasses import InitVar, dataclass, field 5 | from typing import Dict 6 | 7 | from dateutil import parser 8 | 9 | from ccloud.ccloud_api.environments import CCloudEnvironmentList 10 | from ccloud.connections import CCloudBase 11 | from helpers import logged_method 12 | from prometheus_processing.custom_collector import TimestampedCollector 13 | 14 | pp = pprint.PrettyPrinter(indent=2) 15 | LOGGER = logging.getLogger(__name__) 16 | 17 | 18 | @dataclass 19 | class CCloudKsqldbCluster: 20 | cluster_id: str 21 | cluster_name: str 22 | csu_count: str 23 | env_id: str 24 | kafka_cluster_id: str 25 | owner_id: str 26 | created_at: str 27 | 28 | 29 | ksqldb_prom_metrics = TimestampedCollector( 30 | "confluent_cloud_ksqldb_cluster", 31 | "Environment Details for every Environment created within CCloud", 32 | [ 33 | "cluster_id", 34 | "env_id", 35 | "kafka_cluster_id", 36 | ], 37 | in_begin_timestamp=datetime.datetime.now(), 38 | ) 39 | # ksqldb_prom_status_metrics = TimestampedCollector( 40 | # "confluent_cloud_ksqldb_scrape_status", "CCloud ksqlDB scrape status", in_begin_timestamp=datetime.datetime.now(), 41 | # ) 42 | 43 | 44 | @dataclass 45 | class CCloudKsqldbClusterList(CCloudBase): 46 | ccloud_envs: CCloudEnvironmentList 47 | exposed_timestamp: InitVar[datetime.datetime] = field(init=True) 48 | 49 | ksqldb_clusters: Dict[str, CCloudKsqldbCluster] = field(default_factory=dict, init=False) 50 | 51 | # This init function will initiate the base object and then check CCloud 52 | # for all the active API Keys. All API Keys that are listed in CCloud are 53 | # the added to a cache. 54 | def __post_init__(self, exposed_timestamp: datetime.datetime) -> None: 55 | super().__post_init__() 56 | self.url = self.in_ccloud_connection.get_endpoint_url(key=self.in_ccloud_connection.uri.list_ksql_clusters) 57 | LOGGER.debug(f"ksqlDB Cluster URL: {self.url}") 58 | self.read_all() 59 | LOGGER.debug("Exposing Prometheus Metrics for ksqlDB Cluster") 60 | self.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 61 | LOGGER.info("CCloud ksqlDB Cluster initialized successfully") 62 | 63 | @logged_method 64 | def expose_prometheus_metrics(self, exposed_timestamp: datetime.datetime): 65 | LOGGER.debug("Exposing Prometheus Metrics for ksqlDB Cluster for timestamp: " + str(exposed_timestamp)) 66 | self.force_clear_prom_metrics() 67 | ksqldb_prom_metrics.set_timestamp(curr_timestamp=exposed_timestamp) 68 | for _, v in self.ksqldb_clusters.items(): 69 | if v.created_at >= exposed_timestamp: 70 | ksqldb_prom_metrics.labels(v.cluster_id, v.env_id, v.kafka_cluster_id).set(1) 71 | # ksqldb_prom_status_metrics.set_timestamp(curr_timestamp=exposed_timestamp).set(1) 72 | 73 | @logged_method 74 | def force_clear_prom_metrics(self): 75 | ksqldb_prom_metrics.clear() 76 | 77 | # This method will help reading all the API Keys that are already provisioned. 
78 | # Please note that the API Secrets cannot be read back again, so if you do not have 79 | # access to the secret , you will need to generate new api key/secret pair. 80 | @logged_method 81 | def read_all(self, params={"page_size": 100}): 82 | LOGGER.debug("Reading all ksqlDB Cluster from Confluent Cloud") 83 | for env_item in self.ccloud_envs.env.values(): 84 | LOGGER.info("Checking CCloud Environment " + env_item.env_id + " for any provisioned ksqlDB Clusters.") 85 | params["environment"] = env_item.env_id 86 | for item in self.read_from_api(params=params): 87 | owner_id = None 88 | if item["spec"]["credential_identity"]["id"]: 89 | owner_id = item["spec"]["credential_identity"]["id"] 90 | else: 91 | owner_id = "ksqldb_owner_id_missing_in_api_response" 92 | LOGGER.warn( 93 | f'ksqlDB API does not provide any Owner ID for cluster {item["id"]}. ksqlDB cluster Ownership will default to a static string' 94 | ) 95 | self.__add_to_cache( 96 | CCloudKsqldbCluster( 97 | cluster_id=item["id"], 98 | cluster_name=item["spec"]["display_name"], 99 | csu_count=item["spec"]["csu"], 100 | env_id=item["spec"]["environment"]["id"], 101 | kafka_cluster_id=item["spec"]["kafka_cluster"]["id"], 102 | owner_id=owner_id, 103 | created_at=parser.isoparse(item["metadata"]["created_at"]), 104 | ) 105 | ) 106 | LOGGER.debug("Found ksqlDB Cluster " + item["id"] + " with name " + item["spec"]["display_name"]) 107 | 108 | @logged_method 109 | def __add_to_cache(self, ksqldb_cluster: CCloudKsqldbCluster) -> None: 110 | self.ksqldb_clusters[ksqldb_cluster.cluster_id] = ksqldb_cluster 111 | -------------------------------------------------------------------------------- /src/ccloud/ccloud_api/service_accounts.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from dataclasses import InitVar, dataclass, field 4 | from typing import Dict 5 | 6 | from dateutil import parser 7 | 8 | from ccloud.connections import CCloudBase 9 | from helpers import logged_method 10 | from prometheus_processing.custom_collector import TimestampedCollector 11 | 12 | LOGGER = logging.getLogger(__name__) 13 | 14 | 15 | @dataclass 16 | class CCloudServiceAccount: 17 | resource_id: str 18 | name: str 19 | description: str 20 | created_at: str 21 | updated_at: str 22 | 23 | 24 | sa_prom_metrics = TimestampedCollector( 25 | "confluent_cloud_sa", 26 | "Environment Details for every Environment created within CCloud", 27 | ["sa_id", "display_name"], 28 | in_begin_timestamp=datetime.datetime.now(), 29 | ) 30 | # sa_prom_status_metrics = TimestampedCollector( 31 | # "confluent_cloud_sa_scrape_status", 32 | # "CCloud Service Accounts scrape status", 33 | # in_begin_timestamp=datetime.datetime.now(), 34 | # ) 35 | 36 | 37 | @dataclass(kw_only=True) 38 | class CCloudServiceAccountList(CCloudBase): 39 | exposed_timestamp: InitVar[datetime.datetime] = field(init=True) 40 | sa: Dict[str, CCloudServiceAccount] = field(default_factory=dict, init=False) 41 | 42 | def __post_init__(self, exposed_timestamp: datetime.datetime) -> None: 43 | super().__post_init__() 44 | self.url = self.in_ccloud_connection.get_endpoint_url(key=self.in_ccloud_connection.uri.service_accounts) 45 | LOGGER.debug(f"Service Account URL: {self.url}") 46 | self.read_all() 47 | LOGGER.debug("Exposing Prometheus Metrics for Service Accounts") 48 | self.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 49 | LOGGER.info("CCloud Service Accounts initialized successfully") 50 | 51 | @logged_method 52 | def 
expose_prometheus_metrics(self, exposed_timestamp: datetime.datetime): 53 | LOGGER.debug("Exposing Prometheus Metrics for Service Accounts for timestamp: " + str(exposed_timestamp)) 54 | self.force_clear_prom_metrics() 55 | sa_prom_metrics.set_timestamp(curr_timestamp=exposed_timestamp) 56 | for _, v in self.sa.items(): 57 | if v.created_at >= exposed_timestamp: 58 | sa_prom_metrics.labels(v.resource_id, v.name).set(1) 59 | # sa_prom_status_metrics.set_timestamp(curr_timestamp=exposed_timestamp).set(1) 60 | 61 | @logged_method 62 | def force_clear_prom_metrics(self): 63 | sa_prom_metrics.clear() 64 | 65 | def __str__(self) -> str: 66 | for item in self.sa.values(): 67 | print("{:<15} {:<40} {:<50}".format(item.resource_id, item.name, item.description)) 68 | 69 | # Read ALL Service Account details from Confluent Cloud 70 | @logged_method 71 | def read_all(self, params={"page_size": 100}): 72 | LOGGER.debug("Reading all Service Accounts from Confluent Cloud") 73 | for item in self.read_from_api(params=params): 74 | self.__add_to_cache( 75 | CCloudServiceAccount( 76 | resource_id=item["id"], 77 | name=item["display_name"], 78 | description=item["description"], 79 | created_at=parser.isoparse(item["metadata"]["created_at"]), 80 | updated_at=parser.isoparse(item["metadata"]["updated_at"]), 81 | ) 82 | ) 83 | LOGGER.debug(f"Found Service Account: {item['id']}; Name {item['display_name']}") 84 | 85 | @logged_method 86 | def __add_to_cache(self, ccloud_sa: CCloudServiceAccount) -> None: 87 | self.sa[ccloud_sa.resource_id] = ccloud_sa 88 | 89 | # Read/Find one SA from the cache 90 | @logged_method 91 | def find_sa(self, sa_name): 92 | for item in self.sa.values(): 93 | if sa_name == item.name: 94 | return item 95 | return None 96 | 97 | # def __delete_from_cache(self, res_id): 98 | # self.sa.pop(res_id, None) 99 | 100 | # Create/Find one SA and add it to the cache, so that we do not have to refresh the cache manually 101 | # def create_sa(self, sa_name, description=None) -> Tuple[CCloudServiceAccount, bool]: 102 | # temp = self.find_sa(sa_name) 103 | # if temp: 104 | # return temp, False 105 | # # print("Creating a new Service Account with name: " + sa_name) 106 | # payload = { 107 | # "display_name": sa_name, 108 | # "description": str("Account for " + sa_name + " created by CI/CD framework") 109 | # if not description 110 | # else description, 111 | # } 112 | # resp = requests.post( 113 | # url=self.url, 114 | # auth=self.http_connection, 115 | # json=payload, 116 | # ) 117 | # if resp.status_code == 201: 118 | # sa_details = resp.json() 119 | # sa_value = CCloudServiceAccount( 120 | # resource_id=sa_details["id"], 121 | # name=sa_details["display_name"], 122 | # description=sa_details["description"], 123 | # created_at=sa_details["metadata"]["created_at"], 124 | # updated_at=sa_details["metadata"]["updated_at"], 125 | # is_ignored=False, 126 | # ) 127 | # self.__add_to_cache(ccloud_sa=sa_value) 128 | # return (sa_value, True) 129 | # else: 130 | # raise Exception("Could not connect to Confluent Cloud. Please check your settings. " + resp.text) 131 | 132 | # def delete_sa(self, sa_name) -> bool: 133 | # temp = self.find_sa(sa_name) 134 | # if not temp: 135 | # print("Did not find Service Account with name '" + sa_name + "'. 
Not deleting anything.") 136 | # return False 137 | # else: 138 | # resp = requests.delete(url=str(self.url + "/" + temp.resource_id), auth=self.http_connection) 139 | # if resp.status_code == 204: 140 | # self.__delete_from_cache(temp.resource_id) 141 | # return True 142 | # else: 143 | # raise Exception("Could not perform the DELETE operation. Please check your settings. " + resp.text) 144 | 145 | # def __try_detect_internal_service_accounts(self, sa_name: str) -> bool: 146 | # if sa_name.startswith(("Connect.lcc-", "KSQL.lksqlc-")): 147 | # return True 148 | # else: 149 | # return False 150 | -------------------------------------------------------------------------------- /src/ccloud/ccloud_api/user_accounts.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from dataclasses import InitVar, dataclass, field 4 | from typing import Dict 5 | 6 | from dateutil import parser 7 | 8 | from ccloud.connections import CCloudBase 9 | from helpers import logged_method 10 | from prometheus_processing.custom_collector import TimestampedCollector 11 | 12 | LOGGER = logging.getLogger(__name__) 13 | 14 | 15 | @dataclass 16 | class CCloudUserAccount: 17 | resource_id: str 18 | name: str 19 | created_at: str 20 | updated_at: str 21 | 22 | 23 | users_prom_metrics = TimestampedCollector( 24 | "confluent_cloud_user", 25 | "Environment Details for every Environment created within CCloud", 26 | ["sa_id", "display_name"], 27 | in_begin_timestamp=datetime.datetime.now(), 28 | ) 29 | # users_prom_status_metrics = TimestampedCollector( 30 | # "confluent_cloud_users_scrape_status", 31 | # "CCloud User Accounts scrape status", 32 | # in_begin_timestamp=datetime.datetime.now(), 33 | # ) 34 | 35 | 36 | @dataclass(kw_only=True) 37 | class CCloudUserAccountList(CCloudBase): 38 | exposed_timestamp: InitVar[datetime.datetime] = field(init=True) 39 | users: Dict[str, CCloudUserAccount] = field(default_factory=dict, init=False) 40 | 41 | def __post_init__(self, exposed_timestamp: datetime.datetime) -> None: 42 | super().__post_init__() 43 | self.url = self.in_ccloud_connection.get_endpoint_url(key=self.in_ccloud_connection.uri.user_accounts) 44 | LOGGER.debug(f"CCloud Users Fetch URL: {self.url}") 45 | self.read_all() 46 | LOGGER.debug("Exposing Prometheus Metrics for CCloud Users") 47 | self.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 48 | LOGGER.info("CCloud Users initialized successfully") 49 | 50 | @logged_method 51 | def expose_prometheus_metrics(self, exposed_timestamp: datetime.datetime): 52 | LOGGER.debug("Exposing Prometheus Metrics for Users for timestamp: " + str(exposed_timestamp)) 53 | self.force_clear_prom_metrics() 54 | users_prom_metrics.set_timestamp(curr_timestamp=exposed_timestamp) 55 | for _, v in self.users.items(): 56 | if v.created_at >= exposed_timestamp: 57 | users_prom_metrics.labels(v.resource_id, v.name).set(1) 58 | # users_prom_status_metrics.set_timestamp(curr_timestamp=exposed_timestamp).set(1) 59 | 60 | @logged_method 61 | def force_clear_prom_metrics(self): 62 | users_prom_metrics.clear() 63 | 64 | def __str__(self) -> str: 65 | for item in self.users.values(): 66 | print("{:<15} {:<40} {:<50}".format(item.resource_id, item.name, item.description)) 67 | 68 | # Read ALL Service Account details from Confluent Cloud 69 | @logged_method 70 | def read_all(self, params={"page_size": 100}): 71 | LOGGER.debug("Reading all CCloud Users from Confluent Cloud") 72 | for item in 
self.read_from_api(params=params): 73 | self.__add_to_cache( 74 | CCloudUserAccount( 75 | resource_id=item["id"], 76 | name=item["full_name"], 77 | created_at=parser.isoparse(item["metadata"]["created_at"]), 78 | updated_at=parser.isoparse(item["metadata"]["updated_at"]), 79 | ) 80 | ) 81 | LOGGER.debug(f"Found User: {item['id']}; Name {item['full_name']}") 82 | # resp = requests.get(url=self.url, auth=self.http_connection, params=params) 83 | # if resp.status_code == 200: 84 | # out_json = resp.json() 85 | # if out_json is not None and out_json["data"] is not None: 86 | # for item in out_json["data"]: 87 | # self.__add_to_cache( 88 | # CCloudUserAccount( 89 | # resource_id=item["id"], 90 | # name=item["full_name"], 91 | # created_at=parser.isoparse(item["metadata"]["created_at"]), 92 | # updated_at=parser.isoparse(item["metadata"]["updated_at"]), 93 | # ) 94 | # ) 95 | # print(f"Found User: {item['id']}; Name {item['full_name']}") 96 | # if "next" in out_json["metadata"]: 97 | # query_params = parse.parse_qs(parse.urlsplit(out_json["metadata"]["next"]).query) 98 | # params["page_token"] = str(query_params["page_token"][0]) 99 | # self.read_all(params) 100 | # elif resp.status_code == 429: 101 | # print(f"CCloud API Per-Minute Limit exceeded. Sleeping for 45 seconds. Error stack: {resp.text}") 102 | # sleep(45) 103 | # print("Timer up. Resuming CCloud API scrape.") 104 | # else: 105 | # raise Exception("Could not connect to Confluent Cloud. Please check your settings. " + resp.text) 106 | 107 | @logged_method 108 | def __add_to_cache(self, ccloud_user: CCloudUserAccount) -> None: 109 | self.users[ccloud_user.resource_id] = ccloud_user 110 | 111 | # Read/Find one SA from the cache 112 | @logged_method 113 | def find_user(self, ccloud_user): 114 | for item in self.users.values(): 115 | if ccloud_user == item.name: 116 | return item 117 | return None 118 | 119 | # def __delete_from_cache(self, res_id): 120 | # self.users.pop(res_id, None) 121 | 122 | # Create/Find one SA and add it to the cache, so that we do not have to refresh the cache manually 123 | # def create_user(self, ccloud_user) -> Tuple[CCloudUserAccount, bool]: 124 | # temp = self.find_user(ccloud_user) 125 | # if temp: 126 | # return temp, False 127 | # # print("Creating a new Service Account with name: " + sa_name) 128 | # payload = { 129 | # "display_name": ccloud_user, 130 | # } 131 | # resp = requests.post( 132 | # url=self.url, 133 | # auth=self.http_connection, 134 | # json=payload, 135 | # ) 136 | # if resp.status_code == 201: 137 | # user_details = resp.json() 138 | # user_value = CCloudUserAccount( 139 | # resource_id=user_details["id"], 140 | # name=user_details["full_name"], 141 | # created_at=user_details["metadata"]["created_at"], 142 | # updated_at=user_details["metadata"]["updated_at"], 143 | # is_ignored=False, 144 | # ) 145 | # self.__add_to_cache(ccloud_user=user_value) 146 | # return (user_value, True) 147 | # else: 148 | # raise Exception("Could not connect to Confluent Cloud. Please check your settings. " + resp.text) 149 | 150 | # def delete_sa(self, ccloud_user) -> bool: 151 | # temp = self.find_user(ccloud_user) 152 | # if not temp: 153 | # print("Did not find CCloud user with name '" + ccloud_user + "'. 
Not deleting anything.") 154 | # return False 155 | # else: 156 | # resp = requests.delete(url=str(self.url + "/" + temp.resource_id), auth=self.http_connection) 157 | # if resp.status_code == 204: 158 | # self.__delete_from_cache(temp.resource_id) 159 | # return True 160 | # else: 161 | # raise Exception("Could not perform the DELETE operation. Please check your settings. " + resp.text) 162 | -------------------------------------------------------------------------------- /src/ccloud/connections.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from dataclasses import InitVar, dataclass, field 3 | from enum import Enum, auto 4 | from time import sleep 5 | from typing import Dict 6 | from urllib import parse 7 | 8 | import requests 9 | from requests.auth import HTTPBasicAuth, HTTPDigestAuth 10 | 11 | from helpers import logged_method 12 | 13 | LOGGER = logging.getLogger(__name__) 14 | 15 | 16 | class EndpointURL(Enum): 17 | API_URL = auto() 18 | TELEMETRY_URL = auto() 19 | 20 | 21 | class URIDetails: 22 | API_URL = "https://api.confluent.cloud" 23 | environments = "/org/v2/environments" 24 | clusters = "/cmk/v2/clusters" 25 | service_accounts = "/iam/v2/service-accounts" 26 | user_accounts = "/iam/v2/users" 27 | api_keys = "/iam/v2/api-keys" 28 | list_connector_names = ( 29 | "/connect/v1/environments/{environment_id}/clusters/{kafka_cluster_id}/connectors?expand=info,status,id" 30 | ) 31 | get_connector_config = ( 32 | "/connect/v1/environments/{environment_id}/clusters/{kafka_cluster_id}/connectors/{connector_name}/config" 33 | ) 34 | list_ksql_clusters = "/ksqldbcm/v2/clusters" 35 | get_billing_costs = "/billing/v1/costs" 36 | 37 | TELEMETRY_URL = "https://api.telemetry.confluent.cloud" 38 | telemetry_query_metrics = "/v2/metrics/{dataset}/query" 39 | 40 | prometheus_query_range = "/api/v1/query_range" 41 | 42 | @logged_method 43 | def override_column_names(self, key, value): 44 | object.__setattr__(self, key, value) 45 | 46 | 47 | @dataclass( 48 | frozen=True, 49 | kw_only=True, 50 | ) 51 | class CCloudConnection: 52 | in_api_key: InitVar[str] = None 53 | in_api_secret: InitVar[str] = None 54 | 55 | base_url: EndpointURL = field(default=EndpointURL.API_URL) 56 | uri: URIDetails = field(default=URIDetails(), init=False) 57 | http_connection: HTTPBasicAuth = field(init=False) 58 | 59 | def __post_init__(self, in_api_key, in_api_secret) -> None: 60 | object.__setattr__(self, "http_connection", HTTPBasicAuth(in_api_key, in_api_secret)) 61 | 62 | @logged_method 63 | def get_endpoint_url(self, key="/") -> str: 64 | if self.base_url is EndpointURL.API_URL: 65 | return self.uri.API_URL + key 66 | else: 67 | return self.uri.TELEMETRY_URL + key 68 | 69 | 70 | @dataclass 71 | class CCloudBase: 72 | in_ccloud_connection: CCloudConnection 73 | 74 | url: str = field(init=False) 75 | http_connection: HTTPBasicAuth = field(init=False) 76 | 77 | def __post_init__(self) -> None: 78 | self.http_connection = self.in_ccloud_connection.http_connection 79 | 80 | @logged_method 81 | def override_auth_type_from_yaml(self, auth_dict: Dict): 82 | LOGGER.debug(f"Trying to override auth type") 83 | if auth_dict.get("enable_auth", False): 84 | LOGGER.debug(f"Enable Auth Flag is found in the config with value {auth_dict.get('enable_auth', False)}.") 85 | if auth_dict.get("auth_type") == "HTTPBasicAuth": 86 | LOGGER.debug(f"Setting Auth Type as HTTPBasicAuth") 87 | self.http_connection = HTTPBasicAuth(**auth_dict.get("auth_args")) 88 | elif 
auth_dict.get("auth_type") == "HTTPDigestAuth": 89 | LOGGER.debug(f"Setting Auth Type as HTTPDigestAuth") 90 | self.http_connection = HTTPDigestAuth(**auth_dict.get("auth_args")) 91 | else: 92 | # Other AUTH Types are not implemented yet. 93 | LOGGER.debug(f"Unsupported Auth Type received. Value: {auth_dict.get('enable_auth', False)}") 94 | LOGGER.debug(f"Setting Auth Type as None") 95 | self.http_connection = None 96 | else: 97 | LOGGER.debug(f"Enable Auth Flag is set to false.") 98 | self.http_connection = None 99 | 100 | @logged_method 101 | def read_from_api(self, params={"page_size": 500}, **kwagrs): 102 | LOGGER.info(f"Reading from API: {self.url}") 103 | resp = requests.get(url=self.url, auth=self.http_connection, timeout=10, params=params) 104 | if resp.status_code == 200: 105 | LOGGER.debug("Received 200 OK from API") 106 | out_json = resp.json() 107 | if out_json is not None and out_json["data"] is not None: 108 | LOGGER.info(f"Found {len(out_json['data'])} items in API response.") 109 | for item in out_json["data"]: 110 | yield item 111 | if "next" in out_json["metadata"] and out_json["metadata"]["next"]: 112 | query_params = parse.parse_qs(parse.urlsplit(out_json["metadata"]["next"]).query) 113 | params["page_token"] = str(query_params["page_token"][0]) 114 | LOGGER.info(f"Found next page token: {params['page_token']}. Grabbing next page.") 115 | self.read_from_api(params) 116 | elif resp.status_code == 429: 117 | LOGGER.info(f"CCloud API Per-Minute Limit exceeded. Sleeping for 45 seconds. Error stack: {resp.text}") 118 | sleep(45) 119 | LOGGER.info("Timer up. Resuming CCloud API scrape.") 120 | self.read_from_api(params) 121 | else: 122 | LOGGER.error("Error stack: " + resp.text) 123 | raise Exception("Could not connect to Confluent Cloud. Please check your settings. 
" + resp.text) 124 | -------------------------------------------------------------------------------- /src/ccloud/org.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from copy import deepcopy 4 | from dataclasses import InitVar, dataclass, field 5 | from typing import Dict, List 6 | 7 | import pandas as pd 8 | 9 | from ccloud.connections import CCloudConnection, EndpointURL 10 | from data_processing.data_handlers.billing_api_handler import CCloudBillingHandler 11 | from data_processing.data_handlers.ccloud_api_handler import CCloudObjectsHandler 12 | from data_processing.data_handlers.chargeback_handler import CCloudChargebackHandler 13 | from data_processing.data_handlers.prom_fetch_stats_handler import PrometheusStatusMetricsDataHandler, ScrapeType 14 | from data_processing.data_handlers.prom_metrics_api_handler import PrometheusMetricsDataHandler 15 | from helpers import logged_method, sanitize_id 16 | from internal_data_probe import set_current_exposed_date, set_readiness 17 | from prometheus_processing.custom_collector import TimestampedCollector 18 | from prometheus_processing.notifier import NotifierAbstract, Observer 19 | 20 | LOGGER = logging.getLogger(__name__) 21 | 22 | 23 | scrape_status_metrics = TimestampedCollector( 24 | "confluent_cloud_custom_scrape_status", 25 | "CCloud Scrape Status for various object types", 26 | ["object_type"], 27 | in_begin_timestamp=datetime.datetime.now(), 28 | ) 29 | 30 | 31 | @dataclass(kw_only=True) 32 | class CCloudOrg(Observer): 33 | in_org_details: InitVar[List | None] = None 34 | in_days_in_memory: InitVar[int] = field(default=7) 35 | org_id: str 36 | 37 | objects_handler: CCloudObjectsHandler = field(init=False) 38 | metrics_handler: PrometheusMetricsDataHandler = field(init=False) 39 | status_metrics_handler: PrometheusStatusMetricsDataHandler = field(init=False) 40 | billing_handler: CCloudBillingHandler = field(init=False) 41 | chargeback_handler: CCloudChargebackHandler = field(init=False) 42 | exposed_metrics_datetime: datetime.datetime = field(init=False) 43 | epoch_start_date: datetime.datetime = field(init=False) 44 | exposed_end_date: datetime.datetime = field(init=False) 45 | reset_counter: int = field(default=0, init=False) 46 | 47 | def __post_init__(self, in_org_details, in_days_in_memory) -> None: 48 | Observer.__init__(self) 49 | LOGGER.debug(f"Sanitizing Org ID {in_org_details['id']}") 50 | self.org_id = sanitize_id(in_org_details["id"]) 51 | LOGGER.debug(f"Initializing CCloudOrg for Org ID: {self.org_id}") 52 | # This start date is calculated from the now time to rewind back 365 days as that is the 53 | # time limit of the billing dataset which is available to us. We will need the metrics handler 54 | # to try and get the data from that far back as well. 55 | # start_date = datetime.datetime.utcnow().replace( 56 | # minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc 57 | # ) + datetime.timedelta(days=-30, hours=+1) 58 | 59 | # This following action is required as for the first run we need to derive the start date. 60 | # So we step back by 1 hour, so that the current hour slice is returned. 
61 | lookback_days = int(in_org_details.get("ccloud_details", {}).get("total_lookback_days", 200)) * -1 62 | self.exposed_metrics_datetime = datetime.datetime.utcnow().replace( 63 | minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc 64 | ) + datetime.timedelta(days=lookback_days, hours=+1) 65 | LOGGER.debug(f"Starting Exposed Metrics Datetime: {self.exposed_metrics_datetime}") 66 | set_current_exposed_date(exposed_date=self.exposed_metrics_datetime) 67 | 68 | self.epoch_start_date = deepcopy(self.exposed_metrics_datetime) 69 | LOGGER.debug(f"Epoch Start Date: {self.epoch_start_date}") 70 | 71 | self.exposed_end_date = datetime.datetime.utcnow().replace( 72 | hour=0, minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc 73 | ) - datetime.timedelta(days=2) 74 | LOGGER.debug(f"Exposed End Date: {self.exposed_end_date}") 75 | 76 | # Initialize the Scrape Metrics Handler 77 | # This is used for checking when was the last scrape stored in Prometheus 78 | # and where to resume the scrape 79 | LOGGER.info(f"Initializing Prometheus Status Metrics Handler for Org ID: {self.org_id}") 80 | self.status_metrics_handler = PrometheusStatusMetricsDataHandler( 81 | in_prometheus_url=in_org_details["prometheus_details"]["chargeback_datastore"]["prometheus_url"], 82 | ) 83 | 84 | next_fetch_date = self.locate_next_fetch_date(start_date=self.exposed_metrics_datetime) 85 | LOGGER.info(f"Initial Fetch Date after checking chargeback status in Prometheus: {next_fetch_date}") 86 | 87 | LOGGER.debug(f"Initializing CCloud Objects Handler for Org ID: {self.org_id}") 88 | # Initialize the CCloud Objects Handler 89 | self.objects_handler = CCloudObjectsHandler( 90 | in_ccloud_connection=CCloudConnection( 91 | in_api_key=in_org_details["ccloud_details"]["ccloud_api"]["api_key"], 92 | in_api_secret=in_org_details["ccloud_details"]["ccloud_api"]["api_secret"], 93 | base_url=EndpointURL.API_URL, 94 | ), 95 | start_date=next_fetch_date, 96 | ) 97 | 98 | LOGGER.debug(f"Initializing CCloud Billing Handler for Org ID: {self.org_id}") 99 | # Initialize the Billing API Handler 100 | self.billing_handler = CCloudBillingHandler( 101 | in_ccloud_connection=CCloudConnection( 102 | in_api_key=in_org_details["ccloud_details"]["billing_api"]["api_key"], 103 | in_api_secret=in_org_details["ccloud_details"]["billing_api"]["api_secret"], 104 | base_url=EndpointURL.API_URL, 105 | ), 106 | start_date=next_fetch_date, 107 | objects_dataset=self.objects_handler, 108 | ) 109 | 110 | LOGGER.debug(f"Initializing Prometheus Metrics Handler for Org ID: {self.org_id}") 111 | # Initialize the Metrics Handler 112 | self.metrics_handler = PrometheusMetricsDataHandler( 113 | in_ccloud_connection=CCloudConnection( 114 | in_api_key=in_org_details["ccloud_details"]["metrics_api"]["api_key"], 115 | in_api_secret=in_org_details["ccloud_details"]["metrics_api"]["api_secret"], 116 | ), 117 | in_prometheus_url=in_org_details["prometheus_details"]["metrics_api_datastore"]["prometheus_url"], 118 | in_connection_kwargs=in_org_details["prometheus_details"]["metrics_api_datastore"]["connection_params"], 119 | in_connection_auth=in_org_details.get("prometheus_details", dict()) 120 | .get("metrics_api_datastore", dict()) 121 | .get("auth", dict()), 122 | start_date=next_fetch_date, 123 | ) 124 | 125 | LOGGER.debug(f"Initializing CCloud Chargeback Handler for Org ID: {self.org_id}") 126 | # Initialize the Chargeback Object Handler 127 | self.chargeback_handler = CCloudChargebackHandler( 128 | billing_dataset=self.billing_handler, 129 | 
objects_dataset=self.objects_handler, 130 | metrics_dataset=self.metrics_handler, 131 | start_date=next_fetch_date, 132 | ) 133 | 134 | LOGGER.debug(f"Attaching CCloudOrg to notifier {scrape_status_metrics._name} for Org ID: {self.org_id}") 135 | self.attach(notifier=scrape_status_metrics) 136 | # self.update(notifier=scrape_status_metrics) 137 | 138 | @logged_method 139 | def update(self, notifier: NotifierAbstract): 140 | self.exposed_end_date = datetime.datetime.utcnow().replace( 141 | hour=0, minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc 142 | ) - datetime.timedelta(days=2) 143 | next_ts_in_dt = self.locate_next_fetch_date(start_date=self.exposed_metrics_datetime, is_notifier_update=True) 144 | if next_ts_in_dt < self.exposed_end_date: 145 | if next_ts_in_dt == self.exposed_metrics_datetime: 146 | LOGGER.debug( 147 | f"Next Fetch Date is same as the current fetch date. Clearing out the stats to prevent republishing of the same data." 148 | ) 149 | notifier.clear() 150 | self.objects_handler.force_clear_prom_metrics() 151 | self.chargeback_handler.force_clear_prom_metrics() 152 | else: 153 | set_readiness(readiness_flag=False) 154 | notifier.clear() 155 | notifier.set_timestamp(curr_timestamp=next_ts_in_dt) 156 | # self.expose_prometheus_metrics(ts_filter=next_ts) 157 | LOGGER.info(f"Refreshing CCloud Existing Objects Data") 158 | self.objects_handler.execute_requests(exposed_timestamp=next_ts_in_dt) 159 | notifier.labels("ccloud_objects").set(1) 160 | LOGGER.info(f"Gathering Metrics API Data") 161 | self.metrics_handler.execute_requests(exposed_timestamp=next_ts_in_dt) 162 | LOGGER.info(f"Checking for new Billing CSV Files") 163 | self.billing_handler.execute_requests(exposed_timestamp=next_ts_in_dt) 164 | LOGGER.info("Calculating next dataset for chargeback") 165 | self.chargeback_handler.execute_requests(exposed_timestamp=next_ts_in_dt) 166 | notifier.labels("billing_chargeback").set(1) 167 | self.exposed_metrics_datetime = next_ts_in_dt 168 | LOGGER.info(f"Fetch Date: {next_ts_in_dt}") 169 | set_current_exposed_date(exposed_date=next_ts_in_dt) 170 | set_readiness(readiness_flag=True) 171 | else: 172 | LOGGER.info( 173 | f"""Chargeback calculation is fully caught up to the point where it needs to be. 174 | More processing will continue after the day passes and the data for the day is finalized in the Billing API.""" 175 | ) 176 | 177 | @logged_method 178 | def locate_next_fetch_date( 179 | self, start_date: datetime.datetime, is_notifier_update: bool = False 180 | ) -> datetime.datetime: 181 | # TODO; Current fetch is totally naive and finds the first gap in Chargeback dataset only. 182 | # Long term thought is to diverge datetimes for multiple objects as necessary. 183 | # This will prevent naive re-calculations for other objects as well. 184 | # The reason we do it with CB object right now is 2 fold: 185 | # 1. simple :) 186 | # 2. as we scrape all the datasets in the same scrape and align the same dates, right now everything will work. 187 | # Once we diverge scrapes on their own dates, we will need to enhance this method to return specific values. 188 | next_ts_in_dt = start_date 189 | self.reset_counter += 1 190 | # Add a reset intelligence marker to rewind just in case any time slot is missed and we need to go back in time to 191 | # fetch the data set again. Yes, this will be a little bit of a performance hit, but it is better than missing data. 
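        # Concretely: every time the counter passes 50 the walk restarts from epoch_start_date instead of the
        # last exposed timestamp, and the hourly scan below then returns the first hour for which Prometheus
        # has no BillingChargeback sample, so any hour that was skipped earlier gets recalculated at the cost
        # of one full re-scan roughly every 50 calls.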
192 | if self.reset_counter > 50: 193 | LOGGER.debug("Rewinding back to the start date to ensure no data is missed.") 194 | self.reset_counter = 0 195 | next_ts_in_dt = self.epoch_start_date 196 | is_notifier_update = False 197 | for next_date in pd.date_range( 198 | start=start_date, 199 | end=self.exposed_end_date, 200 | freq="1H", 201 | inclusive="neither" if is_notifier_update else "left", 202 | ): 203 | next_ts_in_dt = next_date.to_pydatetime(warn=False) 204 | if not self.status_metrics_handler.is_dataset_present( 205 | scrape_type=ScrapeType.BillingChargeback, 206 | ts_in_millis=self.status_metrics_handler.convert_dt_to_ts(next_ts_in_dt), 207 | ): 208 | # return the immediate gap datetime in the series 209 | return next_ts_in_dt 210 | # if no dates are found, use the last fetch date 211 | return next_ts_in_dt 212 | 213 | 214 | @dataclass(kw_only=True) 215 | class CCloudOrgList: 216 | in_orgs: InitVar[List | None] = None 217 | in_days_in_memory: InitVar[int] = field(default=7) 218 | 219 | orgs: Dict[str, CCloudOrg] = field(default_factory=dict, init=False) 220 | 221 | def __post_init__(self, in_orgs, in_days_in_memory) -> None: 222 | LOGGER.info("Initializing CCloudOrgList") 223 | req_count = 0 224 | for org_item in in_orgs: 225 | temp = CCloudOrg( 226 | in_org_details=org_item, 227 | in_days_in_memory=in_days_in_memory, 228 | org_id=org_item["id"] if org_item["id"] else req_count, 229 | ) 230 | self.__add_org_to_cache(ccloud_org=temp) 231 | LOGGER.debug("Initialization Complete.") 232 | LOGGER.debug("marking readiness") 233 | set_readiness(readiness_flag=True) 234 | 235 | @logged_method 236 | def __add_org_to_cache(self, ccloud_org: CCloudOrg) -> None: 237 | self.orgs[ccloud_org.org_id] = ccloud_org 238 | 239 | @logged_method 240 | def execute_requests(self): 241 | for org_item in self.orgs.values(): 242 | org_item.execute_requests() 243 | 244 | @logged_method 245 | def run_calculations(self): 246 | for org_id, org in self.orgs.items(): 247 | org.run_calculations() 248 | -------------------------------------------------------------------------------- /src/data_processing/data_handlers/billing_api_handler.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from dataclasses import dataclass, field 4 | from decimal import Decimal 5 | from typing import List, Tuple 6 | 7 | import pandas as pd 8 | 9 | from ccloud.connections import CCloudBase 10 | from data_processing.data_handlers.ccloud_api_handler import CCloudObjectsHandler 11 | from data_processing.data_handlers.types import AbstractDataHandler 12 | from helpers import logged_method 13 | from prometheus_processing.custom_collector import TimestampedCollector 14 | from prometheus_processing.notifier import NotifierAbstract 15 | 16 | LOGGER = logging.getLogger(__name__) 17 | 18 | 19 | class BillingAPIColumnNames: 20 | env_id = "EnvironmentID" 21 | cluster_id = "LogicalClusterID" 22 | cluster_name = "LogicalClusterName" 23 | product_name = "Product" 24 | product_type = "Type" 25 | quantity = "Quantity" 26 | orig_amt = "OriginalAmount" 27 | total = "Total" 28 | price = "Price" 29 | 30 | calc_timestamp = "Interval" 31 | calc_split_quantity = "QuantityAfterSplit" 32 | calc_split_amt = "AmountAfterSplit" 33 | calc_split_total = "TotalAfterSplit" 34 | 35 | 36 | BILLING_API_COLUMNS = BillingAPIColumnNames() 37 | 38 | billing_api_prom_metrics = TimestampedCollector( 39 | "confluent_cloud_billing_details", 40 | "Confluent Cloud Costs API data distribution details 
divided on a per hour basis", 41 | [ 42 | "env_id", 43 | "kafka_cluster_id", 44 | "kafka_cluster_unknown_reason", 45 | "resource_id", 46 | "product_name", 47 | "product_line_type", 48 | ], 49 | in_begin_timestamp=datetime.datetime.now(), 50 | ) 51 | 52 | 53 | @dataclass 54 | class CCloudBillingHandler(AbstractDataHandler, CCloudBase): 55 | start_date: datetime.datetime = field(init=True) 56 | objects_dataset: CCloudObjectsHandler = field(init=True) 57 | days_per_query: int = field(default=7) 58 | max_days_in_memory: int = field(default=14) 59 | 60 | billing_dataset: pd.DataFrame = field(init=False, default=None) 61 | last_available_date: datetime.datetime = field(init=False) 62 | curr_export_datetime: datetime.datetime = field(init=False) 63 | 64 | def __post_init__(self) -> None: 65 | # Initialize the super classes to set the internal attributes 66 | AbstractDataHandler.__init__(self, start_date=self.start_date) 67 | CCloudBase.__post_init__(self) 68 | self.url = self.in_ccloud_connection.get_endpoint_url(key=self.in_ccloud_connection.uri.get_billing_costs) 69 | LOGGER.info(f"Initialized the Billing API Handler with URL: {self.url}") 70 | # Calculate the end_date from start_date plus number of days per query 71 | end_date = self.start_date + datetime.timedelta(days=self.days_per_query) 72 | # Set up params for querying the Billing API 73 | self.read_all(start_date=self.start_date, end_date=end_date) 74 | self.curr_export_datetime = self.start_date 75 | self.update(notifier=billing_api_prom_metrics) 76 | 77 | self.last_available_date = end_date 78 | LOGGER.info(f"Initialized the Billing API Handler with last available date: {self.last_available_date}") 79 | 80 | @logged_method 81 | def update(self, notifier: NotifierAbstract) -> None: 82 | """This is the Observer class method implementation that helps us step through the next timestamp in sequence. 83 | The Data for next timestamp is also populated in the Gauge implementation using this method. 84 | It also tracks the currently exported timestamp in Observer as well as update it to the Notifier. 85 | 86 | Args: 87 | notifier (NotifierAbstract): This objects is used to get updates from the notifier that the collection for on timestamp is complete and the dataset should be refreshed for the next timestamp. 88 | """ 89 | curr_ts = pd.date_range(self.curr_export_datetime, freq="1H", periods=2)[0] 90 | notifier.set_timestamp(curr_timestamp=self.curr_export_datetime) 91 | # chargeback_prom_status_metrics.set_timestamp(curr_timestamp=self.curr_export_datetime) 92 | self.expose_prometheus_metrics(ts_filter=curr_ts) 93 | 94 | @logged_method 95 | def expose_prometheus_metrics(self, ts_filter: pd.Timestamp): 96 | """Set and expose the metrics to the prom collector as a Gauge. 
97 | 98 | Args: 99 | ts_filter (pd.Timestamp): This Timestamp allows us to filter the data from the entire data set 100 | to a specific timestamp and expose it to the prometheus collector 101 | """ 102 | LOGGER.debug( 103 | "Exposing Prometheus Metrics for Billing dataset for timestamp: " + str(ts_filter.to_pydatetime()) 104 | ) 105 | self.force_clear_prom_metrics() 106 | out, is_none = self._get_dataset_for_exact_timestamp( 107 | dataset=self.billing_dataset, ts_column_name=BILLING_API_COLUMNS.calc_timestamp, time_slice=ts_filter 108 | ) 109 | if not is_none: 110 | for df_row in out.itertuples(name="BillingData"): 111 | env_id = df_row[0][1] 112 | resource_id = df_row[0][2] 113 | product_name = df_row[0][3] 114 | product_line_type = df_row[0][4] 115 | cost = df_row[8] 116 | 117 | kafka_cluster_list, not_found_reason = self.get_connected_kafka_cluster_id( 118 | env_id=env_id, resource_id=resource_id 119 | ) 120 | for item in kafka_cluster_list: 121 | billing_api_prom_metrics.labels( 122 | env_id, 123 | item, 124 | not_found_reason, 125 | resource_id, 126 | product_name, 127 | product_line_type, 128 | ).set(cost / len(kafka_cluster_list)) 129 | 130 | @logged_method 131 | def get_connected_kafka_cluster_id(self, env_id: str, resource_id: str) -> Tuple[List[str], str]: 132 | """This method is used to get the connected Kafka Cluster ID for a given Billing API resource ID 133 | 134 | Args: 135 | env_id (str): The environment ID for the resource 136 | resource_id (str): The resource ID for which the connected Kafka Cluster ID is required 137 | 138 | Returns: 139 | str: The connected Kafka Cluster ID 140 | """ 141 | return self.objects_dataset.get_connected_kafka_cluster_id(env_id=env_id, resource_id=resource_id) 142 | 143 | @logged_method 144 | def force_clear_prom_metrics(self): 145 | billing_api_prom_metrics.clear() 146 | 147 | @logged_method 148 | def read_all( 149 | self, start_date: datetime.datetime, end_date: datetime.datetime, params={"page_size": 2000}, **kwargs 150 | ): 151 | params["start_date"] = str(start_date.date()) 152 | params["end_date"] = str(end_date.date()) 153 | LOGGER.debug(f"Reading from Billing API with params: {params}") 154 | for item in self.read_from_api(params=params): 155 | item_start_date = datetime.datetime.strptime(item["start_date"], "%Y-%m-%d") 156 | item_end_date = datetime.datetime.strptime(item["end_date"], "%Y-%m-%d") 157 | temp_date_range = self._generate_date_range_per_row(start_date=item_start_date, end_date=item_end_date) 158 | temp_data = [ 159 | { 160 | BILLING_API_COLUMNS.calc_timestamp: x, 161 | BILLING_API_COLUMNS.env_id: item.get("resource", {}) 162 | .get("environment", {}) 163 | .get("id", f"MISSING_DATA_{idx}"), 164 | BILLING_API_COLUMNS.cluster_id: item.get("resource", {}).get("id", f"MISSING_DATA_{idx}"), 165 | BILLING_API_COLUMNS.cluster_name: item.get("resource", {}).get( 166 | "display_name", f"MISSING_DATA_{idx}" 167 | ), 168 | BILLING_API_COLUMNS.product_name: item.get("product", f"MISSING_DATA_{idx}"), 169 | BILLING_API_COLUMNS.product_type: item.get("line_type", f"MISSING_DATA_{idx}"), 170 | BILLING_API_COLUMNS.quantity: item.get("quantity", 1), 171 | BILLING_API_COLUMNS.orig_amt: item.get("original_amount", 0), 172 | BILLING_API_COLUMNS.total: item.get("amount", 0), 173 | BILLING_API_COLUMNS.price: item.get("price", 0), 174 | BILLING_API_COLUMNS.calc_split_quantity: Decimal(item.get("quantity", 1)) / 24, 175 | BILLING_API_COLUMNS.calc_split_amt: Decimal(item.get("original_amount", 0)) / 24, 176 | BILLING_API_COLUMNS.calc_split_total: 
Decimal(item.get("amount", 0)) / 24, 177 | } 178 | for idx, x in enumerate(temp_date_range) 179 | ] 180 | if temp_data is not None: 181 | if self.billing_dataset is not None: 182 | LOGGER.debug(f"Appending new Billing data to the existing dataset") 183 | self.billing_dataset = pd.concat( 184 | [ 185 | self.billing_dataset, 186 | pd.DataFrame.from_records( 187 | temp_data, 188 | index=[ 189 | BILLING_API_COLUMNS.calc_timestamp, 190 | BILLING_API_COLUMNS.env_id, 191 | BILLING_API_COLUMNS.cluster_id, 192 | BILLING_API_COLUMNS.product_name, 193 | BILLING_API_COLUMNS.product_type, 194 | ], 195 | ), 196 | ] 197 | ) 198 | else: 199 | LOGGER.debug(f"Initializing the Billing dataset with new data") 200 | self.billing_dataset = pd.DataFrame.from_records( 201 | temp_data, 202 | index=[ 203 | BILLING_API_COLUMNS.calc_timestamp, 204 | BILLING_API_COLUMNS.env_id, 205 | BILLING_API_COLUMNS.cluster_id, 206 | BILLING_API_COLUMNS.product_name, 207 | BILLING_API_COLUMNS.product_type, 208 | ], 209 | ) 210 | 211 | @logged_method 212 | def read_next_dataset(self, exposed_timestamp: datetime.datetime): 213 | LOGGER.debug("Reading the next dataset for Billing API") 214 | if self.is_next_fetch_required(exposed_timestamp, self.last_available_date, 2): 215 | LOGGER.debug("Next fetch is required for Billing API") 216 | effective_dates = self.calculate_effective_dates( 217 | self.last_available_date, self.days_per_query, self.max_days_in_memory 218 | ) 219 | LOGGER.debug(f"Effective dates for Billing API: {effective_dates}") 220 | self.read_all( 221 | start_date=effective_dates.next_fetch_start_date, end_date=effective_dates.next_fetch_end_date 222 | ) 223 | self.last_available_date = effective_dates.next_fetch_end_date 224 | LOGGER.debug("Trimming Billing dataset to max days in memory per config") 225 | self.billing_dataset, is_none = self.get_dataset_for_timerange( 226 | start_datetime=effective_dates.retention_start_date, end_datetime=effective_dates.retention_end_date 227 | ) 228 | self.curr_export_datetime = exposed_timestamp 229 | self.update(notifier=billing_api_prom_metrics) 230 | 231 | @logged_method 232 | def get_dataset_for_timerange(self, start_datetime: datetime.datetime, end_datetime: datetime.datetime, **kwargs): 233 | """Wrapper over the internal method so that cross-imports are not necessary 234 | 235 | Args: 236 | start_datetime (datetime.datetime): Inclusive Start datetime 237 | end_datetime (datetime.datetime): Exclusive end datetime 238 | 239 | Returns: 240 | pd.Dataframe: Returns a pandas dataframe with the filtered data 241 | """ 242 | return self._get_dataset_for_timerange( 243 | dataset=self.billing_dataset, 244 | ts_column_name=BILLING_API_COLUMNS.calc_timestamp, 245 | start_datetime=start_datetime, 246 | end_datetime=end_datetime, 247 | ) 248 | 249 | @logged_method 250 | def get_dataset_for_time_slice(self, time_slice: pd.Timestamp, **kwargs): 251 | """Wrapper over the internal method so that cross-imports are not necessary 252 | 253 | Args: 254 | time_slice (pd.Timestamp): Time slice to be used for fetching the data from datafame for the exact timestamp 255 | 256 | Returns: 257 | pd.DataFrame: Returns a pandas Dataframe with the filtered data. 
258 | """ 259 | temp_data, is_none = self._get_dataset_for_exact_timestamp( 260 | dataset=self.billing_dataset, ts_column_name=BILLING_API_COLUMNS.calc_timestamp, time_slice=time_slice 261 | ) 262 | if is_none: 263 | return pd.DataFrame( 264 | data={}, 265 | index=[ 266 | BILLING_API_COLUMNS.calc_timestamp, 267 | BILLING_API_COLUMNS.env_id, 268 | BILLING_API_COLUMNS.cluster_id, 269 | BILLING_API_COLUMNS.product_name, 270 | BILLING_API_COLUMNS.product_type, 271 | ], 272 | ) 273 | else: 274 | return temp_data 275 | -------------------------------------------------------------------------------- /src/data_processing/data_handlers/ccloud_api_handler.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from dataclasses import dataclass, field 4 | from typing import List, Tuple 5 | 6 | from ccloud.ccloud_api.api_keys import CCloudAPIKeyList 7 | from ccloud.ccloud_api.clusters import CCloudClusterList 8 | from ccloud.ccloud_api.connectors import CCloudConnectorList 9 | from ccloud.ccloud_api.environments import CCloudEnvironmentList 10 | from ccloud.ccloud_api.ksqldb_clusters import CCloudKsqldbClusterList 11 | from ccloud.ccloud_api.service_accounts import CCloudServiceAccountList 12 | from ccloud.ccloud_api.user_accounts import CCloudUserAccountList 13 | from ccloud.connections import CCloudBase 14 | from data_processing.data_handlers.types import AbstractDataHandler 15 | from helpers import logged_method 16 | 17 | LOGGER = logging.getLogger(__name__) 18 | 19 | 20 | @dataclass 21 | class CCloudObjectsHandler(AbstractDataHandler, CCloudBase): 22 | last_refresh: datetime.datetime | None = field(init=False, default=None) 23 | min_refresh_gap: datetime.timedelta = field(init=False, default=datetime.timedelta(minutes=30)) 24 | cc_sa: CCloudServiceAccountList = field(init=False) 25 | cc_users: CCloudUserAccountList = field(init=False) 26 | cc_api_keys: CCloudAPIKeyList = field(init=False) 27 | cc_environments: CCloudEnvironmentList = field(init=False) 28 | cc_clusters: CCloudClusterList = field(init=False) 29 | cc_connectors: CCloudConnectorList = field(init=False) 30 | cc_ksqldb_clusters: CCloudKsqldbClusterList = field(init=False) 31 | 32 | def __post_init__(self) -> None: 33 | LOGGER.debug(f"Initializing CCloudObjectsHandler") 34 | # Initialize the super classes to set the internal attributes 35 | AbstractDataHandler.__init__(self, start_date=self.start_date) 36 | CCloudBase.__post_init__(self) 37 | self.last_refresh = datetime.datetime.now() - self.min_refresh_gap 38 | effective_dates = self.calculate_effective_dates( 39 | last_available_date=self.start_date, days_per_query=1, max_days_in_memory=1 40 | ) 41 | # self.read_all(exposed_timestamp=effective_dates.curr_end_date) 42 | self.read_next_dataset(exposed_timestamp=effective_dates.curr_end_date) 43 | LOGGER.debug(f"Finished Initializing CCloudObjectsHandler") 44 | 45 | @logged_method 46 | def read_all(self, exposed_timestamp: datetime.datetime = None): 47 | if self.min_refresh_gap > datetime.datetime.now() - self.last_refresh: 48 | # TODO: Add Refresh gap as a configurable value in YAML file 49 | LOGGER.info(f"Not refreshing the CCloud Object state -- TimeDelta is not enough. 
{self.min_refresh_gap}") 50 | else: 51 | LOGGER.info(f"Starting CCloud Object refresh now -- {datetime.datetime.now()}") 52 | LOGGER.info(f"Refreshing CCloud Service Accounts") 53 | self.cc_sa = CCloudServiceAccountList( 54 | in_ccloud_connection=self.in_ccloud_connection, 55 | exposed_timestamp=exposed_timestamp, 56 | ) 57 | LOGGER.info(f"Refreshing CCloud User Accounts") 58 | self.cc_users = CCloudUserAccountList( 59 | in_ccloud_connection=self.in_ccloud_connection, 60 | exposed_timestamp=exposed_timestamp, 61 | ) 62 | LOGGER.info(f"Refreshing CCloud API Keys") 63 | self.cc_api_keys = CCloudAPIKeyList( 64 | in_ccloud_connection=self.in_ccloud_connection, 65 | exposed_timestamp=exposed_timestamp, 66 | ) 67 | LOGGER.info(f"Refreshing CCloud Environments") 68 | self.cc_environments = CCloudEnvironmentList( 69 | in_ccloud_connection=self.in_ccloud_connection, 70 | exposed_timestamp=exposed_timestamp, 71 | ) 72 | LOGGER.info(f"Refreshing CCloud Kafka Clusters") 73 | self.cc_clusters = CCloudClusterList( 74 | in_ccloud_connection=self.in_ccloud_connection, 75 | ccloud_envs=self.cc_environments, 76 | exposed_timestamp=exposed_timestamp, 77 | ) 78 | LOGGER.info(f"Refreshing CCloud Connectors") 79 | self.cc_connectors = CCloudConnectorList( 80 | in_ccloud_connection=self.in_ccloud_connection, 81 | ccloud_kafka_clusters=self.cc_clusters, 82 | ccloud_service_accounts=self.cc_sa, 83 | ccloud_users=self.cc_users, 84 | ccloud_api_keys=self.cc_api_keys, 85 | exposed_timestamp=exposed_timestamp, 86 | ) 87 | LOGGER.info(f"Refreshing CCloud KSQLDB Clusters") 88 | self.cc_ksqldb_clusters = CCloudKsqldbClusterList( 89 | in_ccloud_connection=self.in_ccloud_connection, 90 | ccloud_envs=self.cc_environments, 91 | exposed_timestamp=exposed_timestamp, 92 | ) 93 | self.last_refresh = datetime.datetime.now() 94 | LOGGER.info(f"Finished CCloud Object refresh -- {self.last_refresh}") 95 | 96 | @logged_method 97 | def read_next_dataset(self, exposed_timestamp: datetime.datetime): 98 | self.read_all(exposed_timestamp=exposed_timestamp) 99 | LOGGER.info(f"Reading Objects dataset for Timestamp: {exposed_timestamp}") 100 | self.cc_sa.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 101 | self.cc_users.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 102 | self.cc_api_keys.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 103 | self.cc_environments.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 104 | self.cc_clusters.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 105 | self.cc_connectors.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 106 | self.cc_ksqldb_clusters.expose_prometheus_metrics(exposed_timestamp=exposed_timestamp) 107 | 108 | @logged_method 109 | def force_clear_prom_metrics(self): 110 | self.cc_sa.force_clear_prom_metrics() 111 | self.cc_users.force_clear_prom_metrics() 112 | self.cc_api_keys.force_clear_prom_metrics() 113 | self.cc_environments.force_clear_prom_metrics() 114 | self.cc_clusters.force_clear_prom_metrics() 115 | self.cc_connectors.force_clear_prom_metrics() 116 | self.cc_ksqldb_clusters.force_clear_prom_metrics() 117 | 118 | @logged_method 119 | def get_dataset_for_timerange(self, start_datetime: datetime.datetime, end_datetime: datetime.datetime, **kwargs): 120 | # TODO: Do we want to narrow down the active dataset for the timelines ? 
121 | pass 122 | 123 | @logged_method 124 | def get_connected_kafka_cluster_id(self, env_id: str, resource_id: str) -> Tuple[List[str], str]: 125 | cluster_list = [] 126 | error_string = None 127 | LOGGER.debug(f"Getting connected Kafka cluster(s) for resource_id: {resource_id} in env_id: {env_id}") 128 | if resource_id.startswith("lcc"): 129 | if resource_id in self.cc_connectors.connectors.keys(): 130 | cluster_list.append(self.cc_connectors.connectors[resource_id].cluster_id) 131 | error_string = None 132 | else: 133 | cluster_list.append("unknown") 134 | error_string = "no_data_in_api" 135 | elif resource_id.startswith("lksql"): 136 | if resource_id in self.cc_ksqldb_clusters.ksqldb_clusters.keys(): 137 | cluster_list.append(self.cc_ksqldb_clusters.ksqldb_clusters[resource_id].kafka_cluster_id) 138 | error_string = None 139 | else: 140 | cluster_list.append("unknown") 141 | error_string = "no_data_in_api" 142 | elif resource_id.startswith("lkc"): 143 | cluster_list.append(resource_id) 144 | error_string = None 145 | elif resource_id.startswith("lsr"): 146 | temp_cluster_list = [x.cluster_id for x in self.cc_clusters.clusters.values() if x.env_id == env_id] 147 | if len(temp_cluster_list) > 0: 148 | cluster_list += temp_cluster_list 149 | error_string = None 150 | else: 151 | cluster_list.append(None) 152 | error_string = "no_cluster_in_env" 153 | else: 154 | cluster_list.append("unknown") 155 | error_string = "unknown_resource_type" 156 | LOGGER.debug( 157 | f"Found cluster_list: {cluster_list} and error_string: {error_string} for resource_id: {resource_id} in env_id: {env_id}" 158 | ) 159 | return (cluster_list, error_string) 160 | -------------------------------------------------------------------------------- /src/data_processing/data_handlers/prom_fetch_stats_handler.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from dataclasses import InitVar, dataclass, field 4 | from enum import Enum, auto 5 | from urllib import parse 6 | 7 | import requests 8 | 9 | from helpers import logged_method 10 | 11 | LOGGER = logging.getLogger(__name__) 12 | 13 | 14 | class MetricsAPIPrometheusStatusQueries: 15 | objects_sync_status_name = "ccloud_objects" 16 | chargeback_sync_status_name = "billing_chargeback" 17 | status_query = "confluent_cloud_custom_scrape_status" 18 | 19 | def override_column_names(self, key, value): 20 | object.__setattr__(self, key, value) 21 | 22 | 23 | class ScrapeType(Enum): 24 | BillingChargeback = auto() 25 | CCloudObjects = auto() 26 | 27 | 28 | METRICS_API_PROMETHEUS_STATUS_QUERIES = MetricsAPIPrometheusStatusQueries() 29 | 30 | 31 | @dataclass 32 | class PrometheusStatusMetricsDataHandler: 33 | in_prometheus_url: InitVar[str | None] = field(default="http://localhost:9091") 34 | in_prometheus_query_endpoint: InitVar[str] = field(default="/api/v1/query") 35 | # days_per_query: int = field(default=2) 36 | # max_days_in_memory: int = field(default=30) 37 | 38 | url: str = field(init=False) 39 | # last_available_date: datetime.datetime = field(init=False) 40 | # scrape_status_dataset: Dict = field(init=False, repr=False, default_factory=dict) 41 | 42 | def __post_init__(self, in_prometheus_url, in_prometheus_query_endpoint) -> None: 43 | self.url = parse.urljoin(base=in_prometheus_url, url=in_prometheus_query_endpoint) 44 | LOGGER.debug(f"Current Prometheus URL: {self.url}") 45 | # end_date = self.start_date + datetime.timedelta(days=self.days_per_query) 46 | # Set up params for querying the 
Billing API 47 | # for item in [ 48 | # METRICS_API_PROMETHEUS_STATUS_QUERIES.status_query, 49 | # ]: 50 | # self.read_all(start_date=self.start_date, end_date=end_date, query_type=item) 51 | # self.last_available_date = end_date 52 | 53 | @logged_method 54 | def convert_dt_to_ts(self, ts_date: datetime.datetime) -> int: 55 | return int(ts_date.timestamp()) 56 | 57 | @logged_method 58 | def is_dataset_present(self, scrape_type: ScrapeType, ts_in_millis: int) -> bool: 59 | # return True if (scrape_type.value, ts_in_millis) in self.scrape_status_dataset.keys() else False 60 | headers = {"Content-Type": "application/x-www-form-urlencoded"} 61 | post_body = {} 62 | post_body["time"] = f"{ts_in_millis}" 63 | post_body[ 64 | "query" 65 | ] = f'{METRICS_API_PROMETHEUS_STATUS_QUERIES.status_query}{{object_type="{METRICS_API_PROMETHEUS_STATUS_QUERIES.chargeback_sync_status_name}"}}' 66 | resp = requests.post(url=self.url, headers=headers, data=post_body) 67 | if resp.status_code == 200: 68 | out_json = resp.json() 69 | if out_json is not None and out_json["data"] is not None: 70 | if out_json["data"]["result"]: 71 | for item in out_json["data"]["result"]: 72 | if ( 73 | item["metric"]["object_type"] 74 | == METRICS_API_PROMETHEUS_STATUS_QUERIES.chargeback_sync_status_name 75 | ): 76 | if item["value"][0] == ts_in_millis: 77 | return True 78 | return False 79 | 80 | # def read_all( 81 | # self, 82 | # start_date: datetime.datetime, 83 | # end_date: datetime.datetime, 84 | # query_type: str, 85 | # params={"step": 3600}, 86 | # **kwargs, 87 | # ): 88 | # headers = {"Content-Type": "application/x-www-form-urlencoded"} 89 | # post_body = {} 90 | # post_body["start"] = f'{start_date.replace(tzinfo=datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")}+00:00' 91 | # post_body["end"] = f'{end_date.replace(tzinfo=datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")}+00:00' 92 | # post_body["step"] = params["step"] 93 | # post_body["query"] = METRICS_API_PROMETHEUS_STATUS_QUERIES.status_query 94 | # resp = requests.post(url=self.url, headers=headers, data=post_body) 95 | # if resp.status_code == 200: 96 | # out_json = resp.json() 97 | # if out_json is not None and out_json["data"] is not None: 98 | # if out_json["data"]["result"]: 99 | # for item in out_json["data"]["result"]: 100 | # if ( 101 | # item["metric"]["object_type"] 102 | # == METRICS_API_PROMETHEUS_STATUS_QUERIES.chargeback_sync_status_name 103 | # ): 104 | # for item_values in item["values"]: 105 | # self.add_to_dataset( 106 | # scrape_type=ScrapeType.BillingChargeback, ts_in_millis=item_values[0] 107 | # ) 108 | # if ( 109 | # item["metric"]["object_type"] 110 | # == METRICS_API_PROMETHEUS_STATUS_QUERIES.objects_sync_status_name 111 | # ): 112 | # for item_values in item["values"]: 113 | # self.add_to_dataset(scrape_type=ScrapeType.CCloudObjects, ts_in_millis=item_values[0]) 114 | 115 | # def add_to_dataset(self, scrape_type: ScrapeType, ts_in_millis: int): 116 | # self.scrape_status_dataset[(scrape_type.value, ts_in_millis)] = None 117 | 118 | # def read_next_dataset(self, exposed_timestamp: datetime.datetime): 119 | # if self.is_next_fetch_required(exposed_timestamp, self.last_available_date, next_fetch_within_days=2): 120 | # effective_dates = self.calculate_effective_dates( 121 | # self.last_available_date, self.days_per_query, self.max_days_in_memory 122 | # ) 123 | # for item in [ 124 | # METRICS_API_PROMETHEUS_STATUS_QUERIES.objects_sync_status_name, 125 | # METRICS_API_PROMETHEUS_STATUS_QUERIES.chargeback_sync_status_name, 126 | # ]: 
127 | # self.read_all( 128 | # start_date=effective_dates.next_fetch_start_date, 129 | # end_date=effective_dates.next_fetch_end_date, 130 | # query_type=item, 131 | # ) 132 | # self.last_available_date = effective_dates.next_fetch_end_date 133 | # self.cleanup_old_data(retention_start_date=effective_dates.retention_start_date) 134 | # self.metrics_dataset, is_none = self.get_dataset_for_timerange( 135 | # start_datetime=effective_dates.retention_start_date, end_datetime=effective_dates.retention_end_date 136 | # ) 137 | 138 | # def cleanup_old_data(self, retention_start_date: datetime.datetime): 139 | # start_ts = self.convert_dt_to_ts(ts_date=retention_start_date) 140 | # for (k1, k2), _ in self.scrape_status_dataset.copy().items(): 141 | # if k2 < start_ts: 142 | # del self.scrape_status_dataset[(k1, k2)] 143 | 144 | # def _get_dataset_for_timerange( 145 | # self, 146 | # start_datetime: datetime.datetime, 147 | # end_datetime: datetime.datetime, 148 | # **kwargs, 149 | # ): 150 | # start_ts = self.convert_dt_to_ts(ts_date=start_datetime) 151 | # end_ts = self.convert_dt_to_ts(ts_date=end_datetime) 152 | # for (k1, k2), _ in self.scrape_status_dataset.items(): 153 | # if k2 < start_ts or k2 > end_ts: 154 | # yield self.scrape_status_dataset[(k1, k2)] 155 | 156 | # def get_dataset_for_timerange(self, start_datetime: datetime.datetime, end_datetime: datetime.datetime, **kwargs): 157 | # """Wrapper over the internal method so that cross-imports are not necessary 158 | 159 | # Args: 160 | # start_datetime (datetime.datetime): Inclusive Start datetime 161 | # end_datetime (datetime.datetime): Exclusive end datetime 162 | 163 | # Returns: 164 | # pd.Dataframe: Returns a pandas dataframe with the filtered data 165 | # """ 166 | # yield self._get_dataset_for_timerange( 167 | # start_datetime=start_datetime, 168 | # end_datetime=end_datetime, 169 | # ) 170 | 171 | # def get_dataset_for_time_slice(self, time_slice: pd.Timestamp, **kwargs): 172 | # """Wrapper over the internal method so that cross-imports are not necessary 173 | 174 | # Args: 175 | # time_slice (pd.Timestamp): Time slice to be used for fetching the data from datafame for the exact timestamp 176 | 177 | # Returns: 178 | # pd.DataFrame: Returns a pandas Dataframe with the filtered data. 
179 | # """ 180 | # temp_data, is_none = self._get_dataset_for_exact_timestamp( 181 | # dataset=self.metrics_dataset, ts_column_name=METRICS_API_COLUMNS.timestamp, time_slice=time_slice 182 | # ) 183 | # if is_none: 184 | # return pd.DataFrame( 185 | # {}, 186 | # index=[ 187 | # METRICS_API_COLUMNS.timestamp, 188 | # METRICS_API_COLUMNS.query_type, 189 | # METRICS_API_COLUMNS.cluster_id, 190 | # METRICS_API_COLUMNS.principal_id, 191 | # ], 192 | # ) 193 | # else: 194 | # return temp_data 195 | -------------------------------------------------------------------------------- /src/data_processing/data_handlers/prom_metrics_api_handler.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from dataclasses import InitVar, dataclass, field 4 | from typing import Dict 5 | from urllib import parse 6 | 7 | import pandas as pd 8 | import requests 9 | 10 | from ccloud.connections import CCloudBase 11 | from data_processing.data_handlers.types import AbstractDataHandler 12 | from helpers import logged_method 13 | 14 | LOGGER = logging.getLogger(__name__) 15 | 16 | 17 | class MetricsAPIPrometheusQueries: 18 | request_bytes_name = "request_bytes" 19 | response_bytes_name = "response_bytes" 20 | request_bytes = "sum by (kafka_id, principal_id) (confluent_kafka_server_request_bytes)" 21 | response_bytes = "sum by (kafka_id, principal_id) (confluent_kafka_server_response_bytes)" 22 | 23 | def override_column_names(self, key, value): 24 | object.__setattr__(self, key, value) 25 | 26 | 27 | class MetricsAPIColumnNames: 28 | timestamp = "Interval" 29 | query_type = "QueryType" 30 | cluster_id = "KafkaClusterID" 31 | principal_id = "PrincipalID" 32 | value = "Value" 33 | 34 | 35 | METRICS_API_PROMETHEUS_QUERIES = MetricsAPIPrometheusQueries() 36 | METRICS_API_COLUMNS = MetricsAPIColumnNames() 37 | 38 | 39 | @dataclass 40 | class PrometheusMetricsDataHandler(AbstractDataHandler, CCloudBase): 41 | in_prometheus_url: InitVar[str | None] = field(default="http://localhost:9090") 42 | in_prometheus_query_endpoint: InitVar[str] = field(default="/api/v1/query_range") 43 | in_connection_kwargs: Dict = field(default=None) 44 | in_connection_auth: Dict = field(default_factory=dict()) 45 | days_per_query: int = field(default=7) 46 | max_days_in_memory: int = field(default=14) 47 | 48 | last_available_date: datetime.datetime = field(init=False) 49 | url: str = field(init=False) 50 | metrics_dataset: pd.DataFrame = field(init=False, default=None) 51 | 52 | def __post_init__(self, in_prometheus_url, in_prometheus_query_endpoint) -> None: 53 | # Initialize the super classes to set the internal attributes 54 | AbstractDataHandler.__init__(self, start_date=self.start_date) 55 | CCloudBase.__post_init__(self) 56 | LOGGER.info("Setting up Auth Type Supplied by the config file") 57 | self.override_auth_type_from_yaml(self.in_connection_auth) 58 | self.url = parse.urljoin(base=in_prometheus_url, url=in_prometheus_query_endpoint) 59 | LOGGER.debug(f"Prometheus URL: {self.url}") 60 | end_date = self.start_date + datetime.timedelta(days=self.days_per_query) 61 | # Set up params for querying the Billing API 62 | for item in [ 63 | METRICS_API_PROMETHEUS_QUERIES.request_bytes_name, 64 | METRICS_API_PROMETHEUS_QUERIES.response_bytes_name, 65 | ]: 66 | self.read_all(start_date=self.start_date, end_date=end_date, query_type=item) 67 | self.last_available_date = end_date 68 | LOGGER.debug(f"Finished Initializing PrometheusMetricsDataHandler") 69 | 70 | @logged_method 
71 | def read_all( 72 | self, 73 | start_date: datetime.datetime, 74 | end_date: datetime.datetime, 75 | query_type: str, 76 | params={"step": 3600}, 77 | **kwargs, 78 | ): 79 | headers = {"Content-Type": "application/x-www-form-urlencoded"} 80 | post_body = {} 81 | post_body["start"] = f'{start_date.replace(tzinfo=datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")}+00:00' 82 | post_body["end"] = f'{end_date.replace(tzinfo=datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")}+00:00' 83 | post_body["step"] = params["step"] 84 | post_body["query"] = METRICS_API_PROMETHEUS_QUERIES.__getattribute__(query_type) 85 | LOGGER.debug(f"Post Body: {post_body}") 86 | resp = requests.post( 87 | url=self.url, auth=self.http_connection, headers=headers, data=post_body, **self.in_connection_kwargs 88 | ) 89 | if resp.status_code == 200: 90 | LOGGER.debug("Received 200 OK from API") 91 | out_json = resp.json() 92 | if out_json is not None and out_json["data"] is not None: 93 | if out_json["data"]["result"]: 94 | LOGGER.info(f"Found {len(out_json['data']['result'])} items in API response.") 95 | for item in out_json["data"]["result"]: 96 | temp_data = [ 97 | { 98 | METRICS_API_COLUMNS.timestamp: pd.to_datetime(in_item[0], unit="s", utc=True), 99 | METRICS_API_COLUMNS.query_type: query_type, 100 | METRICS_API_COLUMNS.cluster_id: item["metric"]["kafka_id"], 101 | METRICS_API_COLUMNS.principal_id: item["metric"]["principal_id"], 102 | METRICS_API_COLUMNS.value: in_item[1], 103 | } 104 | for in_item in item["values"] 105 | ] 106 | if temp_data: 107 | if self.metrics_dataset is not None: 108 | self.metrics_dataset = pd.concat( 109 | [ 110 | self.metrics_dataset, 111 | pd.DataFrame.from_records( 112 | temp_data, 113 | index=[ 114 | METRICS_API_COLUMNS.timestamp, 115 | METRICS_API_COLUMNS.query_type, 116 | METRICS_API_COLUMNS.cluster_id, 117 | METRICS_API_COLUMNS.principal_id, 118 | ], 119 | ), 120 | ] 121 | ) 122 | else: 123 | self.metrics_dataset = pd.DataFrame.from_records( 124 | temp_data, 125 | index=[ 126 | METRICS_API_COLUMNS.timestamp, 127 | METRICS_API_COLUMNS.query_type, 128 | METRICS_API_COLUMNS.cluster_id, 129 | METRICS_API_COLUMNS.principal_id, 130 | ], 131 | ) 132 | else: 133 | LOGGER.debug("No data found in the API response. Response Received is: " + str(out_json)) 134 | else: 135 | raise Exception("Could not connect to Prometheus Server. Please check your settings. 
" + resp.text) 136 | 137 | @logged_method 138 | def read_next_dataset(self, exposed_timestamp: datetime.datetime): 139 | if self.is_next_fetch_required(exposed_timestamp, self.last_available_date, next_fetch_within_days=2): 140 | effective_dates = self.calculate_effective_dates( 141 | self.last_available_date, self.days_per_query, self.max_days_in_memory 142 | ) 143 | for item in [ 144 | METRICS_API_PROMETHEUS_QUERIES.request_bytes_name, 145 | METRICS_API_PROMETHEUS_QUERIES.response_bytes_name, 146 | ]: 147 | self.read_all( 148 | start_date=effective_dates.next_fetch_start_date, 149 | end_date=effective_dates.next_fetch_end_date, 150 | query_type=item, 151 | ) 152 | self.last_available_date = effective_dates.next_fetch_end_date 153 | self.metrics_dataset, is_none = self.get_dataset_for_timerange( 154 | start_datetime=effective_dates.retention_start_date, end_datetime=effective_dates.retention_end_date 155 | ) 156 | 157 | @logged_method 158 | def get_dataset_for_timerange(self, start_datetime: datetime.datetime, end_datetime: datetime.datetime, **kwargs): 159 | """Wrapper over the internal method so that cross-imports are not necessary 160 | 161 | Args: 162 | start_datetime (datetime.datetime): Inclusive Start datetime 163 | end_datetime (datetime.datetime): Exclusive end datetime 164 | 165 | Returns: 166 | pd.Dataframe: Returns a pandas dataframe with the filtered data 167 | """ 168 | return self._get_dataset_for_timerange( 169 | dataset=self.metrics_dataset, 170 | ts_column_name=METRICS_API_COLUMNS.timestamp, 171 | start_datetime=start_datetime, 172 | end_datetime=end_datetime, 173 | ) 174 | 175 | @logged_method 176 | def get_dataset_for_time_slice(self, time_slice: pd.Timestamp, **kwargs): 177 | """Wrapper over the internal method so that cross-imports are not necessary 178 | 179 | Args: 180 | time_slice (pd.Timestamp): Time slice to be used for fetching the data from datafame for the exact timestamp 181 | 182 | Returns: 183 | pd.DataFrame: Returns a pandas Dataframe with the filtered data. 
184 | """ 185 | temp_data, is_none = self._get_dataset_for_exact_timestamp( 186 | dataset=self.metrics_dataset, ts_column_name=METRICS_API_COLUMNS.timestamp, time_slice=time_slice 187 | ) 188 | if is_none: 189 | return pd.DataFrame( 190 | {}, 191 | index=[ 192 | METRICS_API_COLUMNS.timestamp, 193 | METRICS_API_COLUMNS.query_type, 194 | METRICS_API_COLUMNS.cluster_id, 195 | METRICS_API_COLUMNS.principal_id, 196 | ], 197 | ) 198 | else: 199 | return temp_data 200 | -------------------------------------------------------------------------------- /src/data_processing/data_handlers/types.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | from abc import ABC, abstractmethod 4 | from dataclasses import dataclass, field 5 | from types import NoneType 6 | from typing import Tuple 7 | 8 | import pandas as pd 9 | 10 | from helpers import logged_method 11 | 12 | LOGGER = logging.getLogger(__name__) 13 | 14 | 15 | @dataclass 16 | class EffectiveDates: 17 | curr_start_date: datetime.datetime 18 | curr_end_date: datetime.datetime 19 | next_fetch_start_date: datetime.datetime 20 | next_fetch_end_date: datetime.datetime 21 | retention_start_date: datetime.datetime 22 | retention_end_date: datetime.datetime 23 | 24 | 25 | @dataclass 26 | class AbstractDataHandler(ABC): 27 | start_date: datetime.datetime = field(init=True) 28 | 29 | @abstractmethod 30 | def read_all(self, start_date: datetime.datetime, end_date: datetime.datetime, **kwargs): 31 | pass 32 | 33 | @abstractmethod 34 | def get_dataset_for_timerange(self, start_datetime: datetime.datetime, end_datetime: datetime.datetime, **kwargs): 35 | pass 36 | 37 | @abstractmethod 38 | def read_next_dataset(self, exposed_timestamp: datetime.datetime): 39 | pass 40 | 41 | @logged_method 42 | def _generate_date_range_per_row( 43 | self, 44 | start_date: datetime.datetime, 45 | end_date: datetime.datetime, 46 | freq: str = "1H", 47 | ): 48 | start_date = start_date.replace(tzinfo=datetime.timezone.utc).combine( 49 | date=start_date.date(), 50 | time=datetime.time.min, 51 | tzinfo=datetime.timezone.utc, 52 | ) 53 | end_date = end_date.replace(tzinfo=datetime.timezone.utc).combine( 54 | date=end_date.date(), 55 | time=datetime.time.min, 56 | tzinfo=datetime.timezone.utc, 57 | ) 58 | end_date = end_date - datetime.timedelta(minutes=1) 59 | return pd.date_range(start_date, end_date, freq=freq) 60 | 61 | @logged_method 62 | def _generate_next_timestamp( 63 | self, curr_date: datetime.datetime, freq: str = "1H", position: int = 1 64 | ) -> pd.Timestamp: 65 | """Generates the pandas Timestamp item from the provided datetime object based on the frequency provided. 66 | position == 0 for converting current datetime to the pandas timestamp object 67 | position == 1 for converting next in freq sequence item to the pandas timestamp object 68 | 69 | Args: 70 | curr_date (datetime.datetime): datetime object which will be converted 71 | freq (str, optional): pandas freq object compatible string. Defaults to "1H". 72 | position (int, optional): which positional item will be converted to the pandas timestamp object. Defaults to 1. 
73 | 74 | Returns: 75 | pd.Timestamp: converted timestamp object 76 | """ 77 | start_date = curr_date.replace(minute=0, microsecond=0, tzinfo=datetime.timezone.utc) 78 | return pd.date_range(start_date, freq=freq, periods=2)[position] 79 | 80 | @logged_method 81 | def _get_dataset_for_timerange( 82 | self, 83 | dataset: pd.DataFrame, 84 | ts_column_name: str, 85 | start_datetime: datetime.datetime, 86 | end_datetime: datetime.datetime, 87 | **kwargs 88 | ): 89 | """Converts the chargeback dict stored internally to a dataframe and filter the data using the args 90 | 91 | Args: 92 | dataset (pd.DataFrame): input pandas Dataframe 93 | ts_column_name (str): Column name for the timestamp column in the index 94 | start_datetime (datetime.datetime): Inclusive start datetime 95 | end_datetime (datetime.datetime): Exclusive End datetime 96 | 97 | Returns: 98 | pd.DatFrame: return filtered pandas DataFrame 99 | """ 100 | start_date = pd.to_datetime(start_datetime) 101 | end_date = pd.to_datetime(end_datetime) 102 | if not isinstance(dataset, NoneType): 103 | if not dataset.empty: 104 | return ( 105 | dataset[ 106 | (dataset.index.get_level_values(ts_column_name) >= start_date) 107 | & (dataset.index.get_level_values(ts_column_name) < end_date) 108 | ], 109 | False, 110 | ) 111 | else: 112 | return (dataset, False) 113 | else: 114 | return (None, True) 115 | 116 | @logged_method 117 | def calculate_effective_dates( 118 | self, 119 | last_available_date: datetime.datetime, 120 | days_per_query: int, 121 | max_days_in_memory: int, 122 | ) -> EffectiveDates: 123 | curr_start_date = last_available_date - datetime.timedelta(days=days_per_query) 124 | curr_end_date = last_available_date 125 | next_fetch_start_date = last_available_date 126 | next_end_fetch_date = last_available_date + datetime.timedelta(days=days_per_query) 127 | retention_start_date = next_end_fetch_date - datetime.timedelta(days=max_days_in_memory) 128 | retention_end_date = next_end_fetch_date 129 | return EffectiveDates( 130 | curr_start_date, 131 | curr_end_date, 132 | next_fetch_start_date, 133 | next_end_fetch_date, 134 | retention_start_date, 135 | retention_end_date, 136 | ) 137 | 138 | @logged_method 139 | def is_next_fetch_required( 140 | self, 141 | curr_exposed_datetime: datetime.datetime, 142 | last_available_date: datetime.datetime, 143 | next_fetch_within_days: int = 2, 144 | ): 145 | if ( 146 | abs(int((curr_exposed_datetime - last_available_date) / datetime.timedelta(days=1))) 147 | < next_fetch_within_days 148 | ): 149 | LOGGER.debug("Next fetch is required as the data is not available in memory") 150 | return True 151 | else: 152 | LOGGER.debug("Next fetch is not required as the data is available in memory") 153 | return False 154 | 155 | @logged_method 156 | def _get_dataset_for_exact_timestamp( 157 | self, dataset: pd.DataFrame, ts_column_name: str, time_slice: pd.Timestamp, **kwargs 158 | ) -> Tuple[pd.DataFrame | None, bool]: 159 | """used to filter down the data in a dataframe to a specific timestamp that is present in a timestamp index 160 | 161 | Args: 162 | dataset (pd.DataFrame): The dataframe to filter the data 163 | ts_column_name (str): The timestamp column name used to filter the data 164 | time_slice (pd.Timestamp): The exact pandas timestamp used as the filter criterion 165 | 166 | Returns: 167 | _type_: _description_ 168 | """ 169 | if not isinstance(dataset, NoneType): 170 | if not dataset.empty: 171 | temp_ds = dataset.index.get_level_values(ts_column_name) 172 | return 
(dataset[(dataset.index.get_level_values(ts_column_name) == time_slice)], False) 173 | else: 174 | return (dataset, False) 175 | else: 176 | return (None, True) 177 | 178 | @logged_method 179 | def execute_requests(self, exposed_timestamp: datetime.datetime): 180 | self.read_next_dataset(exposed_timestamp=exposed_timestamp) 181 | -------------------------------------------------------------------------------- /src/helpers.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pprint 4 | import timeit 5 | from functools import wraps 6 | from os import environ 7 | 8 | ENV_PREFIX = "env::" 9 | pretty = pprint.PrettyPrinter(indent=2) 10 | 11 | # logging.basicConfig(level=logging.INFO, format="{asctime} {name:25s} {levelname:8s} {message}", style="{") 12 | LOG_LEVEL = logging.INFO 13 | 14 | 15 | def set_logger_level(log_level: int): 16 | global LOG_LEVEL 17 | LOG_LEVEL = log_level 18 | logging.basicConfig(level=LOG_LEVEL, format="{asctime} {name:25s} {levelname:8s} {message}", style="{", force=True) 19 | 20 | 21 | LOGGER = logging.getLogger(__name__) 22 | METHOD_BREADCRUMBS = False 23 | 24 | 25 | def set_breadcrumb_flag(breadcrumbs: bool): 26 | global METHOD_BREADCRUMBS 27 | METHOD_BREADCRUMBS = breadcrumbs 28 | 29 | 30 | def logged_method(func): 31 | @wraps(func) 32 | def add_entry_exit_logs(*args, **kwargs): 33 | if METHOD_BREADCRUMBS: 34 | LOGGER.info(f"Begin method execution:\t\t{str(func.__name__)}") 35 | ret = func(*args, **kwargs) 36 | if METHOD_BREADCRUMBS: 37 | LOGGER.info(f"End method execution:\t\t{str(func.__name__)}") 38 | return ret 39 | 40 | return add_entry_exit_logs 41 | 42 | 43 | @logged_method 44 | def get_env_var(var_name: str): 45 | if environ.get(var_name.strip()) is None: 46 | raise Exception("Cannot find environment variable " + var_name) 47 | else: 48 | return environ[var_name] 49 | 50 | 51 | @logged_method 52 | def find_replace_env_vars(input: str, env_prefix=ENV_PREFIX): 53 | if input.startswith(env_prefix): 54 | input = input.split(env_prefix)[1] 55 | return get_env_var(input) 56 | else: 57 | return input 58 | 59 | 60 | def env_parse_replace(input): 61 | if isinstance(input, dict): 62 | for k, v in input.items(): 63 | if isinstance(v, dict) or isinstance(v, list): 64 | env_parse_replace(v) 65 | elif isinstance(v, str): 66 | input[k] = find_replace_env_vars(v) 67 | elif isinstance(input, list): 68 | for k, v in enumerate(input): 69 | if isinstance(v, dict) or isinstance(v, list): 70 | env_parse_replace(v) 71 | elif isinstance(v, str): 72 | input[k] = find_replace_env_vars(v) 73 | 74 | 75 | @logged_method 76 | def ensure_path(path: str): 77 | if not os.path.exists(path): 78 | os.makedirs(path) 79 | print(f"Directory Created: {path}") 80 | # else: 81 | # print(f"Path already present: {path}") 82 | 83 | 84 | @logged_method 85 | def sanitize_id(input: str) -> str: 86 | return input.strip().replace(" ", "_").lower() 87 | 88 | 89 | @logged_method 90 | def sanitize_metric_name(input: str) -> str: 91 | return input.strip().replace("/", "_").replace(" ", "_").replace(".", "_") 92 | 93 | 94 | @logged_method 95 | def mandatory_check(key, value): 96 | if not value: 97 | raise Exception(key + " is a mandatory attribute. 
Please populate to ensure correct functionality.") 98 | 99 | 100 | @logged_method 101 | def check_pair(key1Name, key1Value, key2Name, key2Value): 102 | if (key1Value and not key2Value) or (not key1Value and key2Value) or (not key1Value and not key2Value): 103 | raise Exception("Both " + key1Name + " & " + key2Name + " must be present in the configuration.") 104 | return 105 | 106 | 107 | def printline(): 108 | print("=" * 80) 109 | 110 | 111 | def timed_method(func): 112 | @wraps(func) 113 | def add_timer(*args, **kwargs): 114 | start = timeit.default_timer() 115 | ret = func(*args, **kwargs) 116 | stop = timeit.default_timer() 117 | LOGGER.debug(f"Time to execute method:\t\t{func.__name__}:\t\t\t", stop - start) 118 | return ret 119 | 120 | return add_timer 121 | 122 | 123 | # BILLING_METRICS_SCOPE = { 124 | # "request_bytes": sanitize_metric_name("io.confluent.kafka.server/request_bytes"), 125 | # "response_bytes": sanitize_metric_name("io.confluent.kafka.server/response_bytes"), 126 | # } 127 | 128 | 129 | if __name__ == "__main__": 130 | test = ["env:safdsaf", "regular", ENV_PREFIX + "CONFLUENT_CLOUD_EMAIL"] 131 | 132 | for item in test: 133 | print(find_replace_env_vars(item)) 134 | -------------------------------------------------------------------------------- /src/internal_data_probe.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | 4 | from flask import Flask 5 | 6 | from helpers import logged_method 7 | 8 | LOGGER = logging.getLogger(__name__) 9 | 10 | internal_api = Flask(__name__) 11 | 12 | READINESS_FLAG = False 13 | CURRENT_EXPOSED_DATE: datetime = None 14 | 15 | 16 | @logged_method 17 | def set_readiness(readiness_flag: bool): 18 | global READINESS_FLAG 19 | READINESS_FLAG = readiness_flag 20 | 21 | 22 | @internal_api.route("/is_ready", methods=["GET"]) 23 | def is_ready(): 24 | global READINESS_FLAG 25 | return str(READINESS_FLAG) 26 | 27 | 28 | def set_current_exposed_date(exposed_date: datetime): 29 | global CURRENT_EXPOSED_DATE 30 | CURRENT_EXPOSED_DATE = exposed_date 31 | 32 | 33 | @internal_api.route("/current_exposed_date", methods=["GET"]) 34 | def current_exposed_date(): 35 | global CURRENT_EXPOSED_DATE 36 | return str(CURRENT_EXPOSED_DATE) 37 | 38 | 39 | @internal_api.route("/current_timestamp", methods=["GET"]) 40 | def current_timestamp(): 41 | global CURRENT_EXPOSED_DATE 42 | return str(int(CURRENT_EXPOSED_DATE.timestamp())) 43 | -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from workflow_runner import execute_workflow 5 | 6 | parser = argparse.ArgumentParser( 7 | description="Command line arguments for controlling the application", 8 | add_help=True, 9 | ) 10 | 11 | wf_args = parser.add_argument_group("workflow-args", "Workflow Selection Arguments") 12 | wf_args.add_argument( 13 | "--config-file", 14 | type=str, 15 | default="./config/config_internal.yaml", 16 | help="Provide the path to the config file. 
Default is ./config/config_internal.yaml.", 17 | ) 18 | 19 | arg_flags = parser.parse_args() 20 | 21 | logging.basicConfig(level=logging.INFO, format="{asctime} {name:25s} {levelname:8s} {message}", style="{") 22 | 23 | execute_workflow(arg_flags) 24 | -------------------------------------------------------------------------------- /src/prometheus_processing/custom_collector.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | 4 | from prometheus_client import Gauge 5 | 6 | from helpers import logged_method 7 | from prometheus_processing.notifier import NotifierAbstract 8 | 9 | LOGGER = logging.getLogger(__name__) 10 | 11 | 12 | class TimestampedCollector(NotifierAbstract, Gauge): 13 | def __init__(self, *args, in_begin_timestamp: datetime.datetime = None, **kwargs): 14 | NotifierAbstract.__init__(self) 15 | Gauge.__init__(self, *args, **kwargs) 16 | if in_begin_timestamp is not None: 17 | self.set_timestamp(curr_timestamp=in_begin_timestamp) 18 | 19 | @logged_method 20 | def collect(self): 21 | try: 22 | metrics = super().collect() 23 | ts_value = int(self._exported_timestamp.timestamp()) / 1000 24 | for metric in metrics: 25 | metric.samples = [ 26 | type(sample)(sample.name, sample.labels, sample.value, ts_value, sample.exemplar) 27 | for sample in metric.samples 28 | ] 29 | return metrics 30 | finally: 31 | self.notify() 32 | 33 | @logged_method 34 | def notify(self) -> None: 35 | LOGGER.debug("Notifying observers") 36 | for item in self._observers: 37 | item.update(self) 38 | 39 | @logged_method 40 | def convert_ts_to_str(self, input_datetime: datetime.datetime) -> str: 41 | return input_datetime.strftime("%Y_%m_%d_%H_%M_%S") 42 | -------------------------------------------------------------------------------- /src/prometheus_processing/notifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import datetime 4 | import logging 5 | from abc import ABC, abstractmethod 6 | from typing import List 7 | 8 | import pandas as pd 9 | 10 | from helpers import logged_method 11 | 12 | LOGGER = logging.getLogger(__name__) 13 | 14 | 15 | class Observer(ABC): 16 | """The Observer Pattern used here is used to signal an observer to a notifier class. 17 | The notifier class collects all the references to the observer and notifies them when a new event occurs. 18 | This needs the observer to be registered under the notifier using the attach method of the observer 19 | 20 | """ 21 | 22 | @abstractmethod 23 | def update(self, notifier: NotifierAbstract) -> None: 24 | """ 25 | Receive update from Notifier and do something. 26 | """ 27 | pass 28 | 29 | def attach(self, notifier: NotifierAbstract) -> None: 30 | """ 31 | Attach an observer to the Notifier. 32 | """ 33 | LOGGER.debug("Observer: Sending request to attach for notifications.") 34 | notifier.attach(self) 35 | 36 | def _generate_next_timestamp( 37 | self, curr_date: datetime.datetime, freq: str = "1H", periods: int = 2 38 | ) -> pd.Timestamp: 39 | start_date = curr_date.replace(minute=0, microsecond=0, tzinfo=datetime.timezone.utc) 40 | return pd.date_range(start_date, freq=freq, periods=periods)[1] 41 | 42 | 43 | class NotifierAbstract(ABC): 44 | """This class works in conjunction with the Observer Class above to gather all the observers in a list 45 | and execute their update method when some action is performed. 
46 | It is a custom way to link different objects together while not disturbing any existing functionality. 47 | 48 | """ 49 | 50 | _observers: List[Observer] 51 | _exported_timestamp: datetime.datetime 52 | 53 | def __init__(self) -> None: 54 | self._observers = [] 55 | 56 | def set_timestamp(self, curr_timestamp: datetime.datetime = None): 57 | """used to set the timestamp that we use to assign timestamp to the Prometheus collector 58 | 59 | Args: 60 | curr_timestamp (datetime.datetime, optional): Provide the timestamp to be used as timestamp for the next collection cycle. Defaults to None. 61 | """ 62 | self._exported_timestamp = self.normalize_datetime(in_dt=curr_timestamp) 63 | return self 64 | 65 | def normalize_datetime(self, in_dt: datetime.datetime = None) -> datetime.datetime: 66 | """Internal method to normalize a datetime toa specific targeted format. 67 | Changes the timezone to UTC and sets the time to midnight for the datetime. 68 | 69 | Args: 70 | in_dt (datetime.datetime, optional): Input datetime for normalization. Defaults to None. 71 | 72 | Returns: 73 | datetime.datetime: Output normalized datetime 74 | """ 75 | if in_dt is not None: 76 | return in_dt.replace(minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc) 77 | 78 | else: 79 | return datetime.datetime.utcnow().replace(minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc) 80 | 81 | @logged_method 82 | def attach(self, observer: Observer) -> None: 83 | """ 84 | Attach an observer to the Notifier. 85 | """ 86 | LOGGER.debug("Notifier: Attaching an observer.") 87 | self._observers.append(observer) 88 | 89 | @logged_method 90 | def detach(self, observer: Observer) -> None: 91 | """ 92 | Detach an observer from the Notifier. 93 | """ 94 | LOGGER.debug("Notifier: Detaching an observer.") 95 | self._observers.remove(observer) 96 | 97 | @abstractmethod 98 | def notify(self) -> None: 99 | """ 100 | Notify all observers about an event. 
101 | """ 102 | pass 103 | -------------------------------------------------------------------------------- /src/storage_mgmt.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import threading 4 | from dataclasses import dataclass, field 5 | from json import dumps, load 6 | from time import sleep 7 | from typing import Dict, List, Tuple 8 | 9 | import psutil 10 | 11 | from helpers import logged_method, sanitize_metric_name 12 | 13 | LOGGER = logging.getLogger(__name__) 14 | 15 | # class DirType(Enum): 16 | # MetricsData = auto() 17 | # BillingsData = auto() 18 | # OutputData = auto() 19 | # PersistenceStats = auto() 20 | 21 | # code_run_stats = TimestampedCollector( 22 | # "python_custom_memory_used_bytes", 23 | # "Total memory consumed by the process.", 24 | # [], 25 | # in_begin_timestamp=datetime.datetime.now(), 26 | # ) 27 | 28 | 29 | # @dataclass(kw_only=True) 30 | # class StoragePathManagement: 31 | # basepath: str = field(default=os.getcwd()) 32 | # base_dir: str = field(default="output") 33 | 34 | # def __generate_path(self, org_id: str, dir_type: DirType): 35 | # return os.path.join(self.basepath, self.base_dir, org_id, dir_type.name, "") 36 | 37 | # def ensure_path(self, org_id: str, dir_type: List[DirType]): 38 | # for item in dir_type: 39 | # ensure_path(self.__generate_path(org_id=org_id, dir_type=item)) 40 | 41 | # def delete_path(self, org_id: str, dir_type: DirType): 42 | # full_path = self.__generate_path(org_id=org_id, dir_type=dir_type) 43 | # if os.path.exists(full_path) and full_path.startswith(os.path.join(self.basepath, self.base_dir)): 44 | # shutil.rmtree(full_path) 45 | 46 | # def get_path(self, org_id: str, dir_type=DirType, ensure_exists: bool = False): 47 | # if ensure_exists: 48 | # self.ensure_path(org_id=org_id, dir_type=[dir_type]) 49 | # return self.__generate_path(org_id=org_id, dir_type=dir_type) 50 | 51 | 52 | # STORAGE_PATH = StoragePathManagement() 53 | # STORAGE_PATH.ensure_path(org_id="common", dir_type=[]) 54 | 55 | 56 | @dataclass 57 | class ThreadableRunner: 58 | object_lock: threading.Lock = field(init=False) 59 | sync_runner_status: threading.Event = field(init=False) 60 | 61 | def __post_init__(self): 62 | self.sync_runner_status = threading.Event() 63 | self.object_lock = threading.Lock() 64 | self.start_sync() 65 | 66 | def start_sync(self): 67 | self.sync_runner_status.set() 68 | 69 | def stop_sync(self): 70 | self.sync_runner_status.clear() 71 | 72 | def get_new_thread(self, target_func, tick_duration_secs: int): 73 | temp = threading.Thread(target=target_func, args=(self, tick_duration_secs)) 74 | return temp 75 | 76 | def invoke_custom_func(self, target_func, *args): 77 | temp = threading.Thread(target=target_func, args=(self, *args)) 78 | return temp 79 | 80 | 81 | COMMON_THREAD_RUNNER = ThreadableRunner() 82 | 83 | 84 | @dataclass(kw_only=True) 85 | class PersistenceStore(ThreadableRunner): 86 | flush_to_disk_interval_sec: int = field(default=3) 87 | historical_data_to_maintain: int = field(default=7) 88 | data_type: str = field(init=True) 89 | persistence_path: Dict[str, Dict[str, object]] = field(init=False, repr=False, default=dict) 90 | 91 | def __post_init__(self): 92 | super().__post_init__() 93 | self.data_type = sanitize_metric_name(self.data_type).lower() 94 | self.persistence_path = {} 95 | self.add_persistence_path(org_id="common") 96 | self.rehydrate_persistence_status() 97 | 98 | def stop_sync(self): 99 | super().stop_sync() 100 | 
self.write_file(force_write=True) 101 | 102 | def __encode_key(self, key: Tuple) -> str: 103 | return "___".join(str(x).replace(" ", "_") for x in key) 104 | 105 | def __decode_key(self, key: str) -> str: 106 | return tuple([x for x in key.split("___")]) 107 | 108 | def add_persistence_path(self, org_id: str, ensure_exists: bool = False): 109 | temp = self.persistence_path.get(org_id, {}) 110 | # temp = self.persistence_path.get(org_id, dict()) 111 | if not temp: 112 | with self.object_lock: 113 | temp["path"] = os.path.join( 114 | STORAGE_PATH.get_path( 115 | org_id=org_id, dir_type=DirType.PersistenceStats, ensure_exists=ensure_exists 116 | ), 117 | f"{self.data_type}_{DirType.PersistenceStats.name}.json", 118 | ) 119 | temp["sync_needed"] = False 120 | temp["data"] = dict() 121 | self.persistence_path[org_id] = temp 122 | self.rehydrate_persistence_status(org_id=org_id) 123 | 124 | @logged_method 125 | def rehydrate_persistence_status(self, org_id: str = "common"): 126 | org_details = self.persistence_path[org_id] 127 | path_str = org_details["path"] 128 | if os.path.exists(path_str) and os.stat(path_str).st_size > 0: 129 | with open(path_str, "r") as f: 130 | org_details["data"] = load(f) 131 | for item in self.__find_datasets_to_evict(org_id=org_id): 132 | org_details["data"].pop(item) 133 | # print(org_details["data"]) 134 | 135 | @logged_method 136 | def add_data_to_persistence_store(self, org_id: str, key: Tuple, value: str): 137 | if not org_id in self.persistence_path.keys(): 138 | self.add_persistence_path(org_id=org_id, ensure_exists=True) 139 | org_data = self.persistence_path[org_id]["data"] 140 | org_data: Dict[Tuple, str] = org_data 141 | temp_key = self.__encode_key(key=key) 142 | if temp_key in org_data.keys(): 143 | temp_val = org_data[temp_key] 144 | if value not in temp_val: 145 | with self.object_lock: 146 | temp_val.append(value) 147 | org_data[temp_key] = temp_val 148 | self.persistence_path[org_id]["sync_needed"] = True 149 | else: 150 | with self.object_lock: 151 | org_data[temp_key] = [value] 152 | self.persistence_path[org_id]["sync_needed"] = True 153 | # if sync_needed_flag: 154 | # with self.object_lock: 155 | # self.persistence_path[org_id]["sync_needed"] = True 156 | 157 | @logged_method 158 | def is_dataset_present(self, org_id: str, key: Tuple, value: dict) -> bool: 159 | temp_key = self.__encode_key(key=key) 160 | if org_id in self.persistence_path.keys(): 161 | org_data = self.persistence_path[org_id]["data"] 162 | if temp_key in org_data.keys(): 163 | if value in org_data[temp_key]: 164 | return True 165 | return False 166 | 167 | @logged_method 168 | def write_file(self, force_write: bool = False): 169 | with self.object_lock: 170 | for org_id, v in self.persistence_path.items(): 171 | if org_id not in ["common"] and (v["sync_needed"] or force_write): 172 | with open(v["path"], "w") as f: 173 | f.write(dumps(v["data"], indent=1)) 174 | 175 | @logged_method 176 | def __find_datasets_to_evict(self, org_id: str) -> List[str]: 177 | if self.historical_data_to_maintain == -1: 178 | return [] 179 | org_data = self.persistence_path[org_id]["data"] 180 | temp = list(org_data.keys()) 181 | temp.sort(reverse=True) 182 | return temp[self.historical_data_to_maintain :] 183 | 184 | # if self.historical_data_to_maintain == -1: 185 | # return [] 186 | # temp = list(self.__status.keys()) 187 | # temp.sort(reverse=True) 188 | # return temp[self.historical_data_to_maintain :] 189 | 190 | 191 | @logged_method 192 | def sync_to_file(persistence_object: PersistenceStore, 
flush_to_file: int = 5): 193 | while persistence_object.sync_runner_status.is_set(): 194 | persistence_object.write_file() 195 | sleep(flush_to_file) 196 | 197 | 198 | @logged_method 199 | def current_memory_usage(persistence_object: ThreadableRunner, evaluation_interval: int = 5): 200 | while persistence_object.sync_runner_status.is_set(): 201 | mem_used = psutil.Process().memory_info().rss 202 | print(f"Current Memory Utilization: {mem_used / (2**20)}") 203 | # curr_ts = datetime.datetime.utcnow().replace( 204 | # hour=0, minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc 205 | # ) 206 | # code_run_stats.set_timestamp(curr_timestamp=curr_ts) 207 | # code_run_stats.set(mem_used) 208 | sleep(evaluation_interval) 209 | 210 | 211 | # This will be use to store status for writing Metrics datasets to disk. 212 | # METRICS_PERSISTENCE_STORE = PersistenceStore(data_type="Metrics") 213 | 214 | # BILLING_PERSISTENCE_STORE = PersistenceStore( 215 | # out_path=os.path.join(STORAGE_PATH[DirType.PersistenceStats], f"Billing_{DirType.PersistenceStats.name}.json"), 216 | # historical_data_to_maintain=-1, 217 | # ) 218 | # This will be use to store status for writing chargeback datasets to disk. 219 | # CHARGEBACK_PERSISTENCE_STORE = PersistenceStore(data_type="Chargeback", historical_data_to_maintain=-1) 220 | # This will be use to store status for writing chargeback datasets to disk. 221 | # BILLING_PERSISTENCE_STORE = PersistenceStore(data_type="Billing", historical_data_to_maintain=-1) 222 | -------------------------------------------------------------------------------- /src/workflow_runner.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import threading 3 | from argparse import Namespace 4 | from dataclasses import dataclass, field 5 | from enum import Enum, auto 6 | from time import sleep 7 | from typing import Dict 8 | 9 | import prometheus_client 10 | import yaml 11 | 12 | import internal_data_probe 13 | from ccloud.org import CCloudOrgList 14 | from helpers import env_parse_replace, logged_method, set_breadcrumb_flag, set_logger_level 15 | from storage_mgmt import COMMON_THREAD_RUNNER, current_memory_usage 16 | 17 | LOGGER = logging.getLogger(__name__) 18 | 19 | 20 | @dataclass(kw_only=True) 21 | class AppProps: 22 | days_in_memory: int = field(default=30) 23 | relative_output_dir: str = field(default="output") 24 | loglevel: str = field(default="INFO") 25 | 26 | @logged_method 27 | def get_app_props(in_config: Dict): 28 | global APP_PROPS 29 | 30 | if not in_config["system"]: 31 | LOGGER.warning("No System Configuration found. Using default values.") 32 | APP_PROPS = AppProps() 33 | else: 34 | LOGGER.debug("System Configuration found. Using values from config file.") 35 | config: Dict = in_config["system"] 36 | LOGGER.debug("Parsing loglevel from config file") 37 | logLvl = config.get("log_level", "INFO").upper() 38 | match logLvl: 39 | case "DEBUG": 40 | LOGGER.info("Setting loglevel to DEBUG") 41 | loglevel = logging.DEBUG 42 | case "INFO": 43 | LOGGER.info("Setting loglevel to INFO") 44 | loglevel = logging.INFO 45 | case "WARNING": 46 | LOGGER.info("Setting loglevel to WARNING") 47 | loglevel = logging.WARNING 48 | case "ERROR": 49 | LOGGER.info("Setting loglevel to ERROR") 50 | loglevel = logging.ERROR 51 | case _: 52 | LOGGER.info(f"Cannot understand log level {logLvl}. 
Setting loglevel to INFO") 53 | loglevel = logging.INFO 54 | set_logger_level(loglevel) 55 | breadcrumbs = config.get("enable_method_breadcrumbs", False) 56 | breadcrumbs = bool(breadcrumbs if breadcrumbs is True else False) 57 | set_breadcrumb_flag(breadcrumbs) 58 | LOGGER.info("Parsing Core Application Properties") 59 | APP_PROPS = AppProps( 60 | days_in_memory=config.get("days_in_memory", 7), 61 | relative_output_dir=config.get("output_dir_name", "output"), 62 | loglevel=loglevel, 63 | ) 64 | 65 | 66 | class WorkflowStage(Enum): 67 | GATHER = auto() 68 | CALCULATE_OUTPUT = auto() 69 | SLEEP = auto() 70 | 71 | @logged_method 72 | def try_parse_config_file(config_yaml_path: str) -> Dict: 73 | LOGGER.debug("Trying to parse Configuration File: " + config_yaml_path) 74 | with open(config_yaml_path, "r") as config_file: 75 | core_config = yaml.safe_load(config_file) 76 | LOGGER.debug("Successfully parsed Configuration File: " + config_yaml_path) 77 | LOGGER.debug("Parsing Environment Variables") 78 | env_parse_replace(core_config) 79 | LOGGER.debug("Successfully parsed Environment Variables") 80 | return core_config 81 | 82 | @logged_method 83 | def run_gather_cycle(ccloud_orgs: CCloudOrgList): 84 | # This will try to refresh and read all the data that might be new from the last gather phase. 85 | # Org Object has built in safeguard to prevent repetitive gathering for the same datasets. 86 | # for Cloud Objects --> 30 minutes is the minimum. 87 | # for Metrics API objects --> persistence store knows what all has been cached and written to disk and will not be gathered again. 88 | # for billing CSV files --> if the data is already read in memory, it wont be read in again. 89 | ccloud_orgs.execute_requests() 90 | 91 | @logged_method 92 | def run_calculate_cycle(ccloud_orgs: CCloudOrgList): 93 | ccloud_orgs.run_calculations() 94 | 95 | @logged_method 96 | def execute_workflow(arg_flags: Namespace): 97 | LOGGER.info("Starting Workflow Runner") 98 | LOGGER.debug("Debug Mode is ON") 99 | LOGGER.debug("Parsing config file") 100 | core_config = try_parse_config_file(config_yaml_path=arg_flags.config_file) 101 | LOGGER.debug("Successfully parsed config file") 102 | LOGGER.debug("Setting up Core App Properties") 103 | get_app_props(core_config["config"]) 104 | 105 | 106 | thread_configs = [ 107 | # [COMMON_THREAD_RUNNER, current_memory_usage, 5], 108 | # [METRICS_PERSISTENCE_STORE, sync_to_file, METRICS_PERSISTENCE_STORE.flush_to_disk_interval_sec], 109 | # [CHARGEBACK_PERSISTENCE_STORE, sync_to_file, CHARGEBACK_PERSISTENCE_STORE.flush_to_disk_interval_sec], 110 | # [BILLING_PERSISTENCE_STORE, sync_to_file, BILLING_PERSISTENCE_STORE.flush_to_disk_interval_sec], 111 | ] 112 | 113 | threads_list = list() 114 | for _, item in enumerate(thread_configs): 115 | threads_list.append(item[0].get_new_thread(target_func=item[1], tick_duration_secs=item[2])) 116 | 117 | try: 118 | LOGGER.info("Starting all threads") 119 | for item in threads_list: 120 | item.start() 121 | 122 | LOGGER.debug("Starting Prometheus Server") 123 | prometheus_client.start_http_server(8000) 124 | 125 | LOGGER.debug("Starting Internal API Server for sharing state") 126 | threading.Thread(target=internal_data_probe.internal_api.run, kwargs={"host": "0.0.0.0", "port": 8001}).start() 127 | 128 | # threading.Thread(target=internal_data_probe.internal_api.run, kwargs={"host": "0.0.0.0", "port": 8001}).start() 129 | 130 | # This step will initialize the CCloudOrg structure along with all the internal Objects in it. 
131 | # Those will include the first run for all the data gather step as well. 132 | # There are some safeguards already implemented to prevent request choking, so, it should be safe in most use cases. 133 | LOGGER.info("Initializing Core CCloudOrgList Object") 134 | ccloud_orgs = CCloudOrgList( 135 | in_orgs=core_config["config"]["org_details"], 136 | in_days_in_memory=APP_PROPS.days_in_memory, 137 | ) 138 | 139 | LOGGER.info("Initialization Complete.") 140 | internal_data_probe.set_readiness(readiness_flag=True) 141 | 142 | # This is the main loop for the application. 143 | LOGGER.info("Starting Main Loop") 144 | while True: 145 | sleep(10**8) 146 | 147 | finally: 148 | LOGGER.info("Shutting down all threads") 149 | # Begin shutdown process. 150 | for item in thread_configs: 151 | item[0].stop_sync() 152 | LOGGER.info("Waiting for State Sync ticker for Final sync before exit") 153 | for item in threads_list: 154 | item.join() 155 | --------------------------------------------------------------------------------