├── .github ├── dependabot.yml └── workflows │ ├── container.yml │ ├── flake8.yml │ └── publish.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── Manifest.in ├── README.md ├── chaotic ├── __init__.py ├── app.py ├── cloud │ ├── __init__.py │ ├── cloudscale_ch.py │ ├── cloudstack.py │ ├── digitalocean.py │ ├── hcloud.py │ ├── nomad.py │ ├── proxmox_kvm.py │ └── vultr.py ├── log.py └── version.py ├── docker └── config.yaml ├── examples ├── config_cloudscale_ch.yaml ├── config_cloudstack.yaml ├── config_digitalocean.yaml ├── config_hcloud.yaml ├── config_nomad.yaml ├── config_proxmox_kvm.yaml └── config_vultr.yaml ├── logging.ini ├── requirements.dev.txt ├── requirements.in ├── requirements.txt ├── setup.py └── tox.ini /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Set update schedule for GitHub Actions 2 | --- 3 | version: 2 4 | updates: 5 | - package-ecosystem: "github-actions" 6 | directory: "/" 7 | schedule: 8 | interval: "weekly" 9 | 10 | - package-ecosystem: "pip" 11 | directory: "/" 12 | schedule: 13 | interval: "weekly" 14 | - package-ecosystem: "docker" 15 | directory: "/" 16 | schedule: 17 | interval: "weekly" 18 | -------------------------------------------------------------------------------- /.github/workflows/container.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # GitHub recommends pinning actions to a commit SHA. 7 | # To get a newer version, you will need to update the SHA. 8 | # You can also reference a tag or branch, but the action may change without warning. 
9 | --- 10 | name: Create and publish a Container image 11 | 12 | on: 13 | push: 14 | branches: 15 | - "master" 16 | tags: 17 | - "v*" 18 | env: 19 | REGISTRY: ghcr.io 20 | IMAGE_NAME: ngine-io/chaotic 21 | 22 | jobs: 23 | build-and-push-image: 24 | runs-on: ubuntu-latest 25 | permissions: 26 | contents: read 27 | packages: write 28 | 29 | steps: 30 | - name: Checkout repository 31 | uses: actions/checkout@v4 32 | 33 | - name: Set up Python 34 | uses: actions/setup-python@v5 35 | with: 36 | python-version: "3.x" 37 | 38 | - name: Install dependencies 39 | run: | 40 | python -m pip install --upgrade pip 41 | pip install -U setuptools wheel 42 | 43 | - name: Build 44 | run: | 45 | python setup.py sdist bdist_wheel 46 | 47 | - name: Set up QEMU 48 | uses: docker/setup-qemu-action@v3 49 | 50 | - name: Set up Docker Buildx 51 | uses: docker/setup-buildx-action@v3 52 | 53 | - name: Log in to the Container registry 54 | uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 55 | with: 56 | registry: ${{ env.REGISTRY }} 57 | username: ${{ github.actor }} 58 | password: ${{ secrets.GITHUB_TOKEN }} 59 | 60 | - name: Extract metadata (tags, labels) for Docker 61 | id: meta 62 | uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 63 | with: 64 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 65 | tags: | 66 | type=ref,event=branch 67 | type=semver,pattern={{version}} 68 | type=semver,pattern={{major}}.{{minor}} 69 | 70 | - name: Build and push Docker image 71 | uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0 72 | with: 73 | context: . 
74 | push: true 75 | pull: true 76 | platforms: linux/arm64,linux/amd64 77 | tags: ${{ steps.meta.outputs.tags }} 78 | labels: ${{ steps.meta.outputs.labels }} 79 | -------------------------------------------------------------------------------- /.github/workflows/flake8.yml: -------------------------------------------------------------------------------- 1 | name: Flake8 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v4 11 | - name: GitHub Action for Flake8 12 | uses: cclauss/GitHub-Action-for-Flake8@v0.5.0 13 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Set up Python 13 | uses: actions/setup-python@v5 14 | with: 15 | python-version: "3.x" 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | pip install -U setuptools wheel twine 20 | - name: Build and publish 21 | env: 22 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 23 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 24 | run: | 25 | python setup.py sdist bdist_wheel 26 | twine upload dist/* 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.egg-info 3 | build 4 | dist 5 | .env 6 | .venv 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker.io/python:3.13.3-slim 2 | 3 | WORKDIR /build 4 | COPY . . 5 | 6 | RUN pip install . 
7 | 8 | WORKDIR /app 9 | 10 | RUN rm -rf /build 11 | COPY ./docker/config.yaml . 12 | 13 | USER 1000 14 | 15 | ENTRYPOINT ["chaotic-ngine"] 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 - René Moser 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# Remove build artifacts and Python bytecode caches.
clean:
	rm -rf *.egg-info
	rm -rf *.dist-info
	rm -rf dist
	rm -rf build
	# The previous form (`-exec rm -fr {} || true \;`) was split by the shell at
	# `||`, so find never received its -exec terminator and removed nothing.
	# -prune stops find from descending into a directory it is about to delete.
	find . -type d -name '__pycache__' -prune -exec rm -rf {} +
14 | NOTE: The config is re-read on every interval, no need to restart the service after changing the config. 15 | 16 | ## Clouds 17 | 18 | Currently implemented Clouds: 19 | 20 | - DigitalOcean 21 | - Vultr 22 | - Hetzner Cloud 23 | - Proxmox KVM 24 | - CloudStack 25 | - Hashicorp Nomad 26 | - cloudscale.ch 27 | 28 | ## Install 29 | 30 | ``` 31 | pip3 install -U chaotic-ngine 32 | ``` 33 | 34 | ## Configure 35 | 36 | Create a file named `config.yaml` or use the env var `CHAOTIC_CONFIG` to point to a config file (also see the `examples` directory): 37 | 38 | ``` 39 | export CHAOTIC_CONFIG=config_nomad.yaml 40 | ``` 41 | 42 | ### Exclude times 43 | 44 | Define times when the bot should not perform real actions (it will run in dry-run mode instead): 45 | 46 | ```yaml 47 | --- 48 | kind: ... 49 | excludes: 50 | weekdays: 51 | - Sun 52 | - Sat 53 | times_of_day: 54 | - 22:00-08:00 55 | - 11:00-14:00 56 | days_of_year: 57 | - Jan01 58 | - Apr01 59 | - May01 60 | - Aug01 61 | - Dec24 62 | ``` 63 | 64 | ### CloudStack 65 | 66 | Chaotic will stop a server selected by an optional filter tag and stop/start it with a delay of a configurable time (default 60s). 67 | 68 | ``` 69 | export CLOUDSTACK_API_KEY="..." 70 | export CLOUDSTACK_API_SECRET="..." 71 | export CLOUDSTACK_API_ENDPOINT="..." 72 | ``` 73 | 74 | ```yaml 75 | --- 76 | kind: cloudstack 77 | dry_run: false 78 | configs: 79 | 80 | # Optional, filter tag 81 | tag: 82 | key: chaos 83 | value: enabled 84 | 85 | # Optional, 60 seconds is the default 86 | wait_before_restart: 60 87 | ``` 88 | 89 | ### Vultr 90 | 91 | Chaotic will stop a server selected by an optional filter tag and stop/start it with a delay of a configurable time (default 60s). 92 | 93 | ``` 94 | export VULTR_API_KEY="..." 
95 | ``` 96 | 97 | ```yaml 98 | --- 99 | kind: vultr 100 | dry_run: true 101 | configs: 102 | 103 | # Optional instance tag filter 104 | tag: "chaos=opt-in" 105 | 106 | # Optional, 60 seconds is the default 107 | wait_before_restart: 60 108 | ``` 109 | 110 | ### Cloudscale.ch 111 | 112 | Chaotic will stop a server selected by an optional filter tag and stop/start it with a delay of a configurable time (default 60s). 113 | 114 | 115 | #### Config 116 | 117 | ``` 118 | export CLOUDSCALE_API_TOKEN="..." 119 | ``` 120 | 121 | ```yaml 122 | --- 123 | kind: cloudscale_ch 124 | dry_run: true 125 | configs: 126 | 127 | # Optional server tag filter 128 | filter_tag: "chaos=opt-in" 129 | 130 | # Optional, 60 seconds is the default 131 | wait_before_restart: 60 132 | ``` 133 | 134 | ### Hetzner Cloud 135 | 136 | Chaotic will stop a server selected by an optional filter label and stop/start it with a delay of a configurable time (default 60s). 137 | 138 | #### Config 139 | 140 | ``` 141 | export HCLOUD_API_TOKEN=... 142 | ``` 143 | 144 | ```yaml 145 | --- 146 | kind: hcloud 147 | dry_run: false 148 | configs: 149 | 150 | # Optional server label filter 151 | label: "chaos=enabled" 152 | 153 | # Optional, 60 seconds is the default 154 | wait_before_restart: 60 155 | ``` 156 | 157 | ### DigitalOcean Cloud 158 | 159 | Chaotic will stop a droplet selected by an optional filter tag and stop/start it with a delay of a configurable time (default 60s). 160 | 161 | #### Config 162 | 163 | ``` 164 | export DIGITALOCEAN_ACCESS_TOKEN=... 165 | ``` 166 | 167 | ```yaml 168 | --- 169 | kind: digitalocean 170 | dry_run: false 171 | configs: 172 | 173 | # Optional droplet tag filter 174 | tag: "chaos:enabled" 175 | 176 | # Optional, 60 seconds is the default 177 | wait_before_restart: 60 178 | ``` 179 | 180 | ### Nomad Job 181 | 182 | Chaotic will send an allocation signal to an allocation in the available namespaces selected by an allow list. 
183 | 184 | #### Config 185 | 186 | ``` 187 | export NOMAD_ADDR=http://nomad.example.com:4646 188 | ``` 189 | 190 | ```yaml 191 | --- 192 | kind: nomad 193 | dry_run: true 194 | configs: 195 | experiments: 196 | - job 197 | 198 | # Signals to choose from 199 | signals: 200 | - SIGKILL 201 | 202 | # Optional: namespace allowlist 203 | namespace_allowlist: 204 | - example-prod 205 | - foobar-prod 206 | 207 | # Optional: namespace denylist 208 | namespace_denylist: 209 | - default 210 | 211 | # Optional: job type skip list 212 | job_type_skiplist: 213 | - system 214 | - batch 215 | - sysbatch 216 | 217 | # Optional: job name skip list 218 | job_skiplist: 219 | - my-job-name 220 | 221 | # Optional: Add a meta tag in your nomad job "chaotic" = "false" to opt out 222 | job_meta_opt_key: chaotic 223 | ``` 224 | 225 | ### Nomad Node 226 | 227 | Chaotic will drain a node and set it to be ineligible for some time. 228 | 229 | #### Config 230 | 231 | ``` 232 | export NOMAD_ADDR=http://nomad.example.com:4646 233 | ``` 234 | 235 | ```yaml 236 | --- 237 | kind: nomad 238 | dry_run: true 239 | configs: 240 | experiments: 241 | - node 242 | 243 | # Optional: Node drain deadline in seconds, default 10 244 | node_drain_deadline_seconds: 15 245 | 246 | # Optional: Skip nodes in these classes 247 | node_class_skiplist: 248 | - storage 249 | 250 | # Optional: Skip nodes with these names 251 | node_skiplist: 252 | - node1 253 | - node5 254 | 255 | # Optional: Wait for this amount of seconds before setting the node to be eligible again, default 60 256 | node_wait_for: 100 257 | 258 | # Optional: Also drain system jobs, default false 259 | node_drain_system_jobs: true 260 | 261 | # Optional: Drain multiple nodes in one run in percent, fallback 1 node 262 | node_drain_amount_in_percent: 30 263 | 264 | ``` 265 | 266 | ### Proxmox KVM 267 | 268 | Chaotic will pick a running VM and stop/start it with a delay of a configurable time (default 60s). 
269 | 270 | ``` 271 | export PROXMOX_API_HOST="pve1.example.com" 272 | export PROXMOX_API_USER="root@pam" 273 | export PROXMOX_API_PASSWORD="..." 274 | ``` 275 | 276 | ```yaml 277 | --- 278 | kind: proxmox_kvm 279 | dry_run: false 280 | configs: 281 | 282 | # Optional: Do not shut down VMs having a lower uptime in minutes 283 | min_uptime: 60 284 | 285 | # Optional: Do not shut down VMs in this name list 286 | denylist: 287 | - my-single-vm 288 | 289 | # Optional: 60 seconds is the default 290 | wait_before_restart: 60 291 | ``` 292 | 293 | ## Run 294 | 295 | ### CLI 296 | ``` 297 | chaotic-ngine 298 | ``` 299 | ### Docker 300 | 301 | One shot: 302 | 303 | ``` 304 | docker run -ti --rm -v $PWD/examples/config_nomad.yaml:/app/config.yaml -e TZ=Europe/Zurich -e NOMAD_ADDR=$NOMAD_ADDR --name chaotic ghcr.io/ngine-io/chaotic:latest 305 | ``` 306 | 307 | As service: 308 | 309 | ``` 310 | docker run -ti --rm -v $PWD/examples/config_nomad.yaml:/app/config.yaml -e TZ=Europe/Zurich -e NOMAD_ADDR=$NOMAD_ADDR --name chaotic ghcr.io/ngine-io/chaotic:latest --periodic 311 | ``` 312 | 313 | ## Logs 314 | What you should see (e.g. for kind cloudscale.ch): 315 | ``` 316 | 2021-06-09 09:01:25,433 - cloudscale.log:INFO:Started, version: 0.6.2 317 | 2021-06-09 09:01:25,433 - cloudscale.log:INFO:Using profile default 318 | 2021-06-09 09:01:25,433 - cloudscale.log:INFO:API Token used: xyz... 
319 | 2021-06-09 09:01:25,433 - chatic:INFO:Querying with filter_tag: None 320 | 2021-06-09 09:01:25,433 - cloudscale.log:INFO:HTTP GET to https://api.cloudscale.ch/v1/servers 321 | 2021-06-09 09:01:25,651 - cloudscale.log:INFO:HTTP status code 200 322 | 2021-06-09 09:01:25,652 - chatic:INFO:Choose server app3 323 | 2021-06-09 09:01:25,653 - chatic:INFO:Stopping server app3 324 | 2021-06-09 09:01:25,653 - cloudscale.log:INFO:HTTP POST to https://api.cloudscale.ch/v1/servers/d5628484-a6eb-4ea9-b3ef-ba8da2bb9fe0/stop 325 | 2021-06-09 09:01:26,336 - cloudscale.log:INFO:HTTP status code 204 326 | 2021-06-09 09:01:26,336 - chatic:INFO:Sleeping for server 60 327 | 2021-06-09 09:02:26,393 - cloudscale.log:INFO:HTTP POST to https://api.cloudscale.ch/v1/servers/d5628484-a6eb-4ea9-b3ef-ba8da2bb9fe0/start 328 | 2021-06-09 09:02:26,955 - cloudscale.log:INFO:HTTP status code 204 329 | 2021-06-09 09:02:26,956 - chatic:INFO:done 330 | ``` 331 | -------------------------------------------------------------------------------- /chaotic/__init__.py: -------------------------------------------------------------------------------- 1 | from chaotic.cloud import Chaotic 2 | from chaotic.cloud.cloudscale_ch import CloudscaleChChaotic 3 | from chaotic.cloud.cloudstack import CloudStackChaotic 4 | from chaotic.cloud.digitalocean import DigitaloceanChaotic 5 | from chaotic.cloud.hcloud import HcloudChaotic 6 | from chaotic.cloud.nomad import NomadChaotic 7 | from chaotic.cloud.proxmox_kvm import ProxmoxKvmChaotic 8 | from chaotic.cloud.vultr import VultrChaotic 9 | from chaotic.log import log 10 | 11 | 12 | class ChaoticFactory: 13 | 14 | CLOUD_CLASSES: dict = { 15 | "cloudscale_ch": CloudscaleChChaotic, 16 | "cloudstack": CloudStackChaotic, 17 | "digitalocean": DigitaloceanChaotic, 18 | "hcloud": HcloudChaotic, 19 | "nomad": NomadChaotic, 20 | "proxmox_kvm": ProxmoxKvmChaotic, 21 | "vultr": VultrChaotic, 22 | } 23 | 24 | def get_instance(self, name: str) -> Chaotic: 25 | 
def app() -> None:
    """Load the configuration, build the matching backend and run one chaos action.

    The configuration source comes from the ``CHAOTIC_CONFIG`` environment
    variable (default ``config.yaml``). It may be an HTTP(S) URL whose response
    body is JSON, a ``.yaml``/``.yml`` file, or a ``.json`` file.

    The loaded mapping must contain ``kind``; ``configs``, ``dry_run`` and
    ``excludes`` are optional and default to ``{}``, ``False`` and ``{}``.

    Any error is logged and the process exits with status 1.
    """
    print("")
    try:
        config: dict = dict()
        config_source: str = os.getenv('CHAOTIC_CONFIG', 'config.yaml')

        if config_source.startswith(("http://", "https://")):
            # Without a timeout an unresponsive config server would block the
            # run (and the periodic loop) indefinitely.
            res: Response = requests.get(
                url=config_source,
                timeout=30,
            )
            res.raise_for_status()
            config = res.json()

        elif config_source.endswith(('.yaml', '.yml')):
            # The config is plain data, so the safe loader is sufficient and
            # avoids constructing arbitrary Python objects from the file.
            with open(config_source, "r") as infile:
                config = yaml.safe_load(infile)

        elif config_source.endswith('json'):
            with open(config_source, "r") as infile:
                config = json.load(infile)

        else:
            # Previously this fell through to the misleading "Empty config file".
            raise Exception(f"Unsupported config source: {config_source}")

        if not config:
            raise Exception("Empty config file")

        if not isinstance(config, dict):
            raise Exception("Config must be a mapping")

        if 'kind' not in config:
            raise Exception("No kind defined")

        chaos_factory: ChaoticFactory = ChaoticFactory()
        chaos = chaos_factory.get_instance(config['kind'])
        chaos.configure(
            configs=config.get('configs') or dict(),
            dry_run=config.get('dry_run') or False,
            excludes=config.get('excludes') or dict(),
        )
        chaos.action()
    except Exception as ex:
        log.error(ex)
        sys.exit(1)
class Chaotic(ABC):
    """Abstract base for all cloud backends.

    A backend is configured once via :meth:`configure` and then executes its
    experiment in :meth:`action`. ``configure`` also evaluates the ``excludes``
    section and forces ``dry_run`` to ``True`` while an exclude rule matches.
    """

    def configure(self, configs: dict, dry_run: bool, excludes: dict) -> None:
        """Store the run settings and apply the exclude rules.

        Args:
            configs: Backend specific options (the ``configs`` section).
            dry_run: When true, backends log what they would do without acting.
            excludes: Optional ``days_of_year``, ``weekdays`` and
                ``times_of_day`` lists that switch the run to dry-run.
        """
        self.configs = configs
        self.dry_run = dry_run
        self.excludes = excludes
        if self.dry_run:
            log.info(f"Running in dry-run")
        self._handle_excludes()

    @staticmethod
    def _in_time_range(now, start_time, end_time) -> bool:
        """Return whether ``now`` lies within ``start_time``..``end_time`` (inclusive).

        All arguments are ``datetime.time`` values. If ``start_time`` is later
        than ``end_time`` the range wraps around midnight (e.g. 22:00-08:00).
        """
        if start_time > end_time:
            # Wrap-around: late evening OR early morning. Comparing against the
            # real bounds covers the whole day boundary, including the
            # 23:59:01-00:00:59 gap the old 23:59/00:01 sentinels left open.
            return now >= start_time or now <= end_time
        return start_time <= now <= end_time

    def _handle_excludes(self) -> None:
        """Switch to dry-run if today or the current time matches an exclude rule."""
        # `or []` tolerates keys that are present but empty in the YAML.
        days_of_year = self.excludes.get('days_of_year') or []
        if days_of_year:
            today = datetime.today().strftime('%b%d')
            if today in days_of_year:
                log.info(f"Today '{today}' in days_of_year excludes, running dry-run")
                self.dry_run = True

        weekdays = self.excludes.get('weekdays') or []
        if weekdays:
            today = datetime.today().strftime('%a')
            if today in weekdays:
                log.info(f"Today '{today}' in weekday excludes, running dry-run")
                self.dry_run = True

        times_of_day = self.excludes.get('times_of_day') or []
        if times_of_day:
            now = datetime.now().time()
            for time_range in times_of_day:
                start, end = time_range.split('-')
                start_time = datetime.strptime(start, "%H:%M").time()
                end_time = datetime.strptime(end, "%H:%M").time()
                if self._in_time_range(now, start_time, end_time):
                    log.info(f"Exclude {start_time}-{end_time}")
                    log.info(f"{now} in time of day excludes, running dry-run")
                    self.dry_run = True

    @abstractmethod
    def action(self) -> None:
        """Run the backend's chaos experiment."""
        pass
class CloudStackChaotic(Chaotic):
    """Chaos backend for Apache CloudStack: stop a random tagged VM and start it again."""

    def __init__(self) -> None:
        # Call the base initialiser like every other backend does.
        super().__init__()
        self.cs = CloudStack(
            endpoint=CLOUDSTACK_API_ENDPOINT,
            key=CLOUDSTACK_API_KEY,
            secret=CLOUDSTACK_API_SECRET,
        )

    def action(self) -> None:
        """Pick one VM matching the configured tag, stop it, wait, then start it.

        Reads from ``self.configs``: ``tag`` (mapping with ``key``/``value``),
        optional ``projectid`` and ``zoneid`` filters, and
        ``wait_before_restart`` in seconds (default 60). In dry-run mode the
        VM is only selected and logged.
        """
        tag = self.configs.get("tag")
        if not tag:
            # Without a tag nothing is touched; say so instead of returning
            # silently so an apparently idle run can be diagnosed from the log.
            log.info("No tag configured, skipping")
            return

        log.info(f"Querying with tag: {tag['key']}={tag['value']}")

        instances = self.cs.listVirtualMachines(
            tags=[tag],
            projectid=self.configs.get('projectid'),
            zoneid=self.configs.get('zoneid'),
            fetch_list=True,
        )
        if instances:
            instance = random.choice(instances)
            log.info(f"Choose server {instance['name']}")
            if not self.dry_run:
                log.info(f"Stopping server {instance['name']}")
                self.cs.stopVirtualMachine(id=instance['id'])

                wait_before_restart = int(self.configs.get('wait_before_restart', 60))
                log.info(f"Sleeping for {wait_before_restart} seconds")
                time.sleep(wait_before_restart)

                log.info(f"Starting server {instance['name']}")
                self.cs.startVirtualMachine(id=instance['id'])
        else:
            log.info("No servers found")

        log.info(f"done")
class Nomad:
    """Minimal HTTP client for the subset of the Nomad API used by Chaotic."""

    def __init__(self, api_key: str, api_url: Optional[str] = None, api_auth: Optional[str] = None) -> None:
        """Create a client.

        Args:
            api_key: Value sent in the ``X-Nomad-Token`` header.
            api_url: Base address of the Nomad server; defaults to
                ``http://127.0.0.1:4646`` when empty.
            api_auth: Optional HTTP basic auth in ``user:password`` form.
        """
        self.api_key = api_key
        # Strip a trailing slash so the request URL never contains "//v1/".
        self.api_url = (api_url or "http://127.0.0.1:4646").rstrip("/")
        # Split on the first colon only: the password itself may contain ':'.
        self.api_auth = tuple(api_auth.split(":", 1)) if api_auth else None

    def query_api(self, method: str, path: str, params: Optional[dict] = None, json: Optional[dict] = None) -> "requests.Response":
        """Send a request to ``<api_url>/v1/<path>`` and return the response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        r = requests.request(
            method=method,
            url=f"{self.api_url}/v1/{path}",
            headers={
                "X-Nomad-Token": self.api_key,
                "Content-Type": "application/json",
            },
            auth=self.api_auth,
            params=params,
            json=json,
            timeout=10,
        )
        r.raise_for_status()
        return r

    def list_nodes(self) -> List[dict]:
        """Return the nodes that are not draining and are scheduling-eligible."""
        r = self.query_api("get", "nodes")
        nodes = [node for node in r.json() if not node["Drain"] and node["SchedulingEligibility"] == "eligible"]
        return nodes

    def drain_node(self, node_id: str, deadline_seconds: int = 10, ignore_system_jobs: bool = True) -> None:
        """Start draining a node.

        Args:
            node_id: ID of the node to drain.
            deadline_seconds: Drain deadline in seconds.
            ignore_system_jobs: When true, system jobs are left running.
        """
        json = {
            "DrainSpec": {
                # The API expects nanoseconds. The former factor (60 * 10**8)
                # made the deadline six times longer than configured.
                "Deadline": deadline_seconds * 10**9,
                "IgnoreSystemJobs": ignore_system_jobs,
            },
            "Meta": {
                "message": "drained by chaotic",
            },
        }
        self.query_api("post", f"node/{node_id}/drain", json=json)

    def set_node_eligibility(self, node_id: str, eligible: bool = True) -> None:
        """Mark a node as eligible or ineligible for scheduling."""
        json = {
            "Eligibility": "eligible" if eligible else "ineligible",
        }
        self.query_api("post", f"node/{node_id}/eligibility", json=json)

    def list_allocs(self, namespace: Optional[str] = None) -> List[dict]:
        """Return the allocations, optionally restricted to one namespace."""
        params = {
            "namespace": namespace,
        }
        r = self.query_api("get", "allocations", params=params)
        return r.json()

    def read_alloc(self, alloc_id: str) -> dict:
        """Return the details of a single allocation."""
        r = self.query_api("get", f"allocation/{alloc_id}")
        return r.json()

    def signal_alloc(self, alloc_id: str, signal: str) -> None:
        """Send ``signal`` (e.g. ``SIGKILL``) to an allocation."""
        json = {
            "Signal": signal,
        }
        self.query_api("post", f"client/allocation/{alloc_id}/signal", json=json)

    def list_namespaces(self, prefix: Optional[str] = None) -> List[dict]:
        """Return the namespaces, optionally filtered by name prefix."""
        params = {
            "prefix": prefix,
        }
        r = self.query_api("get", "namespaces", params=params)
        return r.json()
class NomadChaotic(Chaotic):
    """Chaos experiments against a Nomad cluster.

    Two experiments are implemented: ``job`` (send a signal to a random running
    allocation) and ``node`` (drain random nodes, wait, then make them eligible
    again). ``self.configs`` and ``self.dry_run`` are read here but set outside
    this class (presumably by the ``Chaotic`` base class; verify there).
    """

    def __init__(self) -> None:
        super().__init__()
        self.nomad = Nomad(
            api_key=NOMAD_TOKEN,
            api_url=NOMAD_ADDR,
            api_auth=NOMAD_HTTP_AUTH,
        )

    def get_namespace(self) -> str:
        """Pick a random namespace honoring the allow and deny lists.

        Returns:
            The selected namespace name, or ``""`` when none is eligible.
        """
        namespaces = [ns["Name"] for ns in self.nomad.list_namespaces()]

        allowed_ns = self.configs.get("namespace_allowlist")
        if allowed_ns is not None:
            namespaces = [ns for ns in namespaces if ns in allowed_ns]

        denied_ns = self.configs.get("namespace_denylist")
        if denied_ns is not None:
            namespaces = [ns for ns in namespaces if ns not in denied_ns]

        if not namespaces:
            log.info("No namespaces eligible")
            return ""

        namespace = random.choice(namespaces)

        log.info(f"Selected namespace: {namespace}")
        return namespace

    def is_opt_out(self, alloc_id: str) -> bool:
        """Tell whether the job of an allocation opted out via its job meta.

        The meta key name comes from the ``job_meta_opt_key`` config. A job is
        opted out when that key is present and its value is falsy or the string
        ``"false"`` in any letter case.
        """
        opt_key = self.configs.get("job_meta_opt_key")
        if not opt_key:
            return False

        alloc_details = self.nomad.read_alloc(alloc_id=alloc_id)
        job_meta = alloc_details["Job"]["Meta"]
        if not job_meta:
            return False

        value = job_meta.get(opt_key)
        if value is None:
            return False
        if isinstance(value, str):
            # Accept "False"/"FALSE" as well as "false"; the example config
            # documents the opt-out value as `False`.
            return value.strip().lower() in ("", "false")
        return not value

    def action(self) -> None:
        """Run one randomly chosen experiment from the ``experiments`` config.

        Falls back to ``["job"]`` when the config is missing or empty. An
        experiment without a matching ``action_<name>`` method is logged and
        skipped instead of raising ``AttributeError``.
        """
        experiments = self.configs.get("experiments") or ["job"]
        exp = random.choice(experiments)
        log.info(f"Running experiment {exp}")

        func = getattr(self, f"action_{exp}", None)
        if not callable(func):
            log.error(f"Unknown experiment {exp}, skipping")
            return
        func()

    def action_job(self) -> None:
        """Send a random configured signal to one random running allocation."""
        namespace = self.get_namespace()
        if namespace:
            allocs = [
                alloc
                for alloc in self.nomad.list_allocs(namespace=namespace)
                if alloc["ClientStatus"] == "running"
            ]

            job_type_skiplist = self.configs.get("job_type_skiplist")
            if job_type_skiplist:
                allocs = [alloc for alloc in allocs if alloc["JobType"] not in job_type_skiplist]

            job_skiplist = self.configs.get("job_skiplist")
            if job_skiplist:
                allocs = [alloc for alloc in allocs if alloc["JobID"] not in job_skiplist]

            if allocs:
                alloc = random.choice(allocs)
                log.info(f"Selected alloc: {alloc['Name']} (ID: {alloc['ID']}) on {alloc['NodeName']}")
                if not self.is_opt_out(alloc_id=alloc["ID"]):
                    signal = random.choice(self.configs["signals"])
                    log.info(f"Selected signal: {signal}")
                    if not self.dry_run:
                        self.nomad.signal_alloc(alloc_id=alloc["ID"], signal=signal)
                else:
                    log.info("Job is opt-out configured, skipping")
            else:
                log.info("No allocs found")

        log.info("done")

    def action_node(self) -> None:
        """Drain a random subset of nodes, wait, then make them eligible again.

        The subset size is ``node_drain_amount_in_percent`` percent of the
        candidate nodes (at least one, at most all of them); without that
        config a single node is drained.
        """
        nodes = self.nomad.list_nodes()

        node_skiplist = self.configs.get("node_skiplist")
        if node_skiplist:
            nodes = [node for node in nodes if node["Name"] not in node_skiplist]

        node_class_skiplist = self.configs.get("node_class_skiplist")
        if node_class_skiplist:
            nodes = [node for node in nodes if node["NodeClass"] not in node_class_skiplist]

        if nodes:
            # How many nodes to drain in this run
            percent = int(self.configs.get("node_drain_amount_in_percent", 0))
            amount_of_nodes = 1
            if percent > 0:
                amount_of_nodes = round(len(nodes) * percent / 100) or 1
            # A percentage above 100 must not request more nodes than exist.
            amount_of_nodes = min(amount_of_nodes, len(nodes))

            deadline_seconds = int(self.configs.get("node_drain_deadline_seconds", 10))
            ignore_system_jobs = not bool(self.configs.get("node_drain_system_jobs", False))
            node_wait_for = int(self.configs.get("node_wait_for", 60))

            drained = []
            try:
                for node in random.sample(nodes, amount_of_nodes):
                    # Record the node before the API call so it is restored
                    # even when the drain request itself fails.
                    drained.append(node)
                    log.info(f"Drain node: {node['Name']}")
                    if not self.dry_run:
                        self.nomad.drain_node(
                            node_id=node["ID"],
                            deadline_seconds=deadline_seconds,
                            ignore_system_jobs=ignore_system_jobs,
                        )

                log.info(f"Sleeping for {node_wait_for} seconds")
                if not self.dry_run:
                    time.sleep(node_wait_for)
            finally:
                # Always attempt to undo the drains, including after an error.
                for node in drained:
                    log.info(f"Set node to be eligible: {node['Name']}")
                    if not self.dry_run:
                        self.nomad.set_node_eligibility(
                            node_id=node["ID"],
                            eligible=True,
                        )

        log.info("done")
def _env_flag(name: str, default: bool = False) -> bool:
    """Read a boolean environment variable.

    Unset returns ``default``. Empty, ``0``, ``false``, ``no`` and ``off``
    (any letter case) are false; any other value is true.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() not in ("", "0", "false", "no", "off")


PROXMOX_API_HOST: str = os.getenv("PROXMOX_API_HOST", "")
PROXMOX_API_USER: str = os.getenv("PROXMOX_API_USER", "root@pam")
# The variable used to be read under the misspelled name ROXMOX_API_PASSWORD;
# that name is still honored as a fallback for existing deployments.
PROXMOX_API_PASSWORD: str = os.getenv("PROXMOX_API_PASSWORD") or os.getenv("ROXMOX_API_PASSWORD", "")
# bool(os.getenv(...)) treated "false" and "0" as true; parse the text instead.
PROXMOX_API_VERIFY_SSL: bool = _env_flag("PROXMOX_API_VERIFY_SSL")


class ProxmoxKvmChaotic(Chaotic):
    """Chaos experiment that shuts down and restarts a random running Proxmox VM."""

    def __init__(self) -> None:
        super().__init__()
        log.info(f"Proxmox host: {PROXMOX_API_HOST}")
        log.info(f"Proxmox user: {PROXMOX_API_USER}")

        self.pve = ProxmoxAPI(
            host=PROXMOX_API_HOST,
            user=PROXMOX_API_USER,
            password=PROXMOX_API_PASSWORD,
            verify_ssl=PROXMOX_API_VERIFY_SSL
        )

    def action(self) -> None:
        """Pick one running VM outside the denylist, shut it down and start it again.

        When ``min_uptime`` (minutes) is configured and the chosen VM has been
        up for less than that, the run is skipped. In dry-run mode the VM is
        only selected and logged.
        """
        vms = self.pve.cluster.resources.get(type='vm')

        denylist = self.configs.get('denylist') or []
        vms = [
            vm
            for vm in vms
            # Only QEMU guests can be addressed through .qemu(...) below.
            # NOTE(review): assumes type='vm' may also list LXC guests; an
            # entry without a 'type' key is kept.
            if vm.get('type', 'qemu') == 'qemu'
            and vm['status'] == "running"
            and vm['name'] not in denylist
        ]

        if vms:
            vm = random.choice(vms)
            log.info(f"Choose VM ID={vm['vmid']}, name={vm['name']} on node={vm['node']}")

            min_uptime = self.configs.get('min_uptime')
            if min_uptime is not None:
                current = self.pve.nodes(vm['node']).qemu(vm['vmid']).status.current.get()
                # min_uptime is in minutes; the uptime is divided by 60 for
                # display below, i.e. it is treated as seconds.
                required_uptime = min_uptime * 60
                if current['uptime'] < required_uptime:
                    log.info(f"VM {vm['name']} required uptime lower then {min_uptime} min: {current['uptime'] / 60:.2f}, skipping")
                    log.info("done")
                    return

            if not self.dry_run:
                log.info(f"Stopping VM {vm['name']}")
                self.pve.nodes(vm['node']).qemu(vm['vmid']).status.shutdown.post(forceStop=1)

                wait_before_restart = int(self.configs.get('wait_before_restart', 60))
                log.info(f"Sleeping for {wait_before_restart} seconds")
                time.sleep(wait_before_restart)

                log.info(f"Starting VM {vm['name']}")
                self.pve.nodes(vm['node']).qemu(vm['vmid']).status.start.post()

        else:
            log.info("No VMs found")

        log.info("done")
class Vultr:
    """Minimal client for the Vultr v2 HTTP API."""

    VULTR_API_URL: str = "https://api.vultr.com/v2"

    def __init__(self, api_key: str) -> None:
        self.api_key = api_key

    def query_api(self, method: str, path: str, params: Optional[dict] = None, json: Optional[dict] = None) -> requests.Response:
        """Send one authenticated request and return the response.

        Args:
            method: HTTP method name, e.g. ``"get"`` or ``"post"``.
            path: Path below ``VULTR_API_URL``, without a leading slash.
            params: Optional query parameters.
            json: Optional JSON request body.

        Raises:
            requests.HTTPError: via ``raise_for_status`` on an error status.
        """
        r = requests.request(
            method=method,
            url=f"{self.VULTR_API_URL}/{path}",
            headers={
                'Authorization': f"Bearer {self.api_key}",
                'Content-Type': "application/json",
            },
            params=params,
            json=json,
            timeout=10,
        )
        r.raise_for_status()
        return r

    def list_instances(self, tag=None, label=None) -> List[dict]:
        """Return the ``instances`` list of the response, optionally filtered by tag and label.

        Returns an empty list (previously an empty dict) when the response
        carries no ``instances`` entry, matching the annotated return type.
        """
        params = {
            'tag': tag,
            'label': label,
        }
        r = self.query_api('get', 'instances', params=params)
        return r.json().get('instances') or []

    def halt_instances(self, instance_ids: List[str]) -> None:
        """Halt several instances with a single ``instances/halt`` request."""
        json = {
            'instance_ids': instance_ids,
        }
        self.query_api('post', 'instances/halt', json=json)

    def halt_instance(self, instance_id: str) -> None:
        """Halt a single instance."""
        self.halt_instances(instance_ids=[instance_id])

    def start_instance(self, instance_id: str) -> None:
        """Start a single instance."""
        self.query_api('post', f'instances/{instance_id}/start')
class VultrChaotic(Chaotic):
    """Chaos experiment that halts and restarts a random Vultr instance."""

    def __init__(self) -> None:
        super().__init__()
        self.vultr = Vultr(api_key=VULTR_API_KEY)

    def action(self) -> None:
        """Halt one random instance matching the configured tag, wait, then start it again.

        In dry-run mode the instance is only selected and logged.
        """
        tag = self.configs.get('tag')
        log.info(f"Querying with tag: {tag}")
        candidates = self.vultr.list_instances(tag=tag)

        if not candidates:
            log.info("No servers found")
            log.info("done")
            return

        target = random.choice(candidates)
        label = target['label']
        log.info(f"Choose server {label}")

        if not self.dry_run:
            log.info(f"Stopping server {label}")
            self.vultr.halt_instance(target['id'])

            # Pause between halt and start; defaults to 60 seconds.
            pause = int(self.configs.get('wait_before_restart', 60))
            log.info(f"Sleeping for {pause} seconds")
            time.sleep(pause)

            log.info(f"Starting server {label}")
            self.vultr.start_instance(target['id'])

        log.info("done")
level=os.environ.get('CHAOTIC_LOG_LEVEL', 'INFO').upper(), 20 | format='%(asctime)s - %(name)s:%(levelname)s:%(message)s') 21 | 22 | log = logging.getLogger('chaotic') 23 | log.debug('Init') 24 | -------------------------------------------------------------------------------- /chaotic/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.14.1" 2 | -------------------------------------------------------------------------------- /docker/config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: unset 3 | dry_run: true 4 | configs: {} 5 | -------------------------------------------------------------------------------- /examples/config_cloudscale_ch.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: cloudscale_ch 3 | dry_run: false 4 | configs: 5 | filter_tag: "chaos=enabled" 6 | -------------------------------------------------------------------------------- /examples/config_cloudstack.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: cloudstack 3 | dry_run: true 4 | configs: 5 | tag: 6 | key: chaos 7 | value: enabled 8 | # zoneid: xyz 9 | # projectid: 10 | -------------------------------------------------------------------------------- /examples/config_digitalocean.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: digitalocean 3 | dry_run: false 4 | configs: 5 | tag: "chaos:enabled" 6 | -------------------------------------------------------------------------------- /examples/config_hcloud.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: hcloud 3 | dry_run: false 4 | configs: 5 | label: "chaos=enabled" 6 | -------------------------------------------------------------------------------- /examples/config_nomad.yaml: 
-------------------------------------------------------------------------------- 1 | --- 2 | kind: nomad 3 | dry_run: true 4 | excludes: 5 | weekdays: 6 | - Sun 7 | - Sat 8 | times_of_day: 9 | - 22:00-08:00 10 | - 11:00-14:00 11 | days_of_year: 12 | - Jan01 13 | - Apr01 14 | - May01 15 | - Aug01 16 | - Dec24 17 | configs: 18 | experiments: 19 | - job 20 | - node 21 | 22 | ## Job experiments configs 23 | namespace_denylist: 24 | - default 25 | signals: 26 | - SIGKILL 27 | job_type_skiplist: 28 | - system 29 | - batch 30 | - sysbatch 31 | job_skiplist: 32 | - prometheus 33 | # Set the meta key "chaotic" = "false" in your Nomad job to opt out 34 | job_meta_opt_key: chaotic 35 | 36 | ## Node experiments configs 37 | node_drain_deadline_seconds: 15 38 | node_class_skiplist: 39 | - storage 40 | node_skiplist: 41 | - node1 42 | - node2 43 | node_wait_for: 100 44 | node_drain_system_jobs: true 45 | node_drain_amount_in_percent: 66 46 | -------------------------------------------------------------------------------- /examples/config_proxmox_kvm.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: proxmox_kvm 3 | dry_run: false 4 | configs: 5 | 6 | # Optional: Do not shut down VMs whose uptime is lower than this many minutes 7 | min_uptime: 60 8 | 9 | # Optional: Do not shut down VMs whose name is in this list 10 | denylist: 11 | - my-single-vm 12 | -------------------------------------------------------------------------------- /examples/config_vultr.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: vultr 3 | dry_run: true 4 | configs: 5 | tag: "chaos=enabled" 6 | -------------------------------------------------------------------------------- /logging.ini: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root 3 | 4 | [handlers] 5 | keys=stream_handler 6 | 7 | [formatters] 8 | keys=json 9 | 10 | [logger_root] 11 | level=DEBUG 12 | 
handlers=stream_handler 13 | 14 | [handler_stream_handler] 15 | class=StreamHandler 16 | level=DEBUG 17 | formatter=json 18 | args=(sys.stderr,) 19 | 20 | [formatter_json] 21 | format = %(asctime)s %(name)-12s %(levelname)-8s %(message)s 22 | class = pythonjsonlogger.jsonlogger.JsonFormatter 23 | -------------------------------------------------------------------------------- /requirements.dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngine-io/chaotic/4242caa7c7444af04b6be83679f14750b19f28bb/requirements.dev.txt -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | cloudscale-sdk 2 | hcloud 3 | proxmoxer<3.0 4 | python-digitalocean 5 | python-dotenv 6 | python-json-logger 7 | pyyaml 8 | requests 9 | schedule 10 | cs 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2024.8.30 2 | charset-normalizer==3.4.0 3 | cloudscale-sdk==0.7.0 4 | cs==3.3.1 5 | hcloud==2.5.1 6 | idna==3.10 7 | jsonpickle==3.3.0 8 | proxmoxer==2.2.0 9 | python-dateutil==2.9.0.post0 10 | python-digitalocean==1.17.0 11 | python-dotenv==1.1.0 12 | python-json-logger==2.0.7 13 | pytz==2024.2 14 | pyyaml==6.0.2 15 | requests==2.32.3 16 | schedule==1.2.2 17 | six==1.16.0 18 | urllib3==2.2.3 19 | xdg==6.0.0 20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from setuptools import find_packages, setup 5 | 6 | with open("README.md", "r") as fh: 7 | long_description = fh.read() 8 | 9 | install_requires = [] 10 | with open("requirements.txt", "r", encoding="utf-8") as f: 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from setuptools import find_packages, setup


def _read_requirements(path):
    """Return the requirement specifiers listed in ``path``.

    Blank lines and ``#`` comment lines are dropped so they are not passed to
    setuptools as requirement strings.
    """
    with open(path, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line and not line.startswith("#")]


# Read the README as UTF-8 explicitly instead of relying on the platform
# default encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

install_requires = _read_requirements("requirements.txt")
tests_require = _read_requirements("requirements.dev.txt")

# Execute version.py in an isolated namespace to obtain __version__ without
# importing the package.
version = {}
with open("chaotic/version.py", encoding="utf-8") as fp:
    exec(fp.read(), version)

setup(
    name="chaotic-ngine",
    version=version['__version__'],
    author="René Moser",
    author_email="mail@renemoser.net",
    license="MIT",
    description="Chaos for Clouds.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/ngine-io/chaotic",
    packages=find_packages(exclude=["test.*", "tests"]),
    classifiers=[
        "Intended Audience :: Developers",
        "Intended Audience :: System Administrators",
        "Development Status :: 4 - Beta",
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Environment :: Web Environment",
    ],
    install_requires=install_requires,
    tests_require=tests_require,
    python_requires='>=3.6',
    entry_points={
        'console_scripts': [
            'chaotic-ngine = chaotic.app:main',
        ],
    },
)
29 | skip_install = true 30 | commands = coverage erase 31 | --------------------------------------------------------------------------------