├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── container.yml
    │   ├── flake8.yml
    │   └── publish.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── Manifest.in
├── README.md
├── chaotic
    ├── __init__.py
    ├── app.py
    ├── cloud
    │   ├── __init__.py
    │   ├── cloudscale_ch.py
    │   ├── cloudstack.py
    │   ├── digitalocean.py
    │   ├── hcloud.py
    │   ├── nomad.py
    │   ├── proxmox_kvm.py
    │   └── vultr.py
    ├── log.py
    └── version.py
├── docker
    └── config.yaml
├── examples
    ├── config_cloudscale_ch.yaml
    ├── config_cloudstack.yaml
    ├── config_digitalocean.yaml
    ├── config_hcloud.yaml
    ├── config_nomad.yaml
    ├── config_proxmox_kvm.yaml
    └── config_vultr.yaml
├── logging.ini
├── requirements.dev.txt
├── requirements.in
├── requirements.txt
├── setup.py
└── tox.ini


/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # Dependabot: weekly update checks for GitHub Actions, pip and Docker.
 2 | ---
 3 | version: 2
 4 | updates:
 5 |   # Actions referenced by the workflows of this repository
 6 |   - package-ecosystem: "github-actions"
 7 |     directory: "/"
 8 |     schedule:
 9 |       interval: "weekly"
10 | 
11 |   # Python requirements in the repository root
12 |   - package-ecosystem: "pip"
13 |     directory: "/"
14 |     schedule:
15 |       interval: "weekly"
16 | 
17 |   # Base image of the Dockerfile in the repository root
18 |   - package-ecosystem: "docker"
19 |     directory: "/"
20 |     schedule:
21 |       interval: "weekly"
22 | 


--------------------------------------------------------------------------------
/.github/workflows/container.yml:
--------------------------------------------------------------------------------
 1 | # This workflow uses actions that are not certified by GitHub.
 2 | # They are provided by a third-party and are governed by
 3 | # separate terms of service, privacy policy, and support
 4 | # documentation.
 5 | 
 6 | # GitHub recommends pinning actions to a commit SHA.
 7 | # To get a newer version, you will need to update the SHA.
 8 | # You can also reference a tag or branch, but the action may change without warning.
 9 | ---
10 | name: Create and publish a Container image
11 | 
12 | # Runs on pushes to master and on tags starting with "v".
13 | on:
14 |   push:
15 |     branches:
16 |       - "master"
17 |     tags:
18 |       - "v*"
19 | env:
20 |   REGISTRY: ghcr.io
21 |   IMAGE_NAME: ngine-io/chaotic
22 | 
23 | jobs:
24 |   build-and-push-image:
25 |     runs-on: ubuntu-latest
26 |     # The login step below pushes to the registry with GITHUB_TOKEN, hence packages: write.
27 |     permissions:
28 |       contents: read
29 |       packages: write
30 | 
31 |     steps:
32 |       - name: Checkout repository
33 |         uses: actions/checkout@v4
34 | 
35 |       - name: Set up Python
36 |         uses: actions/setup-python@v5
37 |         with:
38 |           python-version: "3.x"
39 | 
40 |       - name: Install dependencies
41 |         run: |
42 |           python -m pip install --upgrade pip
43 |           pip install -U setuptools wheel
44 | 
45 |       # NOTE(review): the Dockerfile installs from the sources (`pip install .`), so this
46 |       # sdist/wheel build presumably only acts as a packaging sanity check - confirm.
47 |       - name: Build
48 |         run: |
49 |           python setup.py sdist bdist_wheel
50 | 
51 |       # QEMU provides emulation for the non-native platform listed in `platforms` below.
52 |       - name: Set up QEMU
53 |         uses: docker/setup-qemu-action@v3
54 | 
55 |       - name: Set up Docker Buildx
56 |         uses: docker/setup-buildx-action@v3
57 | 
58 |       - name: Log in to the Container registry
59 |         uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772
60 |         with:
61 |           registry: ${{ env.REGISTRY }}
62 |           username: ${{ github.actor }}
63 |           password: ${{ secrets.GITHUB_TOKEN }}
64 | 
65 |       # Branch pushes are tagged with the branch name, version tags with the
66 |       # full version and with major.minor.
67 |       - name: Extract metadata (tags, labels) for Docker
68 |         id: meta
69 |         uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804
70 |         with:
71 |           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
72 |           tags: |
73 |             type=ref,event=branch
74 |             type=semver,pattern={{version}}
75 |             type=semver,pattern={{major}}.{{minor}}
76 | 
77 |       - name: Build and push Docker image
78 |         uses: docker/build-push-action@1dc73863535b631f98b2378be8619f83b136f4a0
79 |         with:
80 |           context: .
81 |           push: true
82 |           pull: true
83 |           platforms: linux/arm64,linux/amd64
84 |           tags: ${{ steps.meta.outputs.tags }}
85 |           labels: ${{ steps.meta.outputs.labels }}
86 | 


--------------------------------------------------------------------------------
/.github/workflows/flake8.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | # Lints the code base with Flake8 on every push.
 3 | name: Flake8
 4 | 
 5 | on: push
 6 | 
 7 | jobs:
 8 |   build:
 9 |     runs-on: ubuntu-latest
10 | 
11 |     steps:
12 |       - uses: actions/checkout@v4
13 |       - name: GitHub Action for Flake8
14 |         uses: cclauss/GitHub-Action-for-Flake8@v0.5.0
15 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | # Builds the sdist and wheel and uploads them to PyPI for every published release.
 3 | name: Upload Python Package
 4 | 
 5 | on:
 6 |   release:
 7 |     # `published` also covers releases that were first saved as a draft; per the GitHub
 8 |     # docs, `created` does not start workflows for drafts, so those were never uploaded.
 9 |     types: [published]
10 | 
11 | jobs:
12 |   deploy:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/checkout@v4
16 |       - name: Set up Python
17 |         uses: actions/setup-python@v5
18 |         with:
19 |           python-version: "3.x"
20 |       - name: Install dependencies
21 |         run: |
22 |           python -m pip install --upgrade pip
23 |           pip install -U setuptools wheel twine
24 |       # twine reads the PyPI credentials from TWINE_USERNAME / TWINE_PASSWORD
25 |       - name: Build and publish
26 |         env:
27 |           TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 |           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 |         run: |
30 |           python setup.py sdist bdist_wheel
31 |           twine upload dist/*
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.egg-info
3 | build
4 | dist
5 | .env
6 | .venv
7 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Container image for chaotic-ngine; the package is installed from the repository sources.
 2 | FROM docker.io/python:3.13.3-slim
 3 | 
 4 | WORKDIR /build
 5 | COPY . .
 6 | 
 7 | # Install and clean up in one layer: --no-cache-dir keeps pip's download cache out of
 8 | # the image, and removing /build in the same RUN keeps anything written there during
 9 | # the install out of this layer. A separate `RUN rm -rf` cannot shrink earlier layers;
10 | # the files added by the COPY above still remain in that layer.
11 | RUN pip install --no-cache-dir . && cd / && rm -rf /build
12 | 
13 | WORKDIR /app
14 | 
15 | # Default config; mount your own file over /app/config.yaml to replace it.
16 | COPY ./docker/config.yaml .
17 | 
18 | # Run as an unprivileged user.
19 | USER 1000
20 | 
21 | ENTRYPOINT ["chaotic-ngine"]
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2021 - René Moser
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # None of these targets produces a file of the same name; `build` in particular
 2 | # shares its name with the build/ directory that `clean` removes.
 3 | .PHONY: clean build test-release release test update
 4 | 
 5 | # Remove packaging artifacts and Python bytecode caches.
 6 | clean:
 7 | 	rm -rf *.egg-info
 8 | 	rm -rf *.dist-info
 9 | 	rm -rf dist
10 | 	rm -rf build
11 | # `-exec ... {} +` is terminated inside the find command itself (a shell `||` before
12 | # the terminator would cut it off); -prune keeps find out of directories being deleted.
13 | 	find . -type d -name '__pycache__' -prune -exec rm -rf {} +
14 | 
15 | # Build the sdist and the wheel into dist/ from a clean tree.
16 | build: clean
17 | 	python3 setup.py sdist bdist_wheel
18 | 
19 | # Upload the built distributions to TestPyPI.
20 | test-release:
21 | 	twine upload --repository testpypi dist/*
22 | 
23 | # Upload the built distributions to PyPI.
24 | release:
25 | 	twine upload dist/*
26 | 
27 | test:
28 | 	tox
29 | 
30 | # Re-resolve the pinned requirements and sync the environment to them.
31 | update:
32 | 	pip-compile -U --no-header --no-annotate --strip-extras --resolver backtracking
33 | 	pip-sync
34 | 


--------------------------------------------------------------------------------
/Manifest.in:
--------------------------------------------------------------------------------
1 | # NOTE(review): setuptools documents this template as MANIFEST.in (upper-case); on a
2 | # case-sensitive filesystem `Manifest.in` is presumably not picked up - confirm and rename.
3 | include *.txt
4 | include *.yml
5 | include tox.ini
6 | # NOTE(review): the repository tree shows no tests/ directory - confirm this is still needed.
7 | graft tests
8 | global-exclude *.py[cod]
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![license](https://img.shields.io/pypi/l/chaotic-ngine.svg)
  2 | ![python versions](https://img.shields.io/pypi/pyversions/chaotic-ngine.svg)
  3 | ![status](https://img.shields.io/pypi/status/chaotic-ngine.svg)
  4 | [![pypi version](https://img.shields.io/pypi/v/chaotic-ngine.svg)](https://pypi.org/project/chaotic-ngine/)
  5 | ![PyPI - Downloads](https://img.shields.io/pypi/dw/chaotic-ngine)
  6 | 
  7 | # Chaotic - Chaos for Clouds
  8 | 
  9 | Chaotic evaluates a plan describing how it will bring chaos to your Cloud environment.
 10 | 
 11 | Depending on the Cloud API used, it may kill allocations (Hashicorp Nomad), reboot or stop/start virtual machines in your Cloud environment.
 12 | 
 13 | With no arguments given, Chaotic runs as a "one shot", meant to be executed as a cron job. Passing `--periodic` runs it as a daemon with a configurable interval in minutes, e.g. `--interval 5` (1 is the default).
 14 | NOTE: The config is re-read on every interval, no need to restart the service after changing the config.
 15 | 
 16 | ## Clouds
 17 | 
 18 | Currently implemented Clouds:
 19 | 
 20 | - DigitalOcean
 21 | - Vultr
 22 | - Hetzner Cloud
 23 | - Proxmox KVM
 24 | - CloudStack
 25 | - Hashicorp Nomad
 26 | - cloudscale.ch
 27 | 
 28 | ## Install
 29 | 
 30 | ```
 31 | pip3 install -U chaotic-ngine
 32 | ```
 33 | 
 34 | ## Configure
 35 | 
 36 | Create a file named `config.yaml` or use the env var `CHAOTIC_CONFIG` to point to a config file (also see the `examples` directory):
 37 | 
 38 | ```
 39 | export CHAOTIC_CONFIG=config_nomad.yaml
 40 | ```
 41 | 
 42 | ### Exclude times
 43 | 
 44 | Define times when the bot should not perform real actions (it will run in dry-run mode instead):
 45 | 
 46 | ```yaml
 47 | ---
 48 | kind: ...
 49 | excludes:
 50 |   weekdays:
 51 |     - Sun
 52 |     - Sat
 53 |   times_of_day:
 54 |     - 22:00-08:00
 55 |     - 11:00-14:00
 56 |   days_of_year:
 57 |     - Jan01
 58 |     - Apr01
 59 |     - May01
 60 |     - Aug01
 61 |     - Dec24
 62 | ```
 63 | 
 64 | ### CloudStack
 65 | 
 66 | Chaotic will pick a random server selected by a filter tag and stop/start it with a configurable delay (default 60s). Without a configured tag the CloudStack backend does nothing.
 67 | 
 68 | ```
 69 | export CLOUDSTACK_API_KEY="..."
 70 | export CLOUDSTACK_API_SECRET="..."
 71 | export CLOUDSTACK_API_ENDPOINT="..."
 72 | ```
 73 | 
 74 | ```yaml
 75 | ---
 76 | kind: cloudstack
 77 | dry_run: false
 78 | configs:
 79 | 
 80 |   # Filter tag; without it the CloudStack backend does nothing
 81 |   tag:
 82 |     key: chaos
 83 |     value: enabled
 84 | 
 85 |   # Optional, 60 seconds is the default
 86 |   wait_before_restart: 60
 87 | ```
 88 | 
 89 | ### Vultr
 90 | 
 91 | Chaotic will pick a server selected by an optional filter tag and stop/start it with a configurable delay (default 60s).
 92 | 
 93 | ```
 94 | export VULTR_API_KEY="..."
 95 | ```
 96 | 
 97 | ```yaml
 98 | ---
 99 | kind: vultr
100 | dry_run: true
101 | configs:
102 | 
103 |   # Optional instance tag filter
104 |   tag: "chaos=opt-in"
105 | 
106 |   # Optional, 60 seconds is the default
107 |   wait_before_restart: 60
108 | ```
109 | 
110 | ### Cloudscale.ch
111 | 
112 | Chaotic will pick a server selected by an optional filter tag and stop/start it with a configurable delay (default 60s).
113 | 
114 | 
115 | #### Config
116 | 
117 | ```
118 | export CLOUDSCALE_API_TOKEN="..."
119 | ```
120 | 
121 | ```yaml
122 | ---
123 | kind: cloudscale_ch
124 | dry_run: true
125 | configs:
126 | 
127 |   # Optional server tag filter
128 |   filter_tag: "chaos=opt-in"
129 | 
130 |   # Optional, 60 seconds is the default
131 |   wait_before_restart: 60
132 | ```
133 | 
134 | ### Hetzner Cloud
135 | 
136 | Chaotic will pick a server selected by an optional filter label and stop/start it with a configurable delay (default 60s).
137 | 
138 | #### Config
139 | 
140 | ```
141 | export HCLOUD_API_TOKEN=...
142 | ```
143 | 
144 | ```yaml
145 | ---
146 | kind: hcloud
147 | dry_run: false
148 | configs:
149 | 
150 |   # Optional server label filter
151 |   label: "chaos=enabled"
152 | 
153 |   # Optional, 60 seconds is the default
154 |   wait_before_restart: 60
155 | ```
156 | 
157 | ### DigitalOcean Cloud
158 | 
159 | Chaotic will pick a droplet selected by an optional filter tag and stop/start it with a configurable delay (default 60s).
160 | 
161 | #### Config
162 | 
163 | ```
164 | export DIGITALOCEAN_ACCESS_TOKEN=...
165 | ```
166 | 
167 | ```yaml
168 | ---
169 | kind: digitalocean
170 | dry_run: false
171 | configs:
172 | 
173 |   # Optional droplet tag filter
174 |   tag: "chaos:enabled"
175 | 
176 |   # Optional, 60 seconds is the default
177 |   wait_before_restart: 60
178 | ```
179 | 
180 | ### Nomad Job
181 | 
182 | Chaotic will send an allocation signal to an allocation in the available namespaces selected by an allow list.
183 | 
184 | #### Config
185 | 
186 | ```
187 | export NOMAD_ADDR=http://nomad.example.com:4646
188 | ```
189 | 
190 | ```yaml
191 | ---
192 | kind: nomad
193 | dry_run: true
194 | configs:
195 |   experiments:
196 |     - job
197 | 
198 |   # Signals to choose from
199 |   signals:
200 |     - SIGKILL
201 | 
202 |   # Optional: namespace allowlist
203 |   namespace_allowlist:
204 |     - example-prod
205 |     - foobar-prod
206 | 
207 |   # Optional: namespace denylist
208 |   namespace_denylist:
209 |     - default
210 | 
211 |   # Optional: job type skip list
212 |   job_type_skiplist:
213 |     - system
214 |     - batch
215 |     - sysbatch
216 | 
217 |   # Optional: job name skip list
218 |   job_skiplist:
219 |     - my-job-name
220 | 
221 |   # Optional: Add a meta tag in your nomad job "chaotic" = False to opt-out
222 |   job_meta_opt_key: chaotic
223 | ```
224 | 
225 | ### Nomad Node
226 | 
227 | Chaotic will drain a node and set it to be ineligible for some time.
228 | 
229 | #### Config
230 | 
231 | ```
232 | export NOMAD_ADDR=http://nomad.example.com:4646
233 | ```
234 | 
235 | ```yaml
236 | ---
237 | kind: nomad
238 | dry_run: true
239 | configs:
240 |   experiments:
241 |     - node
242 | 
243 |   # Optional: Node drain deadline in seconds, default 10
244 |   node_drain_deadline_seconds: 15
245 | 
246 |   # Optional: Skip nodes in these classes
247 |   node_class_skiplist:
248 |     - storage
249 | 
250 |   # Optional: Skip nodes with these names
251 |   node_skiplist:
252 |     - node1
253 |     - node5
254 | 
255 |   # Optional: Wait for this amount of seconds before set node to be eligible again, default 60
256 |   node_wait_for: 100
257 | 
258 |   # Optional: Also drain system jobs, default false
259 |   node_drain_system_jobs: true
260 | 
261 |   # Optional: Drain multiple nodes in one run in percent, fallback 1 node
262 |   node_drain_amount_in_percent: 30
263 | 
264 | ```
265 | 
266 | ### Proxmox KVM
267 | 
268 | Chaotic will pick a VM and stop/start it with a configurable delay (default 60s).
269 | 
270 | ```
271 | export PROXMOX_API_HOST="pve1.example.com"
272 | export PROXMOX_API_USER="root@pam"
273 | export PROXMOX_API_PASSWORD="..."
274 | ```
275 | 
276 | ```yaml
277 | ---
278 | kind: proxmox_kvm
279 | dry_run: false
280 | configs:
281 | 
282 |   # Optional: Do not shutdown VMs having a lower uptime in minutes
283 |   min_uptime: 60
284 | 
285 |   # Optional: Do not shutdown VMs in this name list
286 |   denylist:
287 |     - my-single-vm
288 | 
289 |   # Optional: 60 seconds is the default
290 |   wait_before_restart: 60
291 | ```
292 | 
293 | ## Run
294 | 
295 | ### CLI
296 | ```
297 | chaotic-ngine
298 | ```
299 | ### Docker
300 | 
301 | One shot:
302 | 
303 | ```
304 | docker run -ti --rm -v $PWD/examples/config_nomad.yaml:/app/config.yaml -e TZ=Europe/Zurich -e NOMAD_ADDR=$NOMAD_ADDR --name chaotic ghcr.io/ngine-io/chaotic:latest
305 | ```
306 | 
307 | As service:
308 | 
309 | ```
310 | docker run -ti --rm -v $PWD/examples/config_nomad.yaml:/app/config.yaml -e TZ=Europe/Zurich -e NOMAD_ADDR=$NOMAD_ADDR --name chaotic ghcr.io/ngine-io/chaotic:latest --periodic
311 | ```
312 | 
313 | ## Logs
314 | What you should see (e.g. for kind `cloudscale_ch`):
315 | ```
316 | 2021-06-09 09:01:25,433 - cloudscale.log:INFO:Started, version: 0.6.2
317 | 2021-06-09 09:01:25,433 - cloudscale.log:INFO:Using profile default
318 | 2021-06-09 09:01:25,433 - cloudscale.log:INFO:API Token used: xyz...
319 | 2021-06-09 09:01:25,433 - chatic:INFO:Querying with filter_tag: None
320 | 2021-06-09 09:01:25,433 - cloudscale.log:INFO:HTTP GET to https://api.cloudscale.ch/v1/servers
321 | 2021-06-09 09:01:25,651 - cloudscale.log:INFO:HTTP status code 200
322 | 2021-06-09 09:01:25,652 - chatic:INFO:Choose server app3
323 | 2021-06-09 09:01:25,653 - chatic:INFO:Stopping server app3
324 | 2021-06-09 09:01:25,653 - cloudscale.log:INFO:HTTP POST to https://api.cloudscale.ch/v1/servers/d5628484-a6eb-4ea9-b3ef-ba8da2bb9fe0/stop
325 | 2021-06-09 09:01:26,336 - cloudscale.log:INFO:HTTP status code 204
326 | 2021-06-09 09:01:26,336 - chatic:INFO:Sleeping for server 60
327 | 2021-06-09 09:02:26,393 - cloudscale.log:INFO:HTTP POST to https://api.cloudscale.ch/v1/servers/d5628484-a6eb-4ea9-b3ef-ba8da2bb9fe0/start
328 | 2021-06-09 09:02:26,955 - cloudscale.log:INFO:HTTP status code 204
329 | 2021-06-09 09:02:26,956 - chatic:INFO:done
330 | ```
331 | 


--------------------------------------------------------------------------------
/chaotic/__init__.py:
--------------------------------------------------------------------------------
 1 | from chaotic.cloud import Chaotic
 2 | from chaotic.cloud.cloudscale_ch import CloudscaleChChaotic
 3 | from chaotic.cloud.cloudstack import CloudStackChaotic
 4 | from chaotic.cloud.digitalocean import DigitaloceanChaotic
 5 | from chaotic.cloud.hcloud import HcloudChaotic
 6 | from chaotic.cloud.nomad import NomadChaotic
 7 | from chaotic.cloud.proxmox_kvm import ProxmoxKvmChaotic
 8 | from chaotic.cloud.vultr import VultrChaotic
 9 | from chaotic.log import log
10 | 
11 | 
12 | class ChaoticFactory:
13 | 
14 |     CLOUD_CLASSES: dict = {
15 |         "cloudscale_ch": CloudscaleChChaotic,
16 |         "cloudstack": CloudStackChaotic,
17 |         "digitalocean": DigitaloceanChaotic,
18 |         "hcloud": HcloudChaotic,
19 |         "nomad": NomadChaotic,
20 |         "proxmox_kvm": ProxmoxKvmChaotic,
21 |         "vultr": VultrChaotic,
22 |     }
23 | 
24 |     def get_instance(self, name: str) -> Chaotic:
25 |         log.info(f"Instantiate {name}")
26 |         try:
27 |             return self.CLOUD_CLASSES[name]()
28 |         except KeyError as e:
29 |             raise NotImplementedError(f"{e} not implemented")
30 | 


--------------------------------------------------------------------------------
/chaotic/app.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | import sys
 4 | import time
 5 | from argparse import ArgumentParser
 6 | 
 7 | import requests
 8 | import schedule
 9 | import yaml
10 | from requests.models import Response
11 | 
12 | from chaotic import ChaoticFactory
13 | from chaotic.cloud import Chaotic
14 | from chaotic.log import log
15 | from chaotic.version import __version__
16 | 
17 | 
18 | def app() -> None:
19 |     print("")
20 |     try:
21 |         config: dict = dict()
22 |         config_source: str = os.getenv('CHAOTIC_CONFIG', 'config.yaml')
23 | 
24 |         if config_source.startswith("http"):
25 |             res: Response = requests.get(
26 |                 url=config_source,
27 |             )
28 |             res.raise_for_status()
29 |             config = res.json()
30 | 
31 |         elif config_source.endswith(('.yaml', '.yml')):
32 |             with open(config_source, "r") as infile:
33 |                 config = yaml.load(infile, Loader=yaml.FullLoader)
34 |                 infile.close()
35 | 
36 |         elif config_source.endswith('json'):
37 |             with open(config_source, "r") as infile:
38 |                 config = json.load(infile)
39 |                 infile.close()
40 | 
41 |         if not config:
42 |             raise Exception("Empty config file")
43 | 
44 |         if 'kind' not in config:
45 |             raise Exception("No kind defined")
46 | 
47 |         chaos_factory: ChaoticFactory = ChaoticFactory()
48 |         chaos = chaos_factory.get_instance(config['kind'])
49 |         chaos.configure(
50 |             configs=config.get('configs') or dict(),
51 |             dry_run=config.get('dry_run') or False,
52 |             excludes=config.get('excludes') or dict(),
53 |         )
54 |         chaos.action()
55 |     except Exception as ex:
56 |         log.error(ex)
57 |         sys.exit(1)
58 | 
59 | def run_periodic(interval: int = 1) -> None:
60 |     log.info(f"Running periodic in intervals of {interval} minute")
61 |     schedule.every(interval).minutes.do(app)
62 |     time.sleep(1)
63 |     schedule.run_all()
64 |     while True:
65 |         schedule.run_pending()
66 |         sys.stdout.write(".")
67 |         sys.stdout.flush()
68 |         time.sleep(1)
69 | 
70 | def main() -> None:
71 |     parser: ArgumentParser = ArgumentParser()
72 |     parser.add_argument("--periodic", help="run periodic", action="store_true")
73 |     parser.add_argument("--interval", help="set interval in minutes", type=int, default=1)
74 |     parser.add_argument("--version", help="show version", action="store_true")
75 |     args = parser.parse_args()
76 | 
77 |     if args.version:
78 |         print(f"version {__version__}")
79 |         sys.exit(0)
80 | 
81 |     log.info(f"Starting version {__version__}")
82 | 
83 |     if args.periodic:
84 |         try:
85 |             run_periodic(args.interval)
86 |         except KeyboardInterrupt:
87 |             print("")
88 |             log.info(f"Stopping...")
89 |             schedule.clear()
90 |             log.info(f"done")
91 |             pass
92 |     else:
93 |         app()
94 | 
95 | if __name__ == "__main__":
96 |     main()
97 | 


--------------------------------------------------------------------------------
/chaotic/cloud/__init__.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from datetime import datetime
 3 | from chaotic.log import log
 4 | 
 5 | class Chaotic(ABC):
 6 | 
 7 |     def configure(self, configs: dict, dry_run: bool, excludes: dict) -> None:
 8 |         self.configs = configs
 9 |         self.dry_run = dry_run
10 |         self.excludes = excludes
11 |         if self.dry_run:
12 |             log.info(f"Running in dry-run")
13 |         self._handle_excludes()
14 | 
15 |     def _handle_excludes(self) -> None:
16 |         if 'days_of_year' in self.excludes:
17 |             today = datetime.today().strftime('%b%d')
18 |             if today in self.excludes['days_of_year']:
19 |                 log.info(f"Today '{today}' in days_of_year excludes, running dry-run")
20 |                 self.dry_run = True
21 | 
22 |         if 'weekdays' in self.excludes:
23 |             today = datetime.today().strftime('%a')
24 |             if today in self.excludes['weekdays']:
25 |                 log.info(f"Today '{today}' in weekday excludes, running dry-run")
26 |                 self.dry_run = True
27 | 
28 |         if 'times_of_day' in self.excludes:
29 |             now = datetime.now().time()
30 |             for time_range in self.excludes['times_of_day']:
31 |                 start, end = time_range.split('-')
32 |                 start_time = datetime.strptime(start, "%H:%M").time()
33 |                 end_time = datetime.strptime(end, "%H:%M").time()
34 |                 if start_time > end_time:
35 |                     end_of_day = datetime.strptime("23:59", "%H:%M").time()
36 |                     if start_time <= now <= end_of_day:
37 |                         log.info(f"Exclude {start_time}-{end_time}")
38 |                         log.info(f"{now} in time of day excludes, running dry-run")
39 |                         self.dry_run = True
40 | 
41 |                     start_of_day = datetime.strptime("00:01", "%H:%M").time()
42 |                     if start_of_day <= now <= end_time:
43 |                         log.info(f"Exclude {start_time}-{end_time}")
44 |                         log.info(f"{now} in time of day excludes, running dry-run")
45 |                         self.dry_run = True
46 |                 else:
47 |                     if start_time <= now <= end_time:
48 |                         log.info(f"Exclude {start_time}-{end_time}")
49 |                         log.info(f"{now} in time of day excludes, running dry-run")
50 |                         self.dry_run = True
51 | 
52 |     @abstractmethod
53 |     def action(self) -> None:
54 |         pass
55 | 


--------------------------------------------------------------------------------
/chaotic/cloud/cloudscale_ch.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | import time
 4 | 
 5 | from cloudscale import Cloudscale, CloudscaleApiException
 6 | 
 7 | from chaotic.cloud import Chaotic
 8 | from chaotic.log import log
 9 | 
10 | CLOUDSCALE_API_TOKEN: str = os.getenv('CLOUDSCALE_API_TOKEN', "")
11 | 
12 | class CloudscaleChChaotic(Chaotic):
13 | 
14 |     def __init__(self) -> None:
15 |         super().__init__()
16 |         self.cloudscale = Cloudscale(api_token=CLOUDSCALE_API_TOKEN)
17 | 
18 | 
19 |     def action(self) -> None:
20 |         filter_tag = self.configs.get('filter_tag')
21 |         log.info(f"Querying with filter_tag: {filter_tag}")
22 |         servers = self.cloudscale.server.get_all(filter_tag=filter_tag)
23 |         if servers:
24 |             server = random.choice(servers)
25 |             log.info(f"Choose server {server['name']}")
26 |             if not self.dry_run:
27 |                 log.info(f"Stopping server {server['name']}")
28 |                 self.cloudscale.server.stop(uuid=server['uuid'])
29 | 
30 |                 wait_before_restart = int(self.configs.get('wait_before_restart', 60))
31 |                 log.info(f"Sleeping for {wait_before_restart} seconds")
32 |                 time.sleep(wait_before_restart)
33 | 
34 |                 log.info(f"Starting server {server['name']}")
35 |                 self.cloudscale.server.start(uuid=server['uuid'])
36 |         else:
37 |             log.info("No servers found")
38 | 
39 |         log.info(f"done")
40 | 


--------------------------------------------------------------------------------
/chaotic/cloud/cloudstack.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | import time
 4 | 
 5 | from cs import CloudStack
 6 | 
 7 | from chaotic.cloud import Chaotic
 8 | from chaotic.log import log
 9 | 
10 | CLOUDSTACK_API_ENDPOINT: str = os.getenv('CLOUDSTACK_API_ENDPOINT', "")
11 | CLOUDSTACK_API_KEY: str = os.getenv('CLOUDSTACK_API_KEY', "")
12 | CLOUDSTACK_API_SECRET: str = os.getenv('CLOUDSTACK_API_SECRET', "")
13 | 
14 | 
15 | class CloudStackChaotic(Chaotic):
16 | 
17 |     def __init__(self) -> None:
18 |         self.cs = CloudStack(
19 |             endpoint=CLOUDSTACK_API_ENDPOINT,
20 |             key=CLOUDSTACK_API_KEY,
21 |             secret=CLOUDSTACK_API_SECRET,
22 |         )
23 | 
24 |     def action(self) -> None:
25 |         tag = self.configs.get("tag")
26 |         if not tag:
27 |             return
28 | 
29 |         log.info(f"Querying with tag: {tag['key']}={tag['value']}")
30 | 
31 |         instances = self.cs.listVirtualMachines(
32 |             tags=[tag],
33 |             projectid=self.configs.get('projectid'),
34 |             zoneid=self.configs.get('zoneid'),
35 |             fetch_list=True,
36 |         )
37 |         if instances:
38 |             instance = random.choice(instances)
39 |             log.info(f"Choose server {instance['name']}")
40 |             if not self.dry_run:
41 |                 log.info(f"Stopping server {instance['name']}")
42 |                 self.cs.stopVirtualMachine(id=instance['id'])
43 |                 wait_before_restart = int(self.configs.get('wait_before_restart', 60))
44 |                 log.info(f"Sleeping for {wait_before_restart} seconds")
45 |                 time.sleep(wait_before_restart)
46 | 
47 |                 log.info(f"Starting server {instance['name']}")
48 |                 self.cs.startVirtualMachine(id=instance['id'])
49 |         else:
50 |             log.info("No servers found")
51 | 
52 |         log.info(f"done")
53 | 


--------------------------------------------------------------------------------
/chaotic/cloud/digitalocean.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import time
 3 | 
 4 | import digitalocean
 5 | 
 6 | from chaotic.cloud import Chaotic
 7 | from chaotic.log import log
 8 | 
 9 | 
10 | class DigitaloceanChaotic(Chaotic):
11 | 
12 |     def __init__(self) -> None:
13 |         super().__init__()
14 |         self.do = digitalocean.Manager()
15 | 
16 |     def action(self) -> None:
17 |         tag = self.configs.get('tag')
18 |         log.info(f"Querying with tag: {tag}")
19 |         droplets = self.do.get_all_droplets(tag_name=tag)
20 | 
21 |         if droplets:
22 |             droplet = random.choice(droplets)
23 |             log.info(f"Choose server {droplet.name}")
24 |             if not self.dry_run:
25 |                 log.info(f"Stopping server {droplet.name}")
26 |                 droplet.shutdown()
27 | 
28 |                 wait_before_restart = int(self.configs.get('wait_before_restart', 60))
29 |                 log.info(f"Sleeping for {wait_before_restart} seconds")
30 |                 time.sleep(wait_before_restart)
31 | 
32 |                 log.info(f"Starting server {droplet.name}")
33 |                 droplet.power_on()
34 | 
35 |         else:
36 |             log.info("No servers found")
37 | 
38 |         log.info(f"done")
39 | 


--------------------------------------------------------------------------------
/chaotic/cloud/hcloud.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | import time
 4 | 
 5 | from hcloud import Client
 6 | 
 7 | from chaotic.cloud import Chaotic
 8 | from chaotic.log import log
 9 | 
# Hetzner Cloud API token, read once from the environment at import time.
HCLOUD_API_TOKEN: str = os.getenv("HCLOUD_API_TOKEN", "")


class HcloudChaotic(Chaotic):
    """Chaos experiment that power-cycles one randomly chosen Hetzner Cloud server."""

    def __init__(self) -> None:
        super().__init__()
        self.hcloud = Client(token=HCLOUD_API_TOKEN)

    def _power_cycle(self, target) -> None:
        """Power off ``target``, wait ``wait_before_restart`` seconds, then power it on."""
        log.info(f"Stopping server {target.name}")
        self.hcloud.servers.power_off(target)

        # The pause is read from the config only after the server was stopped.
        pause = int(self.configs.get('wait_before_restart', 60))
        log.info(f"Sleeping for {pause} seconds")
        time.sleep(pause)

        log.info(f"Starting server {target.name}")
        self.hcloud.servers.power_on(target)

    def action(self) -> None:
        """Select a random server matching the configured label and restart it.

        In dry-run mode the selection is only logged; nothing is powered off.
        """
        selector = self.configs.get('label')
        log.info(f"Querying with label: {selector}")
        candidates = self.hcloud.servers.get_all(label_selector=selector)

        if not candidates:
            log.info("No servers found")
        else:
            target = random.choice(candidates)
            log.info(f"Choose server {target.name}")
            if not self.dry_run:
                self._power_cycle(target)

        log.info("done")
40 | 


--------------------------------------------------------------------------------
/chaotic/cloud/nomad.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import random
  3 | import time
  4 | from typing import List, Optional
  5 | 
  6 | import requests
  7 | 
  8 | from chaotic.cloud import Chaotic
  9 | from chaotic.log import log
 10 | 
 11 | NOMAD_ADDR: str = os.getenv("NOMAD_ADDR", "")
 12 | NOMAD_TOKEN: str = os.getenv("NOMAD_TOKEN", "")
 13 | NOMAD_HTTP_AUTH: str = os.getenv("NOMAD_HTTP_AUTH", "")
 14 | 
 15 | 
 16 | class Nomad:
 17 |     def __init__(self, api_key: str, api_url: Optional[str] = None, api_auth: Optional[str] = None) -> None:
 18 |         self.api_key = api_key
 19 |         self.api_url = api_url or "http://127.0.0.1:4646"
 20 |         self.api_auth = tuple(api_auth.split(":")) if api_auth else None
 21 | 
 22 |     def query_api(self, method: str, path: str, params: Optional[dict] = None, json: Optional[dict] = None) -> requests.Response:
 23 |         r = requests.request(
 24 |             method=method,
 25 |             url=f"{self.api_url}/v1/{path}",
 26 |             headers={
 27 |                 "X-Nomad-Token": self.api_key,
 28 |                 "Content-Type": "application/json",
 29 |             },
 30 |             auth=self.api_auth,
 31 |             params=params,
 32 |             json=json,
 33 |             timeout=10,
 34 |         )
 35 |         r.raise_for_status()
 36 |         return r
 37 | 
 38 |     def list_nodes(self) -> List[dict]:
 39 |         r = self.query_api("get", "nodes")
 40 |         nodes = [node for node in r.json() if not node["Drain"] and node["SchedulingEligibility"] == "eligible"]
 41 |         return nodes
 42 | 
 43 |     def drain_node(self, node_id: str, deadline_seconds: int = 10, ignore_system_jobs: bool = True) -> None:
 44 |         json = {
 45 |             "DrainSpec": {
 46 |                 "Deadline": deadline_seconds * 60 * 10**8,
 47 |                 "IgnoreSystemJobs": ignore_system_jobs,
 48 |             },
 49 |             "Meta": {
 50 |                 "message": "drained by chaotic",
 51 |             },
 52 |         }
 53 |         self.query_api("post", f"node/{node_id}/drain", json=json)
 54 | 
 55 |     def set_node_eligibility(self, node_id: str, eligible: bool = True) -> None:
 56 |         json = {
 57 |             "Eligibility": "eligible" if eligible else "ineligible",
 58 |         }
 59 |         self.query_api("post", f"node/{node_id}/eligibility", json=json)
 60 | 
 61 |     def list_allocs(self, namespace: Optional[str] = None) -> List[dict]:
 62 |         params = {
 63 |             "namespace": namespace,
 64 |         }
 65 |         r = self.query_api("get", "allocations", params=params)
 66 |         return r.json()
 67 | 
 68 |     def read_alloc(self, alloc_id: str) -> dict:
 69 |         r = self.query_api("get", f"allocation/{alloc_id}")
 70 |         return r.json()
 71 | 
 72 |     def signal_alloc(self, alloc_id: str, signal: str) -> None:
 73 |         json = {
 74 |             "Signal": signal,
 75 |         }
 76 |         self.query_api("post", f"client/allocation/{alloc_id}/signal", json=json)
 77 | 
 78 |     def list_namespaces(self, prefix: Optional[str] = None) -> List[dict]:
 79 |         params = {
 80 |             "prefix": prefix,
 81 |         }
 82 |         r = self.query_api("get", "namespaces", params=params)
 83 |         return r.json()
 84 | 
 85 | 
 86 | class NomadChaotic(Chaotic):
 87 |     def __init__(self) -> None:
 88 |         super().__init__()
 89 |         self.nomad = Nomad(
 90 |             api_key=NOMAD_TOKEN,
 91 |             api_url=NOMAD_ADDR,
 92 |             api_auth=NOMAD_HTTP_AUTH,
 93 |         )
 94 | 
 95 |     def get_namespace(self) -> str:
 96 |         namespaces = [ns["Name"] for ns in self.nomad.list_namespaces()]
 97 | 
 98 |         allowed_ns = self.configs.get("namespace_allowlist")
 99 |         if allowed_ns is not None:
100 |             namespaces = [ns for ns in namespaces if ns in allowed_ns]
101 | 
102 |         denied_ns = self.configs.get("namespace_denylist")
103 |         if denied_ns is not None:
104 |             namespaces = [ns for ns in namespaces if ns not in denied_ns]
105 | 
106 |         if not namespaces:
107 |             log.info(f"No namespaces eligible")
108 |             return ""
109 | 
110 |         namespace = random.choice(namespaces)
111 | 
112 |         log.info(f"Selected namespace: {namespace}")
113 |         return namespace
114 | 
115 |     def is_opt_out(self, alloc_id: str) -> bool:
116 |         opt_in_key = self.configs.get("job_meta_opt_key")
117 |         if opt_in_key:
118 |             alloc_details = self.nomad.read_alloc(alloc_id=alloc_id)
119 |             job_meta = alloc_details["Job"]["Meta"]
120 |             if job_meta:
121 |                 opt_in = job_meta.get(opt_in_key)
122 |                 return opt_in is not None and (not opt_in or opt_in == "false")
123 |         return False
124 | 
125 |     def action(self) -> None:
126 |         experiments = self.configs.get("experiments", ["job"])
127 |         exp = random.choice(experiments)
128 |         log.info(f"Running experiment {exp}")
129 |         method_name = f"action_{exp}"
130 |         func = getattr(self, method_name)
131 |         if func:
132 |             func()
133 | 
134 |     def action_job(self) -> None:
135 |         namespace = self.get_namespace()
136 |         if namespace:
137 |             allocs = [alloc for alloc in self.nomad.list_allocs(namespace=namespace) if alloc["ClientStatus"] == "running"]
138 | 
139 |             job_type_skiplist = self.configs.get("job_type_skiplist")
140 |             if job_type_skiplist:
141 |                 allocs = [alloc for alloc in allocs if alloc["JobType"] not in job_type_skiplist]
142 | 
143 |             job_skiplist = self.configs.get("job_skiplist")
144 |             if job_skiplist:
145 |                 allocs = [alloc for alloc in allocs if alloc["JobID"] not in job_skiplist]
146 | 
147 |             if allocs:
148 |                 alloc = random.choice(allocs)
149 |                 log.info(f"Selected alloc: {alloc['Name']} (ID: {alloc['ID']}) on {alloc['NodeName']}")
150 |                 if not self.is_opt_out(alloc_id=alloc["ID"]):
151 |                     signal = random.choice(self.configs["signals"])
152 |                     log.info(f"Selected signal: {signal}")
153 |                     if not self.dry_run:
154 |                         self.nomad.signal_alloc(alloc_id=alloc["ID"], signal=signal)
155 |                 else:
156 |                     log.info("Job is opt-out configured, skipping")
157 | 
158 |             else:
159 |                 log.info("No allocs found")
160 | 
161 |         log.info(f"done")
162 | 
163 |     def action_node(self) -> None:
164 |         nodes = self.nomad.list_nodes()
165 | 
166 |         node_skiplist = self.configs.get("node_skiplist")
167 |         if node_skiplist:
168 |             nodes = [node for node in nodes if node["Name"] not in node_skiplist]
169 | 
170 |         node_class_skiplist = self.configs.get("node_class_skiplist")
171 |         if node_class_skiplist:
172 |             nodes = [node for node in nodes if node["NodeClass"] not in node_class_skiplist]
173 | 
174 |         if nodes:
175 |             # How many nodes to drain in this run
176 |             node_drain_amount_in_percent = int(self.configs.get("node_drain_amount_in_percent", 0))
177 |             amount_of_nodes = 1
178 |             if node_drain_amount_in_percent and node_drain_amount_in_percent > 0:
179 |                 amount_of_nodes = round(len(nodes) * node_drain_amount_in_percent / 100) or 1
180 | 
181 |             nodes_drain = nodes.copy()
182 |             nodes_eligible = list()
183 |             for i in range(amount_of_nodes):
184 |                 node = nodes_drain.pop(random.randrange(len(nodes_drain)))
185 |                 nodes_eligible.append(node)
186 | 
187 |                 log.info(f"Drain node: {node['Name']}")
188 | 
189 |                 if not self.dry_run:
190 |                     deadline_seconds = int(self.configs.get("node_drain_deadline_seconds", 10))
191 |                     ignore_system_jobs = not bool(self.configs.get("node_drain_system_jobs", False))
192 |                     self.nomad.drain_node(
193 |                         node_id=node["ID"],
194 |                         deadline_seconds=deadline_seconds,
195 |                         ignore_system_jobs=ignore_system_jobs,
196 |                     )
197 | 
198 |             node_wait_for = int(self.configs.get("node_wait_for", 60))
199 |             log.info(f"Sleeping for {node_wait_for} seconds")
200 |             if not self.dry_run:
201 |                 time.sleep(node_wait_for)
202 | 
203 |             for i in range(amount_of_nodes):
204 |                 node = nodes_eligible.pop(random.randrange(len(nodes_eligible)))
205 |                 log.info(f"Set node to be eligible: {node['Name']}")
206 |                 if not self.dry_run:
207 |                     self.nomad.set_node_eligibility(
208 |                         node_id=node["ID"],
209 |                         eligible=True,
210 |                     )
211 | 
212 |         log.info(f"done")
213 | 


--------------------------------------------------------------------------------
/chaotic/cloud/proxmox_kvm.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | import time
 4 | 
 5 | from proxmoxer import ProxmoxAPI
 6 | 
 7 | from chaotic.cloud import Chaotic
 8 | from chaotic.log import log
 9 | 
# Connection settings for the Proxmox VE API, read from the environment at import time.
PROXMOX_API_HOST: str = os.getenv("PROXMOX_API_HOST", "")
PROXMOX_API_USER: str = os.getenv("PROXMOX_API_USER", 'root@pam')
# The password used to be read from the misspelled variable "ROXMOX_API_PASSWORD";
# the correctly spelled name wins and the old one is kept as a fallback.
PROXMOX_API_PASSWORD: str = os.getenv("PROXMOX_API_PASSWORD") or os.getenv("ROXMOX_API_PASSWORD", "")
# bool() on a string is true for any non-empty value, including "false" and "0";
# treat the usual "off" spellings (and an unset variable) as False instead.
PROXMOX_API_VERIFY_SSL: bool = os.getenv('PROXMOX_API_VERIFY_SSL', "").strip().lower() not in ("", "0", "false", "no", "off")
14 | 
15 | 
class ProxmoxKvmChaotic(Chaotic):
    """Chaos experiment that shuts down and restarts one random running Proxmox KVM guest."""

    def __init__(self) -> None:
        super().__init__()
        log.info(f"Proxmox host: {PROXMOX_API_HOST}")
        log.info(f"Proxmox user: {PROXMOX_API_USER}")

        self.pve = ProxmoxAPI(
            host=PROXMOX_API_HOST,
            user=PROXMOX_API_USER,
            password=PROXMOX_API_PASSWORD,
            verify_ssl=PROXMOX_API_VERIFY_SSL
        )

    def action(self) -> None:
        """Pick a random running VM outside the denylist and restart it.

        A VM whose uptime is below the optional ``min_uptime`` config (in
        minutes) is left alone. In dry-run mode the selection is only logged.
        """
        vms = self.pve.cluster.resources.get(type='vm')

        denylist = self.configs.get('denylist') or []
        # The "vm" resource listing can also contain LXC containers; only QEMU
        # guests can be handled through the qemu endpoints used below. Entries
        # without a "type" field are kept, as before.
        vms = [
            vm for vm in vms
            if vm.get('type', 'qemu') == 'qemu'
            and vm['status'] == "running"
            and vm['name'] not in denylist
        ]

        if vms:
            vm = random.choice(vms)
            log.info(f"Choose VM ID={vm['vmid']}, name={vm['name']} on node={vm['node']}")

            min_uptime = self.configs.get('min_uptime')
            if min_uptime is not None:
                current = self.pve.nodes(vm['node']).qemu(vm['vmid']).status.current.get()
                # Coerce to a number so a quoted value in the config ("60")
                # is not repeated as a string by the multiplication.
                required_uptime = float(min_uptime) * 60
                if current['uptime'] < required_uptime:
                    log.info(f"VM {vm['name']} required uptime lower then {min_uptime} min: {current['uptime'] / 60:.2f}, skipping")
                    log.info("done")
                    return

            if not self.dry_run:
                log.info(f"Stopping VM {vm['name']}")
                self.pve.nodes(vm['node']).qemu(vm['vmid']).status.shutdown.post(forceStop=1)

                wait_before_restart = int(self.configs.get('wait_before_restart', 60))
                log.info(f"Sleeping for {wait_before_restart} seconds")
                time.sleep(wait_before_restart)

                log.info(f"Starting VM {vm['name']}")
                self.pve.nodes(vm['node']).qemu(vm['vmid']).status.start.post()

        else:
            log.info("No VMs found")

        log.info("done")
64 | 


--------------------------------------------------------------------------------
/chaotic/cloud/vultr.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import random
 3 | import time
 4 | from typing import List, Optional
 5 | 
 6 | import requests
 7 | 
 8 | from chaotic.cloud import Chaotic
 9 | from chaotic.log import log
10 | 
# Vultr API key, read once from the environment at import time.
VULTR_API_KEY: str = os.getenv('VULTR_API_KEY', "")


class Vultr:
    """Small client for the parts of the Vultr v2 HTTP API used by the experiment."""

    VULTR_API_URL: str = "https://api.vultr.com/v2"

    def __init__(self, api_key: str) -> None:
        self.api_key = api_key

    def query_api(self, method: str, path: str, params: Optional[dict] = None, json: Optional[dict] = None) -> requests.Response:
        """Send an authenticated request to ``<VULTR_API_URL>/<path>``.

        Raises:
            requests.HTTPError: when the response status signals an error
                (via ``raise_for_status``).
        """
        r = requests.request(
            method=method,
            url=f"{self.VULTR_API_URL}/{path}",
            headers={
                'Authorization': f"Bearer {self.api_key}",
                'Content-Type': "application/json",
            },
            params=params,
            json=json,
            timeout=10,
        )
        r.raise_for_status()
        return r

    def list_instances(self, tag=None, label=None) -> List[dict]:
        """Return the instances, optionally filtered by ``tag`` and/or ``label``.

        Always returns a list; a response without (or with a null)
        ``instances`` entry yields ``[]``.
        """
        # NOTE(review): only a single response page is read here - confirm
        # whether the API paginates and more pages need to be fetched.
        params = {
            'tag': tag,
            'label': label,
        }
        r = self.query_api('get', 'instances', params=params)
        return r.json().get('instances') or []

    def halt_instances(self, instance_ids: List[str]) -> None:
        """Halt all instances listed in ``instance_ids`` with one request."""
        json = {
            'instance_ids': instance_ids,
        }
        self.query_api('post', 'instances/halt', json=json)

    def halt_instance(self, instance_id: str) -> None:
        """Halt the single instance ``instance_id``."""
        self.halt_instances(instance_ids=[instance_id])

    def start_instance(self, instance_id: str) -> None:
        """Start the instance ``instance_id``."""
        self.query_api('post', f'instances/{instance_id}/start')
55 | 
56 | 
class VultrChaotic(Chaotic):
    """Chaos experiment that halts and restarts one randomly chosen Vultr instance."""

    def __init__(self) -> None:
        super().__init__()
        self.vultr = Vultr(api_key=VULTR_API_KEY)

    def _restart(self, target: dict) -> None:
        """Halt ``target``, wait ``wait_before_restart`` seconds, then start it again."""
        log.info(f"Stopping server {target['label']}")
        self.vultr.halt_instance(target['id'])

        # The pause is read from the config only after the instance was halted.
        pause = int(self.configs.get('wait_before_restart', 60))
        log.info(f"Sleeping for {pause} seconds")
        time.sleep(pause)

        log.info(f"Starting server {target['label']}")
        self.vultr.start_instance(target['id'])

    def action(self) -> None:
        """Select a random instance carrying the configured tag and restart it.

        In dry-run mode the selection is only logged; nothing is halted.
        """
        wanted_tag = self.configs.get('tag')
        log.info(f"Querying with tag: {wanted_tag}")
        candidates = self.vultr.list_instances(tag=wanted_tag)

        if not candidates:
            log.info("No servers found")
        else:
            target = random.choice(candidates)
            log.info(f"Choose server {target['label']}")
            if not self.dry_run:
                self._restart(target)

        log.info("done")
85 | 


--------------------------------------------------------------------------------
/chaotic/log.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import logging
 4 | from logging.config import fileConfig
 5 | from dotenv import load_dotenv
 6 | from pathlib import Path
 7 | 
 8 | env_path = Path('.') / '.env'
 9 | load_dotenv(dotenv_path=env_path)
10 | 
11 | logging_config_file_path = os.environ.get('CHAOTIC_LOG_CONFIG', 'logging.ini')
12 | 
13 | logging_config = Path(logging_config_file_path)
14 | if logging_config.is_file():
15 |     fileConfig(logging_config_file_path)
16 | else:
17 |     logging.basicConfig(
18 |         stream=sys.stdout,
19 |         level=os.environ.get('CHAOTIC_LOG_LEVEL', 'INFO').upper(),
20 |         format='%(asctime)s - %(name)s:%(levelname)s:%(message)s')
21 | 
22 | log = logging.getLogger('chaotic')
23 | log.debug('Init')
24 | 


--------------------------------------------------------------------------------
/chaotic/version.py:
--------------------------------------------------------------------------------
# Package version string; setup.py obtains it by exec()-ing this file.
__version__ = "0.14.1"
2 | 


--------------------------------------------------------------------------------
/docker/config.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | kind: unset
3 | dry_run: true
4 | configs: {}
5 | 


--------------------------------------------------------------------------------
/examples/config_cloudscale_ch.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | kind: cloudscale_ch
3 | dry_run: false
4 | configs:
5 |   filter_tag: "chaos=enabled"
6 | 


--------------------------------------------------------------------------------
/examples/config_cloudstack.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | kind: cloudstack
 3 | dry_run: true
 4 | configs:
 5 |   tag:
 6 |     key: chaos
 7 |     value: enabled
 8 |   # zoneid: xyz
 9 |   # projectid:
10 | 


--------------------------------------------------------------------------------
/examples/config_digitalocean.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | kind: digitalocean
3 | dry_run: false
4 | configs:
5 |   tag: "chaos:enabled"
6 | 


--------------------------------------------------------------------------------
/examples/config_hcloud.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | kind: hcloud
3 | dry_run: false
4 | configs:
5 |   label: "chaos=enabled"
6 | 


--------------------------------------------------------------------------------
/examples/config_nomad.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | kind: nomad
 3 | dry_run: true
 4 | excludes:
 5 |   weekdays:
 6 |     - Sun
 7 |     - Sat
 8 |   times_of_day:
 9 |     - 22:00-08:00
10 |     - 11:00-14:00
11 |   days_of_year:
12 |     - Jan01
13 |     - Apr01
14 |     - May01
15 |     - Aug01
16 |     - Dec24
17 | configs:
18 |   experiments:
19 |     - job
20 |     - node
21 | 
22 |   ## Job experiments configs
23 |   namespace_denylist:
24 |     - default
25 |   signals:
26 |     - SIGKILL
27 |   job_type_skiplist:
28 |     - system
29 |     - batch
30 |     - sysbatch
31 |   job_skiplist:
32 |     - prometheus
 33 |   # Set the meta key "chaotic" = "false" in your Nomad job to opt out
34 |   job_meta_opt_key: chaotic
35 | 
36 |   ## Node experiments configs
37 |   node_drain_deadline_seconds: 15
38 |   node_class_skiplist:
39 |     - storage
40 |   node_skiplist:
41 |     - node1
42 |     - node2
43 |   node_wait_for: 100
44 |   node_drain_system_jobs: true
45 |   node_drain_amount_in_percent: 66
46 | 


--------------------------------------------------------------------------------
/examples/config_proxmox_kvm.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | kind: proxmox_kvm
 3 | dry_run: false
 4 | configs:
 5 | 
  6 |   # Optional: Do not shut down VMs whose uptime is below this many minutes
 7 |   min_uptime: 60
 8 | 
  9 |   # Optional: Do not shut down VMs whose name is in this list
10 |   denylist:
11 |     - my-single-vm
12 | 


--------------------------------------------------------------------------------
/examples/config_vultr.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | kind: vultr
3 | dry_run: true
4 | configs:
5 |   tag: "chaos=enabled"
6 | 


--------------------------------------------------------------------------------
/logging.ini:
--------------------------------------------------------------------------------
 1 | [loggers]
 2 | keys=root
 3 | 
 4 | [handlers]
 5 | keys=stream_handler
 6 | 
 7 | [formatters]
 8 | keys=json
 9 | 
10 | [logger_root]
11 | level=DEBUG
12 | handlers=stream_handler
13 | 
14 | [handler_stream_handler]
15 | class=StreamHandler
16 | level=DEBUG
17 | formatter=json
18 | args=(sys.stderr,)
19 | 
20 | [formatter_json]
21 | format = %(asctime)s %(name)-12s %(levelname)-8s %(message)s
22 | class = pythonjsonlogger.jsonlogger.JsonFormatter
23 | 


--------------------------------------------------------------------------------
/requirements.dev.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ngine-io/chaotic/4242caa7c7444af04b6be83679f14750b19f28bb/requirements.dev.txt


--------------------------------------------------------------------------------
/requirements.in:
--------------------------------------------------------------------------------
 1 | cloudscale-sdk
 2 | hcloud
 3 | proxmoxer<3.0
 4 | python-digitalocean
 5 | python-dotenv
 6 | python-json-logger
 7 | pyyaml
 8 | requests
 9 | schedule
10 | cs
11 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | certifi==2024.8.30
 2 | charset-normalizer==3.4.0
 3 | cloudscale-sdk==0.7.0
 4 | cs==3.3.1
 5 | hcloud==2.5.1
 6 | idna==3.10
 7 | jsonpickle==3.3.0
 8 | proxmoxer==2.2.0
 9 | python-dateutil==2.9.0.post0
10 | python-digitalocean==1.17.0
11 | python-dotenv==1.1.0
12 | python-json-logger==2.0.7
13 | pytz==2024.2
14 | pyyaml==6.0.2
15 | requests==2.32.3
16 | schedule==1.2.2
17 | six==1.16.0
18 | urllib3==2.2.3
19 | xdg==6.0.0
20 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from setuptools import find_packages, setup
 5 | 
 6 | with open("README.md", "r") as fh:
 7 |     long_description = fh.read()
 8 | 
 9 | install_requires = []
10 | with open("requirements.txt", "r", encoding="utf-8") as f:
11 |     install_requires = list(i.rstrip() for i in f.readlines())
12 | 
13 | tests_require = []
14 | with open("requirements.dev.txt", "r", encoding="utf-8") as f:
15 |     tests_require = list(i.rstrip() for i in f.readlines())
16 | 
17 | version = {}
18 | with open("chaotic/version.py") as fp:
19 |     exec(fp.read(), version)
20 | 
21 | setup(
22 |     name="chaotic-ngine",
23 |     version=version['__version__'],
24 |     author="René Moser",
25 |     author_email="mail@renemoser.net",
26 |     license="MIT",
27 |     description="Chaos for Clouds.",
28 |     long_description=long_description,
29 |     long_description_content_type="text/markdown",
30 |     url="https://github.com/ngine-io/chaotic",
31 |     packages=find_packages(exclude=["test.*", "tests"]),
32 |     classifiers=[
33 |         "Intended Audience :: Developers",
34 |         "Intended Audience :: System Administrators",
35 |         "Development Status :: 4 - Beta",
36 |         "Programming Language :: Python :: 3",
37 |         "License :: OSI Approved :: MIT License",
38 |         "Operating System :: OS Independent",
39 |         "Environment :: Web Environment",
40 |     ],
41 |     install_requires=install_requires,
42 |     tests_require=tests_require,
43 |     python_requires='>=3.6',
44 |     entry_points={
45 |         'console_scripts': [
46 |             'chaotic-ngine = chaotic.app:main',
47 |         ],
48 |     },
49 | )
50 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py{310,311}
 3 | skip_missing_interpreters = True
 4 | skipsdist = True
 5 | 
 6 | [gh-actions]
 7 | python =
 8 |     3.10: py310
 9 |     3.11: py311
10 | 
11 | [testenv]
12 | changedir = tests
13 | deps =
14 |     -r{toxinidir}/requirements.txt
15 |     -r{toxinidir}/requirements.dev.txt
16 | commands =
17 |     python --version
18 |     pytest -v --cov --cov-append --cov-report=xml
19 | 
20 | [testenv:report]
21 | deps = coverage
22 | skip_install = true
23 | commands =
24 |     coverage report
25 |     coverage html
26 | 
27 | [testenv:clean]
28 | deps = coverage
29 | skip_install = true
30 | commands = coverage erase
31 | 


--------------------------------------------------------------------------------