├── .github ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── docker-build-push.yml │ └── go.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── examples ├── configmap.yaml ├── debug-mode-configmap.yaml ├── deployment.yaml └── notifications-configmap.yaml ├── go.mod ├── go.sum ├── helm └── kubemonkey │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── templates │ ├── NOTES.txt │ ├── _helpers.tpl │ ├── configmap.yaml │ ├── deployment.yaml │ └── rbac.yaml │ └── values.yaml ├── internal └── pkg │ ├── calendar │ ├── calendar.go │ └── calendar_test.go │ ├── chaos │ ├── chaos.go │ ├── chaos_test.go │ ├── chaosmock.go │ └── chaosresult.go │ ├── config │ ├── config.go │ ├── config_test.go │ ├── param │ │ └── param.go │ ├── validations.go │ └── validations_test.go │ ├── kubemonkey │ └── kubemonkey.go │ ├── kubernetes │ └── kubernetes.go │ ├── notifications │ ├── client.go │ ├── client_test.go │ ├── notifications.go │ ├── util.go │ └── util_test.go │ ├── schedule │ ├── schedule.go │ └── schedule_test.go │ └── victims │ ├── factory │ ├── daemonsets │ │ ├── daemonsets.go │ │ ├── daemonsets_test.go │ │ ├── eligible_daemonsets.go │ │ └── eligible_daemonsets_test.go │ ├── deployments │ │ ├── deployments.go │ │ ├── deployments_test.go │ │ ├── eligible_deployments.go │ │ └── eligible_deployments_test.go │ ├── factory.go │ └── statefulsets │ │ ├── eligible_statefulsets.go │ │ ├── eligible_statefulsets_test.go │ │ ├── statefulset_test.go │ │ └── statefulsets.go │ ├── victims.go │ └── victims_test.go └── main.go /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 12 | 13 | ### :pencil: Description 14 | 15 | 16 | ### :link: Related Issues 17 | -------------------------------------------------------------------------------- /.github/workflows/docker-build-push.yml: -------------------------------------------------------------------------------- 1 | 
name: Publish Docker image 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | publish-docker: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Docker meta 15 | id: meta 16 | uses: docker/metadata-action@v4 17 | with: 18 | # list of Docker images to use as base name for tags 19 | images: | 20 | ayushsobti/kube-monkey 21 | # Generate Docker tags based on the following events/attributes 22 | tags: | 23 | type=ref,event=tag 24 | 25 | - name: Set up QEMU 26 | uses: docker/setup-qemu-action@v1 27 | 28 | - name: Set up Docker Buildx 29 | uses: docker/setup-buildx-action@v2 30 | 31 | - name: Login to DockerHub 32 | uses: docker/login-action@v2 33 | with: 34 | username: ${{ secrets.DOCKER_HUB_USERNAME }} 35 | password: ${{ secrets.DOCKER_HUB_PASSWORD }} 36 | 37 | - name: Build and push 38 | uses: docker/build-push-action@v3 39 | with: 40 | platforms: linux/amd64,linux/arm64,linux/arm/v7 41 | push: true 42 | tags: ${{ steps.meta.outputs.tags }} 43 | labels: ${{ steps.meta.outputs.labels }} 44 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - master 7 | push: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | 13 | build: 14 | name: Build and Unit Tests 15 | runs-on: ubuntu-latest 16 | steps: 17 | - name: Set up Go 18 | uses: actions/setup-go@v3 19 | with: 20 | go-version: 1.21 21 | 22 | - name: Check out code 23 | uses: actions/checkout@v3.0.2 24 | with: 25 | fetch-depth: 0 26 | 27 | - name: Build 28 | run: make test 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .idea/ 3 | build/ 4 | RPMS/ 5 | kube-monkey 6 | bin/ 7 | .vscode/ 8 | .DS_Store 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at ayushsobti@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Git Flow for Dev Work 2 | Install Go 1.21 or newer (the version used by CI and the Dockerfile) 3 | 4 | Fork the project on GitHub and install golang 5 | ```bash 6 | go get github.com/asobti/kube-monkey 7 | git remote rename origin upstream 8 | git remote add origin https://github.com/<your-username>/kube-monkey 9 | git checkout --track -b feature/branchname 10 | ``` 11 | Then code & stuff. 12 | 13 | Make sure to test your branch from scratch and run `make test`! 14 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ######################## 2 | ### Builder ### 3 | ######################## 4 | FROM golang:1.21 as builder 5 | RUN mkdir -p /kube-monkey 6 | COPY ./ /kube-monkey/ 7 | WORKDIR /kube-monkey 8 | RUN make build 9 | 10 | ######################## 11 | ### Final ### 12 | ######################## 13 | FROM scratch 14 | COPY --from=builder /kube-monkey/kube-monkey /kube-monkey 15 | COPY --from=builder /usr/share/zoneinfo /usr/share/zoneinfo 16 | ENTRYPOINT ["/kube-monkey"] 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: test 2 | 3 | ENVVAR = GOOS=linux GOARCH=amd64 CGO_ENABLED=0 4 | GOLANGCI_INSTALLED := $(shell which bin/golangci-lint) 5 | 6 | 7 | .PHONY: all build container clean gofmt lint test 8 | 9 | # linting is temporarily disabled 10 | # see https://github.com/asobti/kube-monkey/pull/123 11 | lint: 12 | ifdef GOLANGCI_INSTALLED 13 | bin/golangci-lint run -E golint -E goimports 14 | else 15 | @echo Warning golangci-lint not installed. 
Skipping linting 16 | @echo Installation instructions: https://github.com/golangci/golangci-lint#ci-installation 17 | endif 18 | 19 | build: clean gofmt 20 | $(ENVVAR) go build -o kube-monkey 21 | 22 | docker_args= 23 | ifdef http_proxy 24 | docker_args+= --build-arg http_proxy=$(http_proxy) 25 | endif 26 | ifdef https_proxy 27 | docker_args+= --build-arg https_proxy=$(https_proxy) 28 | endif 29 | 30 | # Suppressing docker build avoids printing the env variables 31 | container: 32 | @echo "Running docker with '$(docker_args)'" 33 | @docker build $(docker_args) -t kube-monkey:latest . 34 | 35 | gofmt: 36 | gofmt -s -w . 37 | 38 | # Same as gofmt, but also orders imports 39 | goimports: 40 | goimports -s -w . 41 | 42 | clean: 43 | rm -f kube-monkey 44 | 45 | test: build 46 | go test -v -cover -gcflags=-l ./... 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build](https://github.com/asobti/kube-monkey/actions/workflows/go.yml/badge.svg)](https://github.com/asobti/kube-monkey/actions/workflows/go.yml) 2 | [![Go Report Card](https://goreportcard.com/badge/github.com/asobti/kube-monkey)](https://goreportcard.com/report/github.com/asobti/kube-monkey) 3 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 4 | [![Docker Pulls](https://img.shields.io/docker/pulls/ayushsobti/kube-monkey?label=Docker%20pulls&logo=docker)](https://hub.docker.com/r/ayushsobti/kube-monkey/) 5 | [![Artifact Hub](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/kubemonkey)](https://artifacthub.io/packages/search?repo=kubemonkey) 6 | 7 | kube-monkey is an implementation of [Netflix's Chaos Monkey](https://github.com/Netflix/chaosmonkey) for [Kubernetes](http://kubernetes.io/) clusters. 
It randomly deletes Kubernetes (k8s) pods in the cluster encouraging and validating the development of failure-resilient services. 8 | 9 | Join us at [#kube-monkey](https://kubernetes.slack.com/messages/kube-monkey) on Kubernetes Slack. 10 | 11 | --- 12 | 13 | kube-monkey runs at a pre-configured hour (`run_hour`, defaults to 8 am) on weekdays, and builds a schedule of deployments that will face a random 14 | Pod death sometime during the same day. The time-range during the day when the random pod Death might occur is configurable and defaults to 10 am to 4 pm. 15 | 16 | kube-monkey can be configured with a list of namespaces 17 | * to blacklist (any deployments within a blacklisted namespace will not be touched) 18 | 19 | To disable the blacklist provide `[""]` in the `blacklisted_namespaces` config.param. 20 | 21 | ## Opting-In to Chaos 22 | 23 | kube-monkey works on an opt-in model and will only schedule terminations for Kubernetes (k8s) apps that have explicitly agreed to have their pods terminated by kube-monkey. 24 | 25 | Opt-in is done by setting the following labels on a k8s app: 26 | 27 | **`kube-monkey/enabled`**: Set to **`"enabled"`** to opt-in to kube-monkey 28 | **`kube-monkey/mtbf`**: Mean time between failure (in days). For example, if set to **`"3"`**, the k8s app can expect to have a Pod 29 | killed approximately every third weekday. 30 | **`kube-monkey/identifier`**: A unique identifier for the k8s apps. This is used to identify the pods 31 | that belong to a k8s app as Pods inherit labels from their k8s app. So, if kube-monkey detects that app `foo` has enrolled to be a victim, kube-monkey will look for all pods that have the label `kube-monkey/identifier: foo` to determine which pods are candidates for killing. The recommendation is to set this value to be the same as the app's name. 32 | **`kube-monkey/kill-mode`**: Default behavior is for kube-monkey to kill only ONE pod of your app. 
You can override this behavior by setting the value to: 33 | * `kill-all` if you want kube-monkey to kill **ALL** of your pods regardless of status (including not ready and not running pods). Does not require `kill-value`. **Use this label carefully.** 34 | * `fixed` if you want to kill a specific number of running pods with `kill-value`. If you overspecify, it will kill **all** running pods and issue a warning. 35 | * `random-max-percent` to specify a *maximum* `%` with `kill-value` that can be killed. At the scheduled time, a uniform *random specified* `%` of the running pods will be terminated. 36 | * `fixed-percent` to specify a *fixed* `%` with `kill-value` that can be killed. At the scheduled time, a specified *fixed* `%` of the running pods will be terminated. 37 | 38 | 39 | **`kube-monkey/kill-value`**: Specify value for kill-mode 40 | * if `fixed`, provide an integer of pods to kill 41 | * if `random-max-percent`, provide a number from `0`-`100` to specify the max `%` of pods kube-monkey can kill 42 | * if `fixed-percent`, provide a number from `0`-`100` to specify the `%` of pods to kill 43 | 44 | #### Example of opted-in Deployment killing one pod per purge 45 | 46 | ```yaml 47 | --- 48 | apiVersion: apps/v1 49 | kind: Deployment 50 | metadata: 51 | name: monkey-victim 52 | namespace: app-namespace 53 | spec: 54 | template: 55 | metadata: 56 | labels: 57 | kube-monkey/enabled: enabled 58 | kube-monkey/identifier: monkey-victim 59 | kube-monkey/mtbf: '2' 60 | kube-monkey/kill-mode: "fixed" 61 | kube-monkey/kill-value: '1' 62 | [... omitted ...] 63 | ``` 64 | 65 | For newer versions of kubernetes you may need to add the labels to the k8s app metadata as well. 
66 | 67 | ```yaml 68 | --- 69 | apiVersion: apps/v1 70 | kind: Deployment 71 | metadata: 72 | name: monkey-victim 73 | namespace: app-namespace 74 | labels: 75 | kube-monkey/enabled: enabled 76 | kube-monkey/identifier: monkey-victim 77 | kube-monkey/mtbf: '2' 78 | kube-monkey/kill-mode: "fixed" 79 | kube-monkey/kill-value: '1' 80 | spec: 81 | template: 82 | metadata: 83 | labels: 84 | kube-monkey/enabled: enabled 85 | kube-monkey/identifier: monkey-victim 86 | [... omitted ...] 87 | ``` 88 | 89 | ### Overriding the apiserver 90 | #### Use cases: 91 | * Since client-go does not support [cluster dns](https://github.com/kubernetes/client-go/blob/master/rest/config.go#L331) explicitly with a `// TODO: switch to using cluster DNS.` note in the code, you may need to override the apiserver. 92 | * If you are running an unauthenticated system, you may need to force the http apiserver endpoint. 93 | 94 | #### To override the apiserver specify in the config.toml file 95 | ```toml 96 | [kubernetes] 97 | host="https://your-apiserver-url.com:apiport" 98 | ``` 99 | 100 | ## How kube-monkey works 101 | 102 | #### Scheduling time 103 | Scheduling happens once a day on Weekdays - this is when a schedule for terminations for the current day is generated. During scheduling, kube-monkey will: 104 | 1. Generate a list of eligible k8s apps (k8s apps that have opted-in and are not blacklisted, if specified, and are whitelisted, if specified) 105 | 2. For each eligible k8s app, flip a biased coin (bias determined by `kube-monkey/mtbf`) to determine if a pod for that k8s app should be killed today 106 | 3. For each victim, calculate a random time when a pod will be killed 107 | 108 | #### Termination time 109 | This is the randomly generated time during the day when a victim k8s app will have a pod killed. 110 | At termination time, kube-monkey will: 111 | 1. 
Check if the k8s app is still eligible (has not opted-out or been blacklisted or removed from the whitelist since scheduling) 112 | 2. Check if the k8s app has updated kill-mode and kill-value 113 | 3. Depending on kill-mode and kill-value, execute pods 114 | 115 | ## Docker Images 116 | 117 | Docker images for kube-monkey can be found at [DockerHub](https://hub.docker.com/r/ayushsobti/kube-monkey/tags/) 118 | 119 | ## Building 120 | 121 | Clone the repository and build the container. 122 | 123 | ```bash 124 | go get github.com/asobti/kube-monkey 125 | cd $GOPATH/src/github.com/asobti/kube-monkey 126 | make build 127 | make container 128 | ``` 129 | 130 | ## Configuring 131 | kube-monkey is configured by environment variables or a toml file placed at `/etc/kube-monkey/config.toml` and expects the configmap to exist before the kube-monkey deployment. 132 | 133 | Configuration keys and descriptions can be found in [`config/param/param.go`](https://github.com/asobti/kube-monkey/blob/master/internal/pkg/config/param/param.go) 134 | 135 | #### Example config.toml file 136 | ```toml 137 | [kubemonkey] 138 | dry_run = true # Terminations are only logged 139 | run_hour = 8 # Run scheduling at 8am on weekdays 140 | start_hour = 10 # Don't schedule any pod deaths before 10am 141 | end_hour = 16 # Don't schedule any pod deaths after 4pm 142 | blacklisted_namespaces = ["kube-system"] # Critical apps live here 143 | time_zone = "America/New_York" # Set tzdata timezone example. 
Note the field is time_zone not timezone 144 | ``` 145 | 146 | #### Example environment variables 147 | ``` 148 | KUBEMONKEY_DRY_RUN=true 149 | KUBEMONKEY_RUN_HOUR=8 150 | KUBEMONKEY_START_HOUR=10 151 | KUBEMONKEY_END_HOUR=16 152 | KUBEMONKEY_BLACKLISTED_NAMESPACES=kube-system 153 | KUBEMONKEY_TIME_ZONE=America/New_York 154 | ``` 155 | #### Example Config to test kube-monkey works by enabling debug mode 156 | 157 | Note: this will keep attacking pods every 60s regardless of what you configured for `start_hour` and `end_hour`. 158 | 159 | ```toml 160 | [debug] 161 | enabled= true 162 | schedule_immediate_kill= true 163 | ``` 164 | 165 | ## Notifications 166 | 167 | Kube-monkey supports notifications and can notify an endpoint of your choice after an attack. 168 | It can be a Slack webhook or a custom API. 169 | 170 | #### Example Config for posting attack notifications to an HTTP endpoint 171 | ```toml 172 | [notifications] 173 | enabled = true 174 | reportSchedule = true 175 | [notifications.attacks] 176 | endpoint = "http://url1" 177 | message = "message1" 178 | headers = ["header1Key:header1Value","header2Key:header2/Value"] 179 | ``` 180 | 181 | #### Placeholders 182 | 183 | The message supports the following placeholders: 184 | * `{$name}`: victim's name 185 | * `{$kind}`: victim's kind 186 | * `{$namespace}`: victim's namespace 187 | * `{$timestamp}`: attack's time from Unix epoch in milliseconds 188 | * `{$time}`: attack's time 189 | * `{$date}`: attack's date 190 | * `{$error}`: result's error, if any 191 | * `{$kubemonkeyid}`: kube-monkey id (set using KUBE_MONKEY_ID env variable otherwise empty) 192 | 193 | ``` 194 | message: '{ 195 | "what": "Kube-monkey({$kubemonkeyid}) attack of {$name} in {$namespace}", 196 | "who": "{$name}", 197 | "when": {$timestamp} 198 | }' 199 | ``` 200 | 201 | The header supports a special placeholder to retrieve the value of an environment variable. 202 | This is useful when calling an API that has a protected endpoint. 
203 | A typical scenario is passing an API token to the kube-monkey container: the token is stored in a Kubernetes Secret and exposed to the container via an environment variable. 204 | 205 | ``` 206 | headers = ["api-key:{$env:API_TOKEN}", "Content-Type:application/json"] 207 | ``` 208 | 209 | `{$env:API_TOKEN}` will be replaced by the environment variable `API_TOKEN` value. 210 | 211 | Note if the environment variable does not exist, the notification call will NOT be cancelled. The value will resolve to an empty string, and a warning will show up in the logs. 212 | 213 | ## Deploying 214 | 215 | **Manually** 216 | 1. First, deploy the expected `kube-monkey-config-map` configmap in the namespace you intend to run kube-monkey in (for example, the `kube-system` namespace). Make sure to define the keyname as `config.toml` 217 | 218 | > For example `kubectl create configmap kube-monkey-config-map --from-file=config.toml=km-config.toml` or `kubectl apply -f km-config.yaml` 219 | 220 | 2. Run kube-monkey as a k8s app within the Kubernetes cluster, in a namespace that has permissions to kill Pods in other namespaces (eg. `kube-system`). 221 | 222 | See dir [`examples/`](https://github.com/asobti/kube-monkey/tree/master/examples) for example Kubernetes yaml files. 223 | 224 | 3. You should be able to see debug logs by `kubectl logs -f deployment.apps/kube-monkey --namespace=kube-system`, where `deployment.apps/kube-monkey` is the k8s deployment for kube-monkey. 225 | 226 | 227 | **Helm Chart** 228 | 229 | See [How to install kube-monkey with Helm](helm/kubemonkey/README.md). 230 | 231 | ## Logging 232 | 233 | kube-monkey uses [glog](https://github.com/golang/glog) and supports all command-line features for glog. 
To specify a custom v level or a custom log directory on the pod, see `args: ["-v=5", "-log_dir=/path/to/custom/log"]` in the [example deployment file](https://github.com/asobti/kube-monkey/tree/master/examples/deployment.yaml) 234 | 235 | > **Standardized glog levels `grep -r V\([0-9]\) *`** 236 | > 237 | > L0: None 238 | > 239 | > L1: Highest Level current status info and Errors with Terminations 240 | > 241 | > L2: Successful terminations 242 | > 243 | > L3: More detailed schedule status info 244 | > 245 | > L4: Debugging verbose schedule and config info 246 | > 247 | > L5: Auto-resolved inconsequential issues 248 | 249 | More resources: See the [k8s logging page](https://kubernetes.io/docs/concepts/cluster-administration/logging/) suggesting [community conventions for logging severity](https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md) 250 | 251 | ## Instructions on how to get this working on OpenShift 3.x 252 | 253 | ``` 254 | git clone https://github.com/asobti/kube-monkey.git 255 | cd examples 256 | oc login http://someserver/ -u system:admin 257 | oc project kube-system 258 | oc create -f configmap.yaml 259 | oc -n kube-system adm policy add-role-to-user -z deployer system:deployer 260 | oc -n kube-system adm policy add-role-to-user -z builder system:image-builder 261 | oc -n kube-system adm policy add-role-to-group system:image-puller system:serviceaccounts:kube-system 262 | oc run kube-monkey --image=docker.io/ayushsobti/kube-monkey:v0.4.0 --command -- /kube-monkey -v=5 -log_dir=/var/log/kube-monkey 263 | oc volume dc/kube-monkey --add --name=kubeconfigmap -m /etc/kube-monkey -t configmap --configmap-name=kube-monkey-config-map 264 | ``` 265 | 266 | ### OpenShift 4.x 267 | 268 | ``` 269 | git clone https://github.com/asobti/kube-monkey.git 270 | cd examples 271 | oc login http://someserver/ -u system:admin 272 | oc project kube-system 273 | oc create -f configmap.yaml 274 | oc -n kube-system adm policy 
add-cluster-role-to-user edit -z default --rolebinding-name kube-monkey-edit 275 | oc run kube-monkey --image=docker.io/ayushsobti/kube-monkey:v0.3.0 --command -- /kube-monkey -v=5 -log_dir=/var/log/kube-monkey 276 | oc set volume dc/kube-monkey --add --name=kubeconfigmap -m /etc/kube-monkey -t configmap --configmap-name=kube-monkey-config-map 277 | ``` 278 | 279 | ## Ways to contribute 280 | 281 | See [How to Contribute](CONTRIBUTING.md) 282 | 283 | ## License 284 | This project is licensed under the Apache License v2.0 - see the [LICENSE](LICENSE) file for details. 285 | -------------------------------------------------------------------------------- /examples/configmap.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: kube-monkey-config-map 6 | namespace: kube-system 7 | data: 8 | config.toml: | 9 | [kubemonkey] 10 | run_hour = 8 11 | start_hour = 10 12 | end_hour = 16 13 | blacklisted_namespaces = ["kube-system"] 14 | -------------------------------------------------------------------------------- /examples/debug-mode-configmap.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | apiVersion: v1 4 | kind: ConfigMap 5 | metadata: 6 | name: kube-monkey-config-map 7 | namespace: kube-system 8 | data: 9 | config.toml: | 10 | [kubemonkey] 11 | run_hour = 20 12 | start_hour = 21 13 | end_hour = 23 14 | blacklisted_namespaces = ["kube-system"] 15 | whitelisted_namespaces = [ "default", "test-kill-namespace" ] 16 | time_zone = "Australia/Melbourne" 17 | graceperiod_sec= 10 18 | [debug] 19 | enabled= true 20 | schedule_immediate_kill= true 21 | -------------------------------------------------------------------------------- /examples/deployment.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: 
kube-monkey 6 | namespace: kube-system 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: kube-monkey 12 | template: 13 | metadata: 14 | labels: 15 | app: kube-monkey 16 | spec: 17 | containers: 18 | - name: kube-monkey 19 | command: 20 | - "/kube-monkey" 21 | args: ["-v=5", "-log_dir=/var/log/kube-monkey"] 22 | image: ayushsobti/kube-monkey:v0.4.0 23 | env: 24 | - name: KUBE_MONKEY_ID 25 | value: CLUSTER_A 26 | volumeMounts: 27 | - name: config-volume 28 | mountPath: "/etc/kube-monkey" 29 | volumes: 30 | - name: config-volume 31 | configMap: 32 | name: kube-monkey-config-map 33 | -------------------------------------------------------------------------------- /examples/notifications-configmap.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | apiVersion: v1 4 | kind: ConfigMap 5 | metadata: 6 | name: kube-monkey-config-map 7 | namespace: kube-system 8 | data: 9 | config.toml: | 10 | [kubemonkey] 11 | run_hour = 20 12 | start_hour = 21 13 | end_hour = 23 14 | blacklisted_namespaces = ["kube-system"] 15 | whitelisted_namespaces = [ "default", "test-kill-namespace" ] 16 | time_zone = "Australia/Melbourne" 17 | graceperiod_sec= 10 18 | [notifications] 19 | enabled = true 20 | proxy = "host:port" 21 | reportSchedule = true 22 | [notifications.attacks] 23 | endpoint = "test1" 24 | message = '''{"foo":"bar","bar":"foo"}''' 25 | headers = ["header1Key:header1Value", "header2Key:header2Value" ] 26 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module kube-monkey 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/fsnotify/fsnotify v1.6.0 7 | github.com/golang/glog v1.1.2 8 | github.com/pkg/errors v0.9.1 9 | github.com/spf13/viper v1.16.0 10 | github.com/stretchr/testify v1.8.4 11 | k8s.io/api v0.28.1 12 | k8s.io/apimachinery v0.28.1 13 | k8s.io/client-go v0.28.1 14 | ) 15 | 16 | 
require ( 17 | github.com/davecgh/go-spew v1.1.1 // indirect 18 | github.com/emicklei/go-restful/v3 v3.11.0 // indirect 19 | github.com/evanphx/json-patch v5.6.0+incompatible // indirect 20 | github.com/go-logr/logr v1.2.4 // indirect 21 | github.com/go-openapi/jsonpointer v0.20.0 // indirect 22 | github.com/go-openapi/jsonreference v0.20.2 // indirect 23 | github.com/go-openapi/swag v0.22.4 // indirect 24 | github.com/gogo/protobuf v1.3.2 // indirect 25 | github.com/golang/protobuf v1.5.3 // indirect 26 | github.com/google/gnostic-models v0.6.8 // indirect 27 | github.com/google/go-cmp v0.5.9 // indirect 28 | github.com/google/gofuzz v1.2.0 // indirect 29 | github.com/google/uuid v1.3.1 // indirect 30 | github.com/hashicorp/hcl v1.0.0 // indirect 31 | github.com/josharian/intern v1.0.0 // indirect 32 | github.com/json-iterator/go v1.1.12 // indirect 33 | github.com/magiconair/properties v1.8.7 // indirect 34 | github.com/mailru/easyjson v0.7.7 // indirect 35 | github.com/mitchellh/mapstructure v1.5.0 // indirect 36 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 37 | github.com/modern-go/reflect2 v1.0.2 // indirect 38 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 39 | github.com/pelletier/go-toml/v2 v2.1.0 // indirect 40 | github.com/pmezard/go-difflib v1.0.0 // indirect 41 | github.com/spf13/afero v1.9.5 // indirect 42 | github.com/spf13/cast v1.5.1 // indirect 43 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 44 | github.com/spf13/pflag v1.0.5 // indirect 45 | github.com/stretchr/objx v0.5.1 // indirect 46 | github.com/subosito/gotenv v1.6.0 // indirect 47 | golang.org/x/net v0.17.0 // indirect 48 | golang.org/x/oauth2 v0.12.0 // indirect 49 | golang.org/x/sys v0.13.0 // indirect 50 | golang.org/x/term v0.13.0 // indirect 51 | golang.org/x/text v0.13.0 // indirect 52 | golang.org/x/time v0.3.0 // indirect 53 | google.golang.org/appengine v1.6.8 // indirect 54 | google.golang.org/protobuf 
v1.31.0 // indirect 55 | gopkg.in/inf.v0 v0.9.1 // indirect 56 | gopkg.in/ini.v1 v1.67.0 // indirect 57 | gopkg.in/yaml.v2 v2.4.0 // indirect 58 | gopkg.in/yaml.v3 v3.0.1 // indirect 59 | k8s.io/klog/v2 v2.100.1 // indirect 60 | k8s.io/kube-openapi v0.0.0-20230905202853-d090da108d2f // indirect 61 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect 62 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 63 | sigs.k8s.io/structured-merge-diff/v4 v4.3.0 // indirect 64 | sigs.k8s.io/yaml v1.3.0 // indirect 65 | ) 66 | -------------------------------------------------------------------------------- /helm/kubemonkey/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /helm/kubemonkey/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | appVersion: 0.5.2 3 | description: A Helm chart for Kubernetes 4 | name: kube-monkey 5 | version: 1.5.2 6 | -------------------------------------------------------------------------------- /helm/kubemonkey/README.md: -------------------------------------------------------------------------------- 1 | # Kube-Monkey Helm Chart 2 | 3 | [Kube-Monkey](https://github.com/asobti/kube-monkey) periodically kills pods in your Kubernetes cluster, that are opt-in based on their own rules. 
4 | 5 | ## Add repository 6 | 7 | ```bash 8 | helm repo add kubemonkey https://asobti.github.io/kube-monkey/charts/repo 9 | helm repo update 10 | ``` 11 | 12 | ## Installing the Chart 13 | 14 | To install the chart with the release name `my-release`: 15 | 16 | With Helm v3 17 | 18 | ```bash 19 | helm install my-release kubemonkey/kube-monkey --version 1.5.0 20 | ``` 21 | 22 | With Helm v2 23 | 24 | ```bash 25 | helm install --name my-release kubemonkey/kube-monkey --version 1.5.0 26 | ``` 27 | 28 | The command deploys kube-monkey on the Kubernetes cluster in the default configuration. The [configurations](#Configurations) section lists the parameters that can be configured during installation. 29 | 30 | ## Uninstalling the Chart 31 | 32 | To uninstall/delete the `my-release` deployment: 33 | 34 | ```console 35 | $ helm delete my-release 36 | ``` 37 | 38 | The command removes all the Kubernetes components associated with the chart and deletes the release. 39 | 40 | ## Customising Configurations 41 | 42 | By default `Kube-Monkey` runs in dry-run mode so it doesn't actually kill anything. 43 | If you're confident you want to use it for real, run `helm` with: 44 | 45 | ```console 46 | $ helm install --name my-release kubemonkey --set config.dryRun=false 47 | ``` 48 | 49 | By default `Kube-Monkey` runs without any whitelisted namespaces assigned, so it doesn't actually kill anything. 50 | If you're confident you want to enable it for real, run `helm` with: 51 | 52 | ```console 53 | $ helm install --name my-release kubemonkey \ 54 | --set config.dryRun=false \ 55 | --set config.whitelistedNamespaces="{namespace1,namespace2,namespace3}" 56 | ``` 57 | 58 | **Note: replace namespace with your real namespaces** 59 | 60 | If you want to see how kube-monkey kills pods immediately in debug mode: 
61 | 62 | ```console 63 | $ helm install --name my-release kubemonkey \ 64 | --set config.dryRun=false \ 65 | --set config.whitelistedNamespaces="{namespace1,namespace2,namespace3}" \ 66 | --set config.debug.enabled=true \ 67 | --set config.debug.schedule_immediate_kill=true 68 | ``` 69 | If you want to change the time kube-monkey wakes up and the hours between which it kills pods: 70 | 71 | ```console 72 | $ helm install --name my-release kubemonkey \ 73 | --set config.dryRun=false \ 74 | --set config.whitelistedNamespaces="{namespace1,namespace2,namespace3}" \ 75 | --set config.runHour=10 \ 76 | --set config.startHour=11 \ 77 | --set config.endHour=17 78 | ``` 79 | If you want to enable attack notifications: 80 | 81 | ```console 82 | $ helm install --name my-release kubemonkey \ 83 | --set config.dryRun=false \ 84 | --set config.whitelistedNamespaces="namespace1\"\,\"namespace2\"\,\"namespace3" \ 85 | --set config.notifications.enabled=true \ 86 | --set config.notifications.endpoint=http://localhost:8080/path \ 87 | --set config.notifications.message="{\"foo\":\"bar\"}" \ 88 | --set config.notifications.headers="Content-Type:application/json\"\,\"client-id:kubemonkey" 89 | ``` 90 | If you want to validate the values passed in to the configmap: 
91 | 92 | ```console 93 | $ helm get manifest my-release 94 | ``` 95 | ## Configurations 96 | 97 | | Parameter | Description | Default | 98 | |----------------------------------------|-----------------------------------------------------------------------------------------|----------------------------------| 99 | | `image.repository` | docker image repo | ayushsobti/kube-monkey | 100 | | `image.tag` | docker image tag | v0.5.2 | 101 | | `replicaCount` | number of replicas to run | 1 | 102 | | `image.pullPolicy` | image pull logic | IfNotPresent | 103 | | `config.dryRun` | will not kill pods, only logs behaviour | true | 104 | | `config.runHour` | schedule start time in 24hr format | 8 | 105 | | `config.startHour` | pod killing start time in 24hr format | 10 | 106 | | `config.endHour` | pod killing stop time in 24hr format | 16 | 107 | | `config.whitelistedNamespaces` | pods in this namespace that opt-in will be killed | | 108 | | `config.blacklistedNamespaces` | pods in this namespace will not be killed | kube-system | 109 | | `config.timeZone` | time zone in TZ database format | America/New_York | 110 | | `config.debug.enabled` | debug mode; needs to be enabled to see debugging behaviour | false | 111 | | `config.debug.schedule_immediate_kill` | immediate pod kill matching other rules apart from time | false | 112 | | `config.notifications.enabled` | enables reporting of attacks to an HTTP endpoint | false | 113 | | `config.notifications.proxy` | notifications proxy URL | | 114 | | `config.notifications.attacks` | HTTP collector in the form (endpoint,message,headers) where attacks will be reported to | | 115 | | `args.logLevel` | go log level | 5 | 116 | | `args.logDir` | log directory | /var/log/kube-monkey | 117 | 118 | Alternatively, you can simply edit `values.yaml` with your preferred configs and run as below: 119 | 120 | ```console 121 | $ helm install --name my-release kubemonkey --namespace=kube-monkey 122 | ``` 123 | Example of a modified values.yaml (only important parts 
are displayed) 124 | 125 | ```yaml 126 | ... 127 | replicaCount: 1 128 | image: 129 | repository: ayushsobti/kube-monkey 130 | tag: v0.4.1 131 | pullPolicy: IfNotPresent 132 | config: 133 | dryRun: false 134 | runHour: 8 135 | startHour: 10 136 | endHour: 16 137 | blacklistedNamespaces: [ "kube-system" ] 138 | whitelistedNamespaces: [ "namespace1", "namespace2" ] 139 | timeZone: America/New_York 140 | args: 141 | logLevel: 5 142 | logDir: /var/log/kube-monkey 143 | ... 144 | ``` 145 | -------------------------------------------------------------------------------- /helm/kubemonkey/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1. Wait until the application is rolled out: 2 | kubectl -n {{ .Release.Namespace }} rollout status deployment {{ template "kubemonkey.fullname" . }} 3 | 2. Check the logs: 4 | kubectl logs -f deployment.apps/{{ template "kubemonkey.fullname" . }} -n {{ .Release.Namespace }} 5 | 6 | -------------------------------------------------------------------------------- /helm/kubemonkey/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "kubemonkey.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 
13 | */}} 14 | {{- define "kubemonkey.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "kubemonkey.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | The name of the Service Account to use 36 | */}} 37 | {{- define "kubemonkey.serviceAccountName" -}} 38 | {{- default (include "kubemonkey.fullname" .) .Values.serviceAccount.name -}} 39 | {{- end -}} 40 | -------------------------------------------------------------------------------- /helm/kubemonkey/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ template "kubemonkey.fullname" . }} 5 | namespace: {{ .Release.Namespace }} 6 | data: 7 | config.toml: | 8 | [kubemonkey] 9 | dry_run = {{ .Values.config.dryRun }} 10 | run_hour = {{ .Values.config.runHour }} 11 | start_hour = {{ .Values.config.startHour }} 12 | end_hour = {{ .Values.config.endHour }} 13 | blacklisted_namespaces = [ {{- range .Values.config.blacklistedNamespaces }} {{ . | trim | quote }}, {{- end }} ] 14 | {{- $whitelen := len .Values.config.whitelistedNamespaces }} 15 | {{- if gt $whitelen 0 }} 16 | whitelisted_namespaces = [ {{- range .Values.config.whitelistedNamespaces }} {{ . 
| trim | quote }}, {{- end }} ] 17 | {{- end }} 18 | time_zone = {{ .Values.config.timeZone | quote }} 19 | [debug] 20 | enabled = {{ .Values.config.debug.enabled }} 21 | schedule_immediate_kill = {{ .Values.config.debug.schedule_immediate_kill }} 22 | [notifications] 23 | enabled = {{ .Values.config.notifications.enabled }} 24 | {{- if ne .Values.config.notifications.proxy "" }} 25 | proxy = {{ .Values.config.notifications.proxy}} 26 | {{- end }} 27 | [notifications.attacks] 28 | {{ indent 6 .Values.config.notifications.attacks -}} 29 | 30 | -------------------------------------------------------------------------------- /helm/kubemonkey/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ template "kubemonkey.fullname" . }} 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | app: {{ template "kubemonkey.name" . }} 8 | chart: {{ template "kubemonkey.chart" . }} 9 | release: {{ .Release.Name }} 10 | heritage: {{ .Release.Service }} 11 | spec: 12 | replicas: {{ .Values.replicaCount }} 13 | selector: 14 | matchLabels: 15 | app: {{ template "kubemonkey.name" . }} 16 | release: {{ .Release.Name }} 17 | template: 18 | metadata: 19 | labels: 20 | app: {{ template "kubemonkey.name" . 
}} 21 | release: {{ .Release.Name }} 22 | spec: 23 | containers: 24 | - name: {{ .Chart.Name }} 25 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 26 | imagePullPolicy: {{ .Values.image.pullPolicy }} 27 | command: 28 | - "/kube-monkey" 29 | args: ["-v={{ .Values.args.logLevel }}", "-log_dir={{ .Values.args.logDir }}"] 30 | resources: 31 | {{- toYaml .Values.resources | trimSuffix "\n" | nindent 12 }} 32 | volumeMounts: 33 | - name: config-volume 34 | mountPath: "/etc/kube-monkey" 35 | {{- if .Values.additionalVolumeMounts }} 36 | {{- toYaml .Values.additionalVolumeMounts | default "" | nindent 12 }} 37 | {{- end}} 38 | securityContext: 39 | {{- toYaml .Values.podSecurityContext | trimSuffix "\n" | nindent 8 }} 40 | serviceAccountName: {{ include "kubemonkey.serviceAccountName" . }} 41 | volumes: 42 | - name: config-volume 43 | configMap: 44 | name: {{ template "kubemonkey.fullname" . }} 45 | {{- if .Values.additionalVolumes }} 46 | {{- toYaml .Values.additionalVolumes | default "" | nindent 8 }} 47 | {{- end}} 48 | {{- with .Values.imagePullSecrets }} 49 | imagePullSecrets: 50 | {{ toYaml . | indent 8 }} 51 | {{- end }} 52 | {{- with .Values.nodeSelector }} 53 | nodeSelector: 54 | {{ toYaml . | indent 8 }} 55 | {{- end }} 56 | {{- with .Values.affinity }} 57 | affinity: 58 | {{ toYaml . | indent 8 }} 59 | {{- end }} 60 | {{- with .Values.tolerations }} 61 | tolerations: 62 | {{ toYaml . | indent 8 }} 63 | {{- end }} 64 | -------------------------------------------------------------------------------- /helm/kubemonkey/templates/rbac.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceAccount }} 2 | {{- if not .Values.serviceAccount.name }} 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | name: {{ template "kubemonkey.fullname" . 
}} 7 | namespace: {{ .Release.Namespace }} 8 | {{- end}} 9 | {{- end}} 10 | 11 | --- 12 | 13 | kind: ClusterRole 14 | apiVersion: rbac.authorization.k8s.io/v1 15 | metadata: 16 | name: {{ template "kubemonkey.fullname" . }} 17 | rules: 18 | - apiGroups: 19 | - "" 20 | - "extensions" 21 | - "apps" 22 | resources: 23 | - daemonsets 24 | - deployments 25 | - deployments/rollback 26 | - deployments/scale 27 | - replicasets 28 | - replicasets/scale 29 | - statefulsets 30 | - statefulsets/scale 31 | verbs: 32 | - get 33 | - list 34 | - watch 35 | - apiGroups: 36 | - "" 37 | resources: 38 | - "namespaces" 39 | verbs: 40 | - get 41 | - list 42 | - watch 43 | - apiGroups: 44 | - "" 45 | resources: 46 | - "pods" 47 | verbs: 48 | - "get" 49 | - "list" 50 | - "watch" 51 | - "delete" 52 | 53 | --- 54 | 55 | apiVersion: rbac.authorization.k8s.io/v1 56 | kind: ClusterRoleBinding 57 | metadata: 58 | name: {{ template "kubemonkey.fullname" . }} 59 | namespace: {{ .Release.Namespace }} 60 | roleRef: 61 | apiGroup: rbac.authorization.k8s.io 62 | kind: ClusterRole 63 | name: {{ template "kubemonkey.fullname" . }} 64 | subjects: 65 | - kind: ServiceAccount 66 | name: {{ include "kubemonkey.serviceAccountName" . }} 67 | namespace: {{ .Release.Namespace }} 68 | -------------------------------------------------------------------------------- /helm/kubemonkey/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for kubemonkey. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 4 | 5 | replicaCount: 1 6 | 7 | # The service account the pods will use to interact with the Kubernetes API 8 | serviceAccount: 9 | # If set, an existing Service Account is used. 
Else a Service Account is created automatically 10 | name: "" 11 | 12 | image: 13 | repository: ayushsobti/kube-monkey 14 | tag: v0.5.2 15 | pullPolicy: IfNotPresent 16 | imagePullSecrets: {} 17 | config: 18 | dryRun: true 19 | runHour: 8 20 | startHour: 10 21 | endHour: 16 22 | blacklistedNamespaces: 23 | - kube-system 24 | whitelistedNamespaces: [] 25 | timeZone: America/New_York 26 | debug: 27 | enabled: false # if you want to enable debugging and see how pods killed immediately set enabled and schedule_immediate_kill to true 28 | schedule_immediate_kill: false 29 | notifications: 30 | enabled: false 31 | proxy: "" 32 | attacks: "" 33 | 34 | args: 35 | logLevel: 5 36 | logDir: /var/log/kube-monkey 37 | 38 | resources: {} 39 | # We usually recommend not to specify default resources and to leave this as a conscious 40 | # choice for the user. This also increases chances charts run on environments with little 41 | # resources, such as Minikube. If you do want to specify resources, uncomment the following 42 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
43 | # limits: 44 | # cpu: 100m 45 | # memory: 128Mi 46 | # requests: 47 | # cpu: 100m 48 | # memory: 128Mi 49 | 50 | nodeSelector: {} 51 | 52 | tolerations: [] 53 | 54 | affinity: {} 55 | 56 | additionalVolumes: {} 57 | # - name: log 58 | # emptyDir: {} 59 | 60 | additionalVolumeMounts: {} 61 | # - name: log 62 | # mountPath: "/var/log" 63 | 64 | podSecurityContext: {} 65 | # runAsNonRoot: true 66 | # runAsUser: 1001 67 | # runAsGroup: 1001 68 | # fsGroup: 1001 -------------------------------------------------------------------------------- /internal/pkg/calendar/calendar.go: -------------------------------------------------------------------------------- 1 | package calendar 2 | 3 | import ( 4 | "math/rand" 5 | "time" 6 | 7 | "github.com/golang/glog" 8 | ) 9 | 10 | // Checks if specified Time is a weekday 11 | func isWeekday(t time.Time) bool { 12 | switch t.Weekday() { 13 | case time.Monday, time.Tuesday, time.Wednesday, time.Thursday, time.Friday: 14 | return true 15 | case time.Saturday, time.Sunday: 16 | return false 17 | } 18 | 19 | glog.Fatalf("Unrecognized day of the week: %s", t.Weekday().String()) 20 | 21 | panic("Explicit Panic to avoid compiler error: missing return at end of function") 22 | } 23 | 24 | // Returns the next weekday in Location 25 | func nextWeekday(loc *time.Location) time.Time { 26 | check := time.Now().In(loc) 27 | for { 28 | check = check.AddDate(0, 0, 1) 29 | if isWeekday(check) { 30 | return check 31 | } 32 | } 33 | } 34 | 35 | // NextRuntime calculates the next time the Scheduled should run 36 | func NextRuntime(loc *time.Location, r int) time.Time { 37 | now := time.Now().In(loc) 38 | 39 | // Is today a weekday and are we still in time for it? 40 | if isWeekday(now) { 41 | runtimeToday := time.Date(now.Year(), now.Month(), now.Day(), r, 0, 0, 0, loc) 42 | if runtimeToday.After(now) { 43 | return runtimeToday 44 | } 45 | } 46 | 47 | // Missed the train for today. 
Schedule on next weekday 48 | year, month, day := nextWeekday(loc).Date() 49 | return time.Date(year, month, day, r, 0, 0, 0, loc) 50 | } 51 | 52 | // RandomTimeInRange returns a random time within the range specified by startHour and endHour 53 | func RandomTimeInRange(startHour int, endHour int, loc *time.Location) time.Time { 54 | // calculate the number of minutes in the range 55 | minutesInRange := (endHour - startHour) * 60 56 | 57 | // calculate a random minute-offset in range [0, minutesInRange) 58 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 59 | randMinuteOffset := r.Intn(minutesInRange) 60 | offsetDuration := time.Duration(randMinuteOffset) * time.Minute 61 | 62 | // Add the minute offset to the start of the range to get a random 63 | // time within the range 64 | year, month, date := time.Now().Date() 65 | rangeStart := time.Date(year, month, date, startHour, 0, 0, 0, loc) 66 | return rangeStart.Add(offsetDuration) 67 | } 68 | -------------------------------------------------------------------------------- /internal/pkg/calendar/calendar_test.go: -------------------------------------------------------------------------------- 1 | package calendar 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestIsWeekDay(t *testing.T) { 11 | monday := time.Date(2018, 4, 16, 0, 0, 0, 0, time.UTC) 12 | 13 | assert.True(t, isWeekday(monday)) 14 | assert.True(t, isWeekday(monday.Add(time.Hour*24))) 15 | assert.True(t, isWeekday(monday.Add(time.Hour*24*2))) 16 | assert.True(t, isWeekday(monday.Add(time.Hour*24*3))) 17 | assert.True(t, isWeekday(monday.Add(time.Hour*24*4))) 18 | 19 | assert.False(t, isWeekday(monday.Add(time.Hour*24*5))) 20 | assert.False(t, isWeekday(monday.Add(time.Hour*24*6))) 21 | } 22 | 23 | // FIXME: add more tests 24 | -------------------------------------------------------------------------------- /internal/pkg/chaos/chaos.go: 
-------------------------------------------------------------------------------- 1 | package chaos 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/pkg/errors" 8 | 9 | "kube-monkey/internal/pkg/config" 10 | "kube-monkey/internal/pkg/kubernetes" 11 | "kube-monkey/internal/pkg/victims" 12 | 13 | kube "k8s.io/client-go/kubernetes" 14 | ) 15 | 16 | type Chaos struct { 17 | killAt time.Time 18 | victim victims.Victim 19 | } 20 | 21 | // New creates a new Chaos instance 22 | func New(killtime time.Time, victim victims.Victim) *Chaos { 23 | // TargetPodName will be populated at time of termination 24 | return &Chaos{ 25 | killAt: killtime, 26 | victim: victim, 27 | } 28 | } 29 | 30 | func (c *Chaos) Victim() victims.Victim { 31 | return c.victim 32 | } 33 | 34 | func (c *Chaos) KillAt() time.Time { 35 | return c.killAt 36 | } 37 | 38 | // Schedule the execution of Chaos 39 | func (c *Chaos) Schedule(resultchan chan<- *Result) { 40 | time.Sleep(c.DurationToKillTime()) 41 | c.Execute(resultchan) 42 | } 43 | 44 | // DurationToKillTime calculates the duration from now until Chaos.killAt 45 | func (c *Chaos) DurationToKillTime() time.Duration { 46 | return time.Until(c.killAt) 47 | } 48 | 49 | // Execute exposed function that calls the actual execution of the chaos, i.e. 
termination of pods 50 | // The result is sent back over the channel provided 51 | func (c *Chaos) Execute(resultchan chan<- *Result) { 52 | // Create kubernetes clientset 53 | clientset, err := kubernetes.CreateClient() 54 | if err != nil { 55 | resultchan <- c.NewResult(err) 56 | return 57 | } 58 | 59 | err = c.verifyExecution(clientset) 60 | if err != nil { 61 | resultchan <- c.NewResult(err) 62 | return 63 | } 64 | 65 | err = c.terminate(clientset) 66 | if err != nil { 67 | resultchan <- c.NewResult(err) 68 | return 69 | } 70 | 71 | // Send a success msg 72 | resultchan <- c.NewResult(nil) 73 | } 74 | 75 | // Verify if the victim has opted out since scheduling 76 | func (c *Chaos) verifyExecution(clientset kube.Interface) error { 77 | // Is victim still enrolled in kube-monkey 78 | enrolled, err := c.Victim().IsEnrolled(clientset) 79 | if err != nil { 80 | return err 81 | } 82 | 83 | if !enrolled { 84 | return fmt.Errorf("%s %s is no longer enrolled in kube-monkey. Skipping", c.Victim().Kind(), c.Victim().Name()) 85 | } 86 | 87 | // Has the victim been blacklisted since scheduling? 88 | if c.Victim().IsBlacklisted() { 89 | return fmt.Errorf("%s %s is blacklisted. Skipping", c.Victim().Kind(), c.Victim().Name()) 90 | } 91 | 92 | // Has the victim been removed from the whitelist since scheduling? 93 | if !c.Victim().IsWhitelisted() { 94 | return fmt.Errorf("%s %s is not whitelisted. 
Skipping", c.Victim().Kind(), c.Victim().Name()) 95 | } 96 | 97 | // Send back valid for termination 98 | return nil 99 | } 100 | 101 | // The termination type and value is processed here 102 | func (c *Chaos) terminate(clientset kube.Interface) error { 103 | killType, err := c.Victim().KillType(clientset) 104 | if err != nil { 105 | return errors.Wrapf(err, "Failed to check KillType label for %s %s", c.Victim().Kind(), c.Victim().Name()) 106 | } 107 | 108 | killValue, err := c.getKillValue(clientset) 109 | 110 | // KillAll is the only kill type that does not require a kill-value 111 | if killType != config.KillAllLabelValue && err != nil { 112 | return err 113 | } 114 | 115 | // Validate killtype 116 | switch killType { 117 | case config.KillFixedLabelValue: 118 | return c.Victim().DeleteRandomPods(clientset, killValue) 119 | case config.KillAllLabelValue: 120 | killNum, err := c.Victim().KillNumberForKillingAll(clientset) 121 | if err != nil { 122 | return err 123 | } 124 | return c.Victim().DeleteRandomPods(clientset, killNum) 125 | case config.KillRandomMaxLabelValue: 126 | killNum, err := c.Victim().KillNumberForMaxPercentage(clientset, killValue) 127 | if err != nil { 128 | return err 129 | } 130 | return c.Victim().DeleteRandomPods(clientset, killNum) 131 | case config.KillFixedPercentageLabelValue: 132 | killNum, err := c.Victim().KillNumberForFixedPercentage(clientset, killValue) 133 | if err != nil { 134 | return err 135 | } 136 | return c.Victim().DeleteRandomPods(clientset, killNum) 137 | default: 138 | return fmt.Errorf("failed to recognize KillType label for %s %s", c.Victim().Kind(), c.Victim().Name()) 139 | } 140 | } 141 | 142 | func (c *Chaos) getKillValue(clientset kube.Interface) (int, error) { 143 | killValue, err := c.Victim().KillValue(clientset) 144 | if err != nil { 145 | return 0, errors.Wrapf(err, "Failed to check KillValue label for %s %s", c.Victim().Kind(), c.Victim().Name()) 146 | } 147 | 148 | return killValue, nil 149 | } 150 | 151 | 
// NewResult creates a ChaosResult instance 152 | func (c *Chaos) NewResult(e error) *Result { 153 | return &Result{ 154 | chaos: c, 155 | err: e, 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /internal/pkg/chaos/chaos_test.go: -------------------------------------------------------------------------------- 1 | package chaos 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | 7 | "kube-monkey/internal/pkg/config" 8 | 9 | "github.com/stretchr/testify/mock" 10 | "github.com/stretchr/testify/suite" 11 | 12 | kube "k8s.io/client-go/kubernetes" 13 | "k8s.io/client-go/kubernetes/fake" 14 | ) 15 | 16 | type ChaosTestSuite struct { 17 | suite.Suite 18 | chaos *Chaos 19 | client kube.Interface 20 | } 21 | 22 | func (s *ChaosTestSuite) SetupTest() { 23 | s.chaos = NewMock() 24 | s.client = fake.NewSimpleClientset() 25 | } 26 | 27 | func (s *ChaosTestSuite) TestVerifyExecutionNotEnrolled() { 28 | v := s.chaos.victim.(*VictimMock) 29 | v.On("IsEnrolled", s.client).Return(false, nil) 30 | err := s.chaos.verifyExecution(s.client) 31 | v.AssertExpectations(s.T()) 32 | s.EqualError(err, v.Kind()+" "+v.Name()+" is no longer enrolled in kube-monkey. Skipping") 33 | } 34 | 35 | func (s *ChaosTestSuite) TestVerifyExecutionBlacklisted() { 36 | v := s.chaos.victim.(*VictimMock) 37 | v.On("IsEnrolled", s.client).Return(true, nil) 38 | v.On("IsBlacklisted").Return(true) 39 | err := s.chaos.verifyExecution(s.client) 40 | v.AssertExpectations(s.T()) 41 | s.EqualError(err, v.Kind()+" "+v.Name()+" is blacklisted. Skipping") 42 | } 43 | 44 | func (s *ChaosTestSuite) TestVerifyExecutionNotWhitelisted() { 45 | v := s.chaos.victim.(*VictimMock) 46 | v.On("IsEnrolled", s.client).Return(true, nil) 47 | v.On("IsBlacklisted").Return(false) 48 | v.On("IsWhitelisted").Return(false) 49 | err := s.chaos.verifyExecution(s.client) 50 | v.AssertExpectations(s.T()) 51 | s.EqualError(err, v.Kind()+" "+v.Name()+" is not whitelisted. 
Skipping") 52 | } 53 | 54 | func (s *ChaosTestSuite) TestVerifyExecutionWhitelisted() { 55 | v := s.chaos.victim.(*VictimMock) 56 | v.On("IsEnrolled", s.client).Return(true, nil) 57 | v.On("IsBlacklisted").Return(false) 58 | v.On("IsWhitelisted").Return(true) 59 | err := s.chaos.verifyExecution(s.client) 60 | v.AssertExpectations(s.T()) 61 | s.NoError(err) 62 | } 63 | 64 | func (s *ChaosTestSuite) TestTerminateKillTypeError() { 65 | v := s.chaos.victim.(*VictimMock) 66 | err := errors.New("KillType Error") 67 | v.On("KillType", s.client).Return("", err) 68 | 69 | s.NotNil(s.chaos.terminate(s.client)) 70 | v.AssertExpectations(s.T()) 71 | } 72 | 73 | func (s *ChaosTestSuite) TestTerminateKillValueError() { 74 | v := s.chaos.victim.(*VictimMock) 75 | errMsg := "KillValue Error" 76 | v.On("KillType", s.client).Return(config.KillFixedLabelValue, nil) 77 | v.On("KillValue", s.client).Return(0, errors.New(errMsg)) 78 | s.NotNil(s.chaos.terminate(s.client)) 79 | v.AssertExpectations(s.T()) 80 | } 81 | 82 | func (s *ChaosTestSuite) TestTerminateKillFixed() { 83 | v := s.chaos.victim.(*VictimMock) 84 | killValue := 1 85 | v.On("KillType", s.client).Return(config.KillFixedLabelValue, nil) 86 | v.On("KillValue", s.client).Return(killValue, nil) 87 | v.On("DeleteRandomPods", s.client, killValue).Return(nil) 88 | _ = s.chaos.terminate(s.client) 89 | v.AssertExpectations(s.T()) 90 | } 91 | 92 | func (s *ChaosTestSuite) TestTerminateAllPods() { 93 | v := s.chaos.victim.(*VictimMock) 94 | v.On("KillType", s.client).Return(config.KillAllLabelValue, nil) 95 | v.On("KillValue", s.client).Return(0, nil) 96 | v.On("KillNumberForKillingAll", s.client).Return(0, nil) 97 | v.On("DeleteRandomPods", s.client, 0).Return(nil) 98 | _ = s.chaos.terminate(s.client) 99 | v.AssertExpectations(s.T()) 100 | } 101 | 102 | func (s *ChaosTestSuite) TestTerminateKillRandomMaxPercentage() { 103 | v := s.chaos.victim.(*VictimMock) 104 | killValue := 1 105 | v.On("KillType", 
s.client).Return(config.KillRandomMaxLabelValue, nil) 106 | v.On("KillValue", s.client).Return(killValue, nil) 107 | v.On("KillNumberForMaxPercentage", s.client, mock.AnythingOfType("int")).Return(0, nil) 108 | v.On("DeleteRandomPods", s.client, 0).Return(nil) 109 | _ = s.chaos.terminate(s.client) 110 | v.AssertExpectations(s.T()) 111 | } 112 | 113 | func (s *ChaosTestSuite) TestTerminateKillFixedPercentage() { 114 | v := s.chaos.victim.(*VictimMock) 115 | killValue := 1 116 | v.On("KillType", s.client).Return(config.KillFixedPercentageLabelValue, nil) 117 | v.On("KillValue", s.client).Return(killValue, nil) 118 | v.On("KillNumberForFixedPercentage", s.client, mock.AnythingOfType("int")).Return(0, nil) 119 | v.On("DeleteRandomPods", s.client, 0).Return(nil) 120 | _ = s.chaos.terminate(s.client) 121 | v.AssertExpectations(s.T()) 122 | } 123 | 124 | func (s *ChaosTestSuite) TestInvalidKillType() { 125 | v := s.chaos.victim.(*VictimMock) 126 | v.On("KillType", s.client).Return("InvalidKillTypeHere", nil) 127 | v.On("KillValue", s.client).Return(0, nil) 128 | err := s.chaos.terminate(s.client) 129 | v.AssertExpectations(s.T()) 130 | s.NotNil(err) 131 | } 132 | 133 | func (s *ChaosTestSuite) TestGetKillValue() { 134 | v := s.chaos.victim.(*VictimMock) 135 | killValue := 5 136 | v.On("KillValue", s.client).Return(killValue, nil) 137 | result, err := s.chaos.getKillValue(s.client) 138 | s.Nil(err) 139 | s.Equal(killValue, result) 140 | } 141 | 142 | func (s *ChaosTestSuite) TestGetKillValueReturnsError() { 143 | v := s.chaos.victim.(*VictimMock) 144 | v.On("KillValue", s.client).Return(0, errors.New("InvalidKillValue")) 145 | _, err := s.chaos.getKillValue(s.client) 146 | s.NotNil(err) 147 | } 148 | 149 | // Disabling test 150 | // See https://github.com/asobti/kube-monkey/issues/126 151 | //func (s *ChaosTestSuite) TestDurationToKillTime() { 152 | // t := s.chaos.DurationToKillTime() 153 | // s.WithinDuration(s.chaos.KillAt(), time.Now(), t+time.Millisecond) 154 | //} 
// VictimMock is a test double for a chaos victim.
//
// It embeds mock.Mock, so the methods declared on *VictimMock in this file
// record each call and return whatever the test configured for it. It also
// embeds victims.VictimBase, so anything not overridden here falls through
// to the base implementation.
type VictimMock struct {
	mock.Mock
	victims.VictimBase
}
KillNumberForFixedPercentage(clientset kube.Interface, killValue int) (int, error) { 61 | args := vm.Called(clientset, killValue) 62 | return args.Int(0), args.Error(1) 63 | } 64 | 65 | func (vm *VictimMock) IsBlacklisted() bool { 66 | args := vm.Called() 67 | return args.Bool(0) 68 | } 69 | 70 | func (vm *VictimMock) IsWhitelisted() bool { 71 | args := vm.Called() 72 | return args.Bool(0) 73 | } 74 | 75 | func NewVictimMock() *VictimMock { 76 | v := victims.New(KIND, NAME, NAMESPACE, IDENTIFIER, 1) 77 | return &VictimMock{ 78 | VictimBase: *v, 79 | } 80 | } 81 | 82 | func NewMock() *Chaos { 83 | return &Chaos{ 84 | killAt: time.Now(), 85 | victim: NewVictimMock(), 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /internal/pkg/chaos/chaosresult.go: -------------------------------------------------------------------------------- 1 | package chaos 2 | 3 | import ( 4 | "kube-monkey/internal/pkg/victims" 5 | ) 6 | 7 | type Result struct { 8 | chaos *Chaos 9 | err error 10 | } 11 | 12 | func (r *Result) Victim() victims.Victim { 13 | return r.chaos.Victim() 14 | } 15 | 16 | func (r *Result) Error() error { 17 | return r.err 18 | } 19 | 20 | // NewResult creates a new Result instance 21 | func NewResult(chaos *Chaos, err error) *Result { 22 | return &Result{ 23 | chaos: chaos, 24 | err: err, 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /internal/pkg/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "strings" 5 | "time" 6 | 7 | "github.com/fsnotify/fsnotify" 8 | "github.com/golang/glog" 9 | "github.com/spf13/viper" 10 | 11 | "kube-monkey/internal/pkg/config/param" 12 | 13 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 | "k8s.io/apimachinery/pkg/util/sets" 15 | ) 16 | 17 | const ( 18 | configpath = "/etc/kube-monkey" 19 | configtype = "toml" 20 | configname = "config" 21 | 22 | 
// Currently, there does not appear to be 23 | // any value in making these configurable 24 | // so defining them as consts 25 | 26 | IdentLabelKey = "kube-monkey/identifier" 27 | EnabledLabelKey = "kube-monkey/enabled" 28 | EnabledLabelValue = "enabled" 29 | MtbfLabelKey = "kube-monkey/mtbf" 30 | KillTypeLabelKey = "kube-monkey/kill-mode" 31 | KillValueLabelKey = "kube-monkey/kill-value" 32 | KillRandomMaxLabelValue = "random-max-percent" 33 | KillFixedPercentageLabelValue = "fixed-percent" 34 | KillFixedLabelValue = "fixed" 35 | KillAllLabelValue = "kill-all" 36 | ) 37 | 38 | type Receiver struct { 39 | Endpoint string `mapstructure:"endpoint"` 40 | Message string `mapstructure:"message"` 41 | Headers []string `mapstructure:"headers"` 42 | } 43 | 44 | // NewReceiver creates a new Receiver instance 45 | func NewReceiver(endpoint string, message string, headers []string) Receiver { 46 | return Receiver{ 47 | Endpoint: endpoint, 48 | Message: message, 49 | Headers: headers, 50 | } 51 | } 52 | 53 | func SetDefaults() { 54 | viper.SetEnvKeyReplacer(strings.NewReplacer(".", "_")) 55 | viper.AutomaticEnv() 56 | 57 | viper.SetDefault(param.DryRun, true) 58 | viper.SetDefault(param.Timezone, "America/Los_Angeles") 59 | viper.SetDefault(param.RunHour, 8) 60 | viper.SetDefault(param.StartHour, 10) 61 | viper.SetDefault(param.EndHour, 16) 62 | viper.SetDefault(param.GracePeriodSec, 5) 63 | viper.SetDefault(param.BlacklistedNamespaces, []string{metav1.NamespaceSystem}) 64 | viper.SetDefault(param.WhitelistedNamespaces, []string{metav1.NamespaceAll}) 65 | 66 | viper.SetDefault(param.DebugEnabled, false) 67 | viper.SetDefault(param.DebugScheduleDelay, 30) 68 | viper.SetDefault(param.DebugForceShouldKill, false) 69 | viper.SetDefault(param.DebugScheduleImmediateKill, false) 70 | 71 | viper.SetDefault(param.NotificationsEnabled, false) 72 | viper.SetDefault(param.NotificationsProxy, nil) 73 | viper.SetDefault(param.NotificationsReportSchedule, false) 74 | 
viper.SetDefault(param.NotificationsAttacks, Receiver{}) 75 | } 76 | 77 | func setupWatch() { 78 | viper.WatchConfig() 79 | viper.OnConfigChange(func(e fsnotify.Event) { 80 | glog.V(4).Info("Config change detected") 81 | if err := ValidateConfigs(); err != nil { 82 | panic(err) 83 | } 84 | glog.V(4).Info("Successfully reloaded configs") 85 | }) 86 | } 87 | 88 | func Init() error { 89 | SetDefaults() 90 | viper.AddConfigPath(configpath) 91 | viper.SetConfigType(configtype) 92 | viper.SetConfigName(configname) 93 | 94 | if err := viper.ReadInConfig(); err != nil { 95 | return err 96 | } 97 | 98 | if err := ValidateConfigs(); err != nil { 99 | glog.Errorf("Failed to validate %v", err) 100 | return err 101 | } 102 | glog.V(4).Info("Successfully validated configs") 103 | setupWatch() 104 | return nil 105 | } 106 | 107 | func DryRun() bool { 108 | return viper.GetBool(param.DryRun) 109 | } 110 | 111 | func Timezone() *time.Location { 112 | tz := viper.GetString(param.Timezone) 113 | location, err := time.LoadLocation(tz) 114 | if err != nil { 115 | glog.Fatal(err.Error()) 116 | } 117 | return location 118 | } 119 | 120 | func RunHour() int { 121 | return viper.GetInt(param.RunHour) 122 | } 123 | 124 | func StartHour() int { 125 | return viper.GetInt(param.StartHour) 126 | } 127 | 128 | func EndHour() int { 129 | return viper.GetInt(param.EndHour) 130 | } 131 | 132 | func GracePeriodSeconds() *int64 { 133 | gpInt64 := viper.GetInt64(param.GracePeriodSec) 134 | return &gpInt64 135 | } 136 | 137 | func BlacklistedNamespaces() sets.String { 138 | // Return as set for O(1) membership checks 139 | namespaces := viper.GetStringSlice(param.BlacklistedNamespaces) 140 | return sets.NewString(namespaces...) 141 | } 142 | 143 | func WhitelistedNamespaces() sets.String { 144 | // Return as set for O(1) membership checks 145 | namespaces := viper.GetStringSlice(param.WhitelistedNamespaces) 146 | return sets.NewString(namespaces...) 
147 | } 148 | 149 | func BlacklistEnabled() bool { 150 | return !BlacklistedNamespaces().Equal(sets.NewString(metav1.NamespaceNone)) 151 | } 152 | 153 | func WhitelistEnabled() bool { 154 | return !WhitelistedNamespaces().Equal(sets.NewString(metav1.NamespaceAll)) 155 | } 156 | 157 | func ClusterAPIServerHost() (string, bool) { 158 | if viper.IsSet(param.ClusterAPIServerHost) { 159 | return viper.GetString(param.ClusterAPIServerHost), true 160 | } 161 | return "", false 162 | } 163 | 164 | func DebugEnabled() bool { 165 | return viper.GetBool(param.DebugEnabled) 166 | } 167 | 168 | func DebugScheduleDelay() time.Duration { 169 | delaySec := viper.GetInt(param.DebugScheduleDelay) 170 | return time.Duration(delaySec) * time.Second 171 | } 172 | 173 | func DebugForceShouldKill() bool { 174 | return viper.GetBool(param.DebugForceShouldKill) 175 | } 176 | 177 | func DebugScheduleImmediateKill() bool { 178 | return viper.GetBool(param.DebugScheduleImmediateKill) 179 | } 180 | 181 | func NotificationsEnabled() bool { 182 | return viper.GetBool(param.NotificationsEnabled) 183 | } 184 | 185 | func NotificationsProxy() string { 186 | return viper.GetString(param.NotificationsProxy) 187 | } 188 | 189 | func NotificationsReportSchedule() bool { 190 | return viper.GetBool(param.NotificationsReportSchedule) 191 | } 192 | 193 | func NotificationsAttacks() Receiver { 194 | var receiver Receiver 195 | err := viper.UnmarshalKey(param.NotificationsAttacks, &receiver) 196 | if err != nil { 197 | glog.Errorf("Failed to parse notifications.attacks %v", err) 198 | } 199 | return receiver 200 | } 201 | -------------------------------------------------------------------------------- /internal/pkg/config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "os" 5 | "strings" 6 | "testing" 7 | "time" 8 | 9 | "kube-monkey/internal/pkg/config/param" 10 | 11 | "github.com/spf13/viper" 12 | 
"github.com/stretchr/testify/suite" 13 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 | ) 15 | 16 | type ConfigTestSuite struct { 17 | suite.Suite 18 | } 19 | 20 | func (s *ConfigTestSuite) SetupTest() { 21 | viper.Reset() 22 | SetDefaults() 23 | } 24 | 25 | func (s *ConfigTestSuite) TestSetDefaults() { 26 | 27 | s.True(viper.GetBool(param.DryRun)) 28 | s.Equal("America/Los_Angeles", viper.GetString(param.Timezone)) 29 | s.Equal(8, viper.GetInt(param.RunHour)) 30 | s.Equal(10, viper.GetInt(param.StartHour)) 31 | s.Equal(16, viper.GetInt(param.EndHour)) 32 | s.Equal(int64(5), viper.GetInt64(param.GracePeriodSec)) 33 | s.Equal([]string{metav1.NamespaceSystem}, viper.GetStringSlice(param.BlacklistedNamespaces)) 34 | s.Equal([]string{metav1.NamespaceAll}, viper.GetStringSlice(param.WhitelistedNamespaces)) 35 | s.False(viper.GetBool(param.DebugEnabled)) 36 | s.Equal(viper.GetInt(param.DebugScheduleDelay), 30) 37 | s.False(viper.GetBool(param.DebugForceShouldKill)) 38 | s.False(viper.GetBool(param.DebugScheduleImmediateKill)) 39 | s.False(viper.GetBool(param.NotificationsEnabled)) 40 | s.Equal(Receiver{}, viper.Get(param.NotificationsAttacks)) 41 | } 42 | 43 | func (s *ConfigTestSuite) TestDryRun() { 44 | viper.Set(param.DryRun, false) 45 | s.False(DryRun()) 46 | viper.Set(param.DryRun, true) 47 | s.True(DryRun()) 48 | } 49 | 50 | func (s *ConfigTestSuite) TestTimezone() { 51 | viper.Set(param.Timezone, "UTC") 52 | s.Equal(Timezone().String(), "UTC") 53 | } 54 | 55 | func (s *ConfigTestSuite) TestStartHourEnv() { 56 | envname := "KUBEMONKEY_START_HOUR" 57 | defer os.Setenv(envname, os.Getenv(envname)) 58 | os.Setenv(envname, "11") 59 | s.Equal(11, StartHour()) 60 | } 61 | 62 | func (s *ConfigTestSuite) TestRunHour() { 63 | viper.Set(param.RunHour, 11) 64 | s.Equal(11, RunHour()) 65 | } 66 | 67 | func (s *ConfigTestSuite) TestStartHour() { 68 | viper.Set(param.StartHour, 10) 69 | s.Equal(10, StartHour()) 70 | } 71 | 72 | func (s *ConfigTestSuite) TestEndHour() { 73 | 
viper.Set(param.EndHour, 9) 74 | s.Equal(9, EndHour()) 75 | } 76 | 77 | func (s *ConfigTestSuite) TestGracePeriodSeconds() { 78 | g := int64(100) 79 | viper.Set(param.GracePeriodSec, 100) 80 | s.Equal(&g, GracePeriodSeconds()) 81 | } 82 | 83 | func (s *ConfigTestSuite) TestBlacklistedNamespacesEnv() { 84 | blns := []string{"namespace3", "namespace4"} 85 | envname := "KUBEMONKEY_BLACKLISTED_NAMESPACES" 86 | defer os.Setenv(envname, os.Getenv(envname)) 87 | os.Setenv(envname, strings.Join(blns, " ")) 88 | ns := BlacklistedNamespaces() 89 | s.Len(ns, len(blns)) 90 | for _, v := range blns { 91 | s.Contains(ns, v) 92 | } 93 | } 94 | 95 | func (s *ConfigTestSuite) TestBlacklistedNamespaces() { 96 | blns := []string{"namespace1", "namespace2"} 97 | viper.Set(param.BlacklistedNamespaces, blns) 98 | ns := BlacklistedNamespaces() 99 | s.Len(ns, len(blns)) 100 | for _, v := range blns { 101 | s.Contains(ns, v) 102 | } 103 | } 104 | 105 | func (s *ConfigTestSuite) TestWhitelistedNamespaces() { 106 | wlns := []string{"namespace1", "namespace2"} 107 | viper.Set(param.WhitelistedNamespaces, wlns) 108 | ns := WhitelistedNamespaces() 109 | s.Len(ns, len(wlns)) 110 | for _, v := range wlns { 111 | s.Contains(ns, v) 112 | } 113 | } 114 | 115 | func (s *ConfigTestSuite) TestBlacklistEnabled() { 116 | s.True(BlacklistEnabled()) 117 | viper.Set(param.BlacklistedNamespaces, []string{metav1.NamespaceNone}) 118 | s.False(BlacklistEnabled()) 119 | } 120 | 121 | func (s *ConfigTestSuite) TestWhitelistEnabled() { 122 | s.False(WhitelistEnabled()) 123 | viper.Set(param.WhitelistedNamespaces, []string{metav1.NamespaceDefault}) 124 | s.True(WhitelistEnabled()) 125 | } 126 | 127 | func (s *ConfigTestSuite) TestClusterrAPIServerHost() { 128 | host, enabled := ClusterAPIServerHost() 129 | s.False(enabled) 130 | s.Empty(host) 131 | viper.Set(param.ClusterAPIServerHost, "Host") 132 | host, enabled = ClusterAPIServerHost() 133 | s.True(enabled) 134 | s.Equal("Host", host) 135 | } 136 | 137 | func (s 
*ConfigTestSuite) TestDebugEnabled() { 138 | viper.Set(param.DebugEnabled, true) 139 | s.True(DebugEnabled()) 140 | } 141 | 142 | func (s *ConfigTestSuite) TestDebugScheduleDelay() { 143 | viper.Set(param.DebugScheduleDelay, 10) 144 | s.Equal(10*time.Second, DebugScheduleDelay()) 145 | } 146 | func (s *ConfigTestSuite) TestDebugForceShouldKill() { 147 | viper.Set(param.DebugForceShouldKill, true) 148 | s.True(DebugForceShouldKill()) 149 | } 150 | 151 | func (s *ConfigTestSuite) TestDebugImmediateKill() { 152 | viper.Set(param.DebugScheduleImmediateKill, true) 153 | s.True(DebugScheduleImmediateKill()) 154 | } 155 | 156 | func (s *ConfigTestSuite) TestNotificationsEnabled() { 157 | viper.Set(param.NotificationsEnabled, true) 158 | s.True(NotificationsEnabled()) 159 | } 160 | 161 | func (s *ConfigTestSuite) TestNotificationsProxy() { 162 | viper.Set(param.NotificationsProxy, "http://127.0.0.1:8080") 163 | proxy := NotificationsProxy() 164 | s.Equal("http://127.0.0.1:8080", proxy) 165 | } 166 | 167 | func (s *ConfigTestSuite) TestNotificationsAttacks() { 168 | headers := []string{"header1Key:header1Value", "header2Key:header2Value"} 169 | receiver := map[string]interface{}{"endpoint": "endpoint1", "message": "message1", "headers": headers} 170 | viper.Set(param.NotificationsAttacks, receiver) 171 | actual := NotificationsAttacks() 172 | 173 | s.Equal(receiver["endpoint"], actual.Endpoint) 174 | s.Equal(receiver["message"], actual.Message) 175 | s.Equal(receiver["headers"], actual.Headers) 176 | 177 | s.Equal(receiver["endpoint"], actual.Endpoint) 178 | s.Equal(receiver["message"], actual.Message) 179 | s.Equal(receiver["headers"], actual.Headers) 180 | } 181 | 182 | func TestSuite(t *testing.T) { 183 | suite.Run(t, new(ConfigTestSuite)) 184 | } 185 | -------------------------------------------------------------------------------- /internal/pkg/config/param/param.go: -------------------------------------------------------------------------------- 1 | package param 2 | 
const (
	// DryRun logs but does not terminate pods
	// Type: bool
	// Default: true
	DryRun = "kubemonkey.dry_run"

	// Timezone specifies the timezone to use when
	// scheduling Pod terminations
	// Type: string
	// Default: America/Los_Angeles
	Timezone = "kubemonkey.time_zone"

	// RunHour specifies the hour of the weekday
	// when the scheduler should run to schedule terminations
	// Must be less than StartHour, and [0,23]
	// Type: int
	// Default: 8
	RunHour = "kubemonkey.run_hour"

	// StartHour specifies the hour beginning at
	// which pod terminations may occur
	// Should be set to a time when service owners are expected
	// to be available
	// Must be less than EndHour, and [0,23]
	// Type: int
	// Default: 10
	StartHour = "kubemonkey.start_hour"

	// EndHour specifies the end hour beyond which no pod
	// terminations will occur
	// Should be set to a time when service owners are
	// expected to be available
	// Must be [0,23]
	// Type: int
	// Default: 16
	EndHour = "kubemonkey.end_hour"

	// GracePeriodSec specifies the amount of time in
	// seconds a pod is given to shut down gracefully,
	// before Kubernetes does a hard kill
	// Type: int
	// Default: 5
	GracePeriodSec = "kubemonkey.graceperiod_sec"

	// WhitelistedNamespaces specifies a list of
	// namespaces where terminations are valid
	// Default is defined by metav1.NamespaceAll (see
	// config.SetDefaults), i.e. all namespaces are allowed
	// To allow all namespaces use [""]
	// Type: list
	// Default: [ "" ]
	WhitelistedNamespaces = "kubemonkey.whitelisted_namespaces"

	// BlacklistedNamespaces specifies a list of namespaces
	// for which terminations should never
	// be carried out.
	// Default is defined by metav1.NamespaceSystem
	// To block no namespaces use [""]
	// Type: list
	// Default: [ "kube-system" ]
	BlacklistedNamespaces = "kubemonkey.blacklisted_namespaces"

	// ClusterAPIServerHost specifies the host URL for Kubernetes
	// cluster APIServer. Use this config if the apiserver IP
	// address provided by in-cluster config
	// does not work for you because your certificate does not
	// contain the right SAN
	// Type: string
	// Default: No default. If not specified, URL provided
	// by in-cluster config is used
	ClusterAPIServerHost = "kubernetes.host"

	// DebugEnabled enables debug mode
	// Type: bool
	// Default: false
	DebugEnabled = "debug.enabled"

	// DebugScheduleDelay is the delay, in seconds, between
	// kube-monkey being launched and scheduling being run
	// while debug mode is enabled
	// Use when debugging to run scheduling sooner
	// Type: int
	// Default: 30
	DebugScheduleDelay = "debug.schedule_delay"

	// DebugForceShouldKill guarantees terminations
	// to be scheduled for all eligible Deployments,
	// i.e., probability of kill = 1
	// Type: bool
	// Default: false
	DebugForceShouldKill = "debug.force_should_kill"

	// DebugScheduleImmediateKill schedules pod terminations
	// sometime in the next 60 sec to facilitate
	// debugging (instead of the hours specified by
	// StartHour and EndHour)
	// Type: bool
	// Default: false
	DebugScheduleImmediateKill = "debug.schedule_immediate_kill"

	// NotificationsEnabled enables reporting of attacks to an HTTP endpoint
	// Type: bool
	// Default: false
	NotificationsEnabled = "notifications.enabled"

	// NotificationsProxy specifies a proxy through which
	// notification requests are sent
	// Type: string
	// Default: nil (no proxy)
	NotificationsProxy = "notifications.proxy"

	// NotificationsReportSchedule enables reporting of attack schedule to an HTTP endpoint
	// Type: bool
	// Default: false
	NotificationsReportSchedule = "notifications.reportSchedule"

	// NotificationsAttacks reports attacks to an HTTP endpoint
	// Type: config.Receiver struct
	// Default: Receiver{}
	NotificationsAttacks = "notifications.attacks"
)
// headerPattern matches a notification header written as "key:value" with at
// least one character on each side of a colon. It is compiled once at
// package initialization rather than on every isValidHeader call.
var headerPattern = regexp.MustCompile("^(.+:.+)$")

// isValidHeader reports whether header has the form "key:value" with a
// non-empty key and a non-empty value. The value may itself contain colons.
func isValidHeader(header string) bool {
	return headerPattern.MatchString(header)
}
isValidHeader(header)) 51 | 52 | header = "header1:{$env:VARIABLE_NAME}" 53 | assert.True(t, isValidHeader(header)) 54 | 55 | header = "header1Key" 56 | assert.False(t, isValidHeader(header)) 57 | 58 | header = "header1Key:" 59 | assert.False(t, isValidHeader(header)) 60 | } 61 | -------------------------------------------------------------------------------- /internal/pkg/kubemonkey/kubemonkey.go: -------------------------------------------------------------------------------- 1 | package kubemonkey 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/golang/glog" 8 | 9 | "kube-monkey/internal/pkg/calendar" 10 | "kube-monkey/internal/pkg/chaos" 11 | "kube-monkey/internal/pkg/config" 12 | "kube-monkey/internal/pkg/kubernetes" 13 | "kube-monkey/internal/pkg/notifications" 14 | "kube-monkey/internal/pkg/schedule" 15 | ) 16 | 17 | func durationToNextRun(runhour int, loc *time.Location) time.Duration { 18 | if config.DebugEnabled() { 19 | debugDelayDuration := config.DebugScheduleDelay() 20 | glog.V(1).Infof("Debug mode detected!") 21 | glog.V(1).Infof("Status Update: Generating next schedule in %.0f sec\n", debugDelayDuration.Seconds()) 22 | return debugDelayDuration 23 | } 24 | nextRun := calendar.NextRuntime(loc, runhour) 25 | glog.V(1).Infof("Status Update: Generating next schedule at %s\n", nextRun) 26 | return time.Until(nextRun) 27 | } 28 | 29 | func Run() error { 30 | // Verify kubernetes client can be created and works before 31 | // we enter execution loop 32 | if _, err := kubernetes.CreateClient(); err != nil { 33 | return err 34 | } 35 | 36 | var notificationsClient notifications.Client 37 | if config.NotificationsEnabled() { 38 | glog.V(1).Infof("Notifications enabled!") 39 | proxy := config.NotificationsProxy() 40 | if proxy != "" { 41 | glog.V(1).Infof("Notifications proxy set: %s!", proxy) 42 | } 43 | notificationsClient = notifications.CreateClient(&proxy) 44 | } 45 | 46 | for { 47 | // Calculate duration to sleep before next run 48 | 
sleepDuration := durationToNextRun(config.RunHour(), config.Timezone()) 49 | time.Sleep(sleepDuration) 50 | 51 | schedule, err := schedule.New() 52 | if err != nil { 53 | glog.Fatal(err.Error()) 54 | } 55 | schedule.Print() 56 | if config.NotificationsEnabled() && config.NotificationsReportSchedule() { 57 | notifications.ReportSchedule(notificationsClient, schedule) 58 | } 59 | fmt.Println(schedule) 60 | ScheduleTerminations(schedule.Entries(), notificationsClient) 61 | } 62 | } 63 | 64 | func ScheduleTerminations(entries []*chaos.Chaos, notificationsClient notifications.Client) { 65 | resultchan := make(chan *chaos.Result) 66 | defer close(resultchan) 67 | 68 | // Spin off all terminations 69 | for _, chaos := range entries { 70 | go chaos.Schedule(resultchan) 71 | } 72 | 73 | completedCount := 0 74 | var result *chaos.Result 75 | 76 | glog.V(3).Infof("Status Update: Waiting to run scheduled terminations.") 77 | 78 | // Gather results 79 | for completedCount < len(entries) { 80 | result = <-resultchan 81 | if result.Error() != nil { 82 | glog.Errorf("Failed to execute termination for %s %s. 
Error: %v", result.Victim().Kind(), result.Victim().Name(), result.Error().Error()) 83 | } else { 84 | glog.V(2).Infof("Termination successfully executed for %s %s\n", result.Victim().Kind(), result.Victim().Name()) 85 | } 86 | if config.NotificationsEnabled() { 87 | currentTime := time.Now() 88 | notifications.ReportAttack(notificationsClient, result, currentTime) 89 | } 90 | completedCount++ 91 | glog.V(4).Info("Status Update: ", len(entries)-completedCount, " scheduled terminations left.") 92 | } 93 | 94 | glog.V(3).Info("Status Update: All terminations done.") 95 | } 96 | -------------------------------------------------------------------------------- /internal/pkg/kubernetes/kubernetes.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package kubernetes is the km k8 package that sets up the configured k8 clientset used to communicate with the apiserver 3 | 4 | Use CreateClient to create and verify connectivity. 5 | It's recommended to create a new clientset after a period of inactivity 6 | */ 7 | package kubernetes 8 | 9 | import ( 10 | "fmt" 11 | 12 | "github.com/golang/glog" 13 | 14 | cfg "kube-monkey/internal/pkg/config" 15 | 16 | "k8s.io/client-go/discovery" 17 | kube "k8s.io/client-go/kubernetes" 18 | "k8s.io/client-go/rest" 19 | ) 20 | 21 | // CreateClient creates, verifies and returns an instance of k8 clientset 22 | func CreateClient() (*kube.Clientset, error) { 23 | client, err := NewInClusterClient() 24 | if err != nil { 25 | return nil, fmt.Errorf("Failed to generate NewInClusterClient: %v", err) 26 | } 27 | 28 | if VerifyClient(client) { 29 | return client, nil 30 | } 31 | return nil, fmt.Errorf("Unable to verify client connectivity to Kubernetes apiserver") 32 | } 33 | 34 | // NewInClusterClient only creates an initialized instance of k8 clientset 35 | func NewInClusterClient() (*kube.Clientset, error) { 36 | config, err := rest.InClusterConfig() 37 | if err != nil { 38 | glog.Errorf("failed to obtain 
// maxErrorBodyBytes caps how much of a non-2xx response body is copied into
// the error returned by Request, so a misbehaving endpoint cannot make the
// error (and the log line built from it) arbitrarily large.
const maxErrorBodyBytes = 4 << 10

// Client sends notification requests over HTTP.
type Client struct {
	httpClient *http.Client
}

// CreateClient creates a new client with a default timeout of 10 seconds.
//
// proxy may be nil or point to an empty string, in which case no proxy is
// configured. A proxy value that cannot be parsed as a URL is ignored and the
// client connects directly; the signature has no error return to report it.
func CreateClient(proxy *string) Client {
	transport := &http.Transport{
		// NOTE(review): certificate verification is disabled for every
		// notification endpoint. Kept to preserve existing behaviour, but this
		// allows man-in-the-middle interception of notification traffic.
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}
	if proxy != nil && *proxy != "" {
		if proxyURL, err := url.Parse(*proxy); err == nil {
			transport.Proxy = http.ProxyURL(proxyURL)
		}
	}

	return Client{
		httpClient: &http.Client{
			Timeout:   10 * time.Second,
			Transport: transport,
		},
	}
}

// Request POSTs requestBody to endpoint with the given headers.
//
// It returns an error when the request cannot be built or sent, when the
// response status is not 2xx (the error then carries the status code and the
// beginning of the response body), or when the response body cannot be read.
// Underlying errors are wrapped, so callers can inspect them with errors.Is
// and errors.As.
func (c Client) Request(endpoint string, requestBody string, headers map[string]string) error {
	body := bytes.NewBufferString(requestBody)

	req, err := http.NewRequest(http.MethodPost, endpoint, body)
	if err != nil {
		return fmt.Errorf("new http request: %s %s: %w", http.MethodPost, endpoint, err)
	}

	for k, v := range headers {
		req.Header.Add(k, v)
	}

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("http request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		// Best effort: include a bounded part of the body to tell the user why
		// the request failed. A read error only means less detail.
		b, _ := ioutil.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
		return fmt.Errorf("%s %s returned %d %s, expected 2xx",
			http.MethodPost, endpoint, resp.StatusCode, strings.TrimSuffix(string(b), "\n"))
	}

	// Drain the body so the underlying connection can be reused.
	if _, err = io.Copy(ioutil.Discard, resp.Body); err != nil {
		return fmt.Errorf("read response body: %s %s: %w", http.MethodPost, endpoint, err)
	}
	return nil
}
want, have := expectedErr, err.Error(); want != have { 39 | t.Errorf("unexpected error, want %q, have %q", want, have) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /internal/pkg/notifications/notifications.go: -------------------------------------------------------------------------------- 1 | package notifications 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | 8 | "kube-monkey/internal/pkg/chaos" 9 | "kube-monkey/internal/pkg/config" 10 | "kube-monkey/internal/pkg/schedule" 11 | 12 | "github.com/golang/glog" 13 | ) 14 | 15 | func Send(client Client, endpoint string, msg string, headers map[string]string) error { 16 | if err := client.Request(endpoint, msg, headers); err != nil { 17 | return fmt.Errorf("send request: %v", err) 18 | } 19 | return nil 20 | } 21 | 22 | func ReportSchedule(client Client, schedule *schedule.Schedule) bool { 23 | success := true 24 | receiver := config.NotificationsAttacks() 25 | 26 | msg := fmt.Sprintf("{\"text\": \"\n%s\n\"}", schedule) 27 | 28 | glog.V(1).Infof("reporting next schedule") 29 | if err := Send(client, receiver.Endpoint, msg, toHeaders(receiver.Headers)); err != nil { 30 | glog.Errorf("error reporting next schedule") 31 | success = false 32 | } 33 | 34 | return success 35 | } 36 | 37 | func ReportAttack(client Client, result *chaos.Result, time time.Time) bool { 38 | success := true 39 | 40 | receiver := config.NotificationsAttacks() 41 | errorString := "" 42 | if result.Error() != nil { 43 | errorString = result.Error().Error() 44 | } 45 | msg := ReplacePlaceholders(receiver.Message, result.Victim().Name(), result.Victim().Kind(), result.Victim().Namespace(), errorString, time, os.Getenv("KUBE_MONKEY_ID")) 46 | glog.V(1).Infof("reporting attack for %s %s to %s with message %s\n", result.Victim().Kind(), result.Victim().Name(), receiver.Endpoint, msg) 47 | if err := Send(client, receiver.Endpoint, msg, toHeaders(receiver.Headers)); err != nil { 48 | 
glog.Errorf("error reporting attack for %s %s to %s with message %s, error: %v\n", result.Victim().Kind(), result.Victim().Name(), receiver.Endpoint, msg, err) 49 | success = false 50 | } 51 | 52 | return success 53 | } 54 | -------------------------------------------------------------------------------- /internal/pkg/notifications/util.go: -------------------------------------------------------------------------------- 1 | package notifications 2 | 3 | import ( 4 | "os" 5 | "regexp" 6 | "strconv" 7 | "strings" 8 | "time" 9 | 10 | "github.com/golang/glog" 11 | ) 12 | 13 | const ( 14 | // header 15 | EnvVariableRegex = "^{\\$env:\\w+\\}$" 16 | 17 | // body (message) 18 | Name = "{$name}" 19 | Kind = "{$kind}" 20 | Namespace = "{$namespace}" 21 | Timestamp = "{$timestamp}" 22 | Time = "{$time}" 23 | Date = "{$date}" 24 | Error = "{$error}" 25 | KubeMonkeyID = "{$kubemonkeyid}" 26 | ) 27 | 28 | func toHeaders(headersArray []string) map[string]string { 29 | headersMap := make(map[string]string) 30 | 31 | for _, h := range headersArray { 32 | kv := strings.SplitN(h, ":", 2) 33 | if len(kv) == 1 { 34 | glog.Errorf("Cannot find ':' separator in supplied header %s", h) 35 | headersMap[strings.TrimSpace(kv[0])] = "" 36 | continue 37 | } 38 | headersMap[strings.TrimSpace(kv[0])] = replaceEnvVariablePlaceholder(strings.TrimSpace(kv[1])) 39 | } 40 | return headersMap 41 | } 42 | 43 | func replaceEnvVariablePlaceholder(value string) string { 44 | envVariableRegex := regexp.MustCompile(EnvVariableRegex) 45 | if envVariableRegex.MatchString(value) { 46 | prefix, _ := envVariableRegex.LiteralPrefix() 47 | envVariableName := value[len(prefix) : len(value)-1] 48 | envVariableValue := os.Getenv(envVariableName) 49 | if len(envVariableValue) == 0 { 50 | glog.Errorf("Cannot find environment variable %s", envVariableName) 51 | } 52 | value = envVariableRegex.ReplaceAllString(value, envVariableValue) 53 | } 54 | return value 55 | } 56 | 57 | func ReplacePlaceholders(msg string, name 
// timeToEpoch formats t as the number of milliseconds elapsed since the Unix
// epoch, in base 10.
func timeToEpoch(t time.Time) string {
	return strconv.FormatInt(t.UnixNano()/int64(time.Millisecond), 10)
}

// timeToDate formats t as a calendar date, e.g. "2006-01-02".
func timeToDate(t time.Time) string {
	return t.Format("2006-01-02")
}

// timeToTime formats t as a wall-clock time followed by the zone
// abbreviation, e.g. "15:04:05 MST".
func timeToTime(t time.Time) string {
	return t.Format("15:04:05 MST")
}
[]string{"Content-Type:application/json", "api-key:{$env:API_KEY}"} 32 | os.Setenv("API_KEY", "123456") 33 | 34 | headers := toHeaders(headersArray) 35 | 36 | assert.Equal(t, 2, len(headers)) 37 | assert.Equal(t, "application/json", headers["Content-Type"]) 38 | assert.Equal(t, "123456", headers["api-key"]) 39 | } 40 | 41 | func Test_ToHeadersEnvVariablePlaceholderNotExisting(t *testing.T) { 42 | headersArray := []string{"Content-Type:application/json", "api-key:{$env:VARIABLE_NOT_SET}"} 43 | 44 | headers := toHeaders(headersArray) 45 | 46 | assert.Equal(t, 2, len(headers)) 47 | assert.Equal(t, "application/json", headers["Content-Type"]) 48 | assert.Equal(t, "", headers["api-key"]) 49 | } 50 | 51 | func Test_NamePlaceholder(t *testing.T) { 52 | msg := `{"name":"{$name}"}` 53 | currentTime := time.Now() 54 | actual := ReplacePlaceholders(msg, "testName", "", "", "", currentTime, "CLUSTER_A") 55 | assert.Equal(t, `{"name":"testName"}`, actual) 56 | } 57 | 58 | func Test_KindPlaceholder(t *testing.T) { 59 | msg := `{"kind":"{$kind}"}` 60 | currentTime := time.Now() 61 | actual := ReplacePlaceholders(msg, "", "testKind", "", "", currentTime, "CLUSTER_A") 62 | assert.Equal(t, `{"kind":"testKind"}`, actual) 63 | } 64 | 65 | func Test_NamespacePlaceholder(t *testing.T) { 66 | msg := `{"namespace":"{$namespace}"}` 67 | currentTime := time.Now() 68 | actual := ReplacePlaceholders(msg, "", "", "testNamespace", "", currentTime, "CLUSTER_A") 69 | assert.Equal(t, `{"namespace":"testNamespace"}`, actual) 70 | } 71 | 72 | func Test_ErrorPlaceholder(t *testing.T) { 73 | msg := `{"error":"{$error}"}` 74 | currentTime := time.Now() 75 | actual := ReplacePlaceholders(msg, "", "", "", "testError", currentTime, "CLUSTER_A") 76 | assert.Equal(t, `{"error":"testError"}`, actual) 77 | } 78 | 79 | func Test_IDPlaceholder(t *testing.T) { 80 | msg := `{"kubemonkeyid":"{$kubemonkeyid}"}` 81 | currentTime := time.Now() 82 | actual := ReplacePlaceholders(msg, "", "", "", "testError", 
currentTime, "CLUSTER_A") 83 | assert.Equal(t, `{"kubemonkeyid":"CLUSTER_A"}`, actual) 84 | } 85 | 86 | func Test_TimestampPlaceholder(t *testing.T) { 87 | msg := `{"timestamp":"{$timestamp}"}` 88 | currentTime := time.Now() 89 | actual := ReplacePlaceholders(msg, "", "", "", "", currentTime, "CLUSTER_A") 90 | assert.Equal(t, `{"timestamp":"`+timeToEpoch(currentTime)+`"}`, actual) 91 | } 92 | 93 | func Test_TimePlaceholder(t *testing.T) { 94 | msg := `{"time":"{$time}"}` 95 | currentTime := time.Now() 96 | actual := ReplacePlaceholders(msg, "", "", "", "", currentTime, "CLUSTER_A") 97 | assert.Equal(t, `{"time":"`+timeToTime(currentTime)+`"}`, actual) 98 | } 99 | 100 | func Test_DatePlaceholder(t *testing.T) { 101 | msg := `{"date":"{$date}"}` 102 | currentTime := time.Now() 103 | actual := ReplacePlaceholders(msg, "", "", "", "", currentTime, "CLUSTER_A") 104 | assert.Equal(t, `{"date":"`+timeToDate(currentTime)+`"}`, actual) 105 | } 106 | 107 | func Test_MultiplePlaceholders(t *testing.T) { 108 | msg := `{"date1":"{$date}","date2":"{$date}","name":"{$name}"}` 109 | currentTime := time.Now() 110 | actual := ReplacePlaceholders(msg, "testName", "", "", "", currentTime, "CLUSTER_A") 111 | assert.Equal(t, `{"date1":"`+timeToDate(currentTime)+`","date2":"`+timeToDate(currentTime)+`","name":"testName"}`, actual) 112 | } 113 | -------------------------------------------------------------------------------- /internal/pkg/schedule/schedule.go: -------------------------------------------------------------------------------- 1 | package schedule 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "os" 7 | "strings" 8 | "time" 9 | 10 | "github.com/golang/glog" 11 | 12 | "kube-monkey/internal/pkg/calendar" 13 | "kube-monkey/internal/pkg/chaos" 14 | "kube-monkey/internal/pkg/config" 15 | "kube-monkey/internal/pkg/victims/factory" 16 | ) 17 | 18 | const ( 19 | Today = "\t********** Today's schedule **********" 20 | KubeMonkeyID = "\tKubeMonkey ID: %s" 21 | NoTermination = "\tNo 
terminations scheduled" 22 | HeaderRow = "\tk8 Api Kind\tKind Namespace\tKind Name\t\tTermination Time" 23 | SepRow = "\t-----------\t--------------\t---------\t\t----------------" 24 | RowFormat = "\t%s\t%s\t%s\t\t%s" 25 | DateFormat = "01/02/2006 15:04:05 -0700 MST" 26 | End = "\t********** End of schedule **********" 27 | ) 28 | 29 | type Schedule struct { 30 | entries []*chaos.Chaos 31 | } 32 | 33 | func (s *Schedule) Entries() []*chaos.Chaos { 34 | return s.entries 35 | } 36 | 37 | func (s *Schedule) Add(entry *chaos.Chaos) { 38 | s.entries = append(s.entries, entry) 39 | } 40 | 41 | func (s *Schedule) String() string { 42 | schedString := []string{} 43 | 44 | schedString = append(schedString, fmt.Sprint(Today)) 45 | 46 | kubeMonkeyID := os.Getenv("KUBE_MONKEY_ID") 47 | if kubeMonkeyID != "" { 48 | schedString = append(schedString, fmt.Sprintf(KubeMonkeyID, kubeMonkeyID)) 49 | } 50 | 51 | if len(s.entries) == 0 { 52 | schedString = append(schedString, fmt.Sprint(NoTermination)) 53 | } else { 54 | schedString = append(schedString, fmt.Sprint(HeaderRow)) 55 | schedString = append(schedString, fmt.Sprint(SepRow)) 56 | for _, chaos := range s.entries { 57 | schedString = append(schedString, fmt.Sprintf(RowFormat, chaos.Victim().Kind(), chaos.Victim().Namespace(), chaos.Victim().Name(), chaos.KillAt().Format(DateFormat))) 58 | } 59 | } 60 | schedString = append(schedString, fmt.Sprint(End)) 61 | 62 | return strings.Join(schedString, "\n") 63 | } 64 | 65 | func (s Schedule) Print() { 66 | glog.V(4).Infof("Status Update: %v terminations scheduled today", len(s.entries)) 67 | for _, chaos := range s.entries { 68 | glog.V(4).Infof("%s %s scheduled for termination at %s", chaos.Victim().Kind(), chaos.Victim().Name(), chaos.KillAt().Format(DateFormat)) 69 | } 70 | } 71 | 72 | func New() (*Schedule, error) { 73 | glog.V(3).Info("Status Update: Generating schedule for terminations") 74 | victims, err := factory.EligibleVictims() 75 | if err != nil { 76 | return nil, err 77 
| } 78 | 79 | schedule := &Schedule{ 80 | entries: []*chaos.Chaos{}, 81 | } 82 | 83 | for _, victim := range victims { 84 | killtime := CalculateKillTime() 85 | 86 | if ShouldScheduleChaos(victim.Mtbf()) { 87 | schedule.Add(chaos.New(killtime, victim)) 88 | } 89 | } 90 | 91 | return schedule, nil 92 | } 93 | 94 | func CalculateKillTime() time.Time { 95 | loc := config.Timezone() 96 | if config.DebugEnabled() && config.DebugScheduleImmediateKill() { 97 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 98 | // calculate a second-offset in the next minute 99 | secOffset := r.Intn(60) 100 | return time.Now().In(loc).Add(time.Duration(secOffset) * time.Second) 101 | } 102 | return calendar.RandomTimeInRange(config.StartHour(), config.EndHour(), loc) 103 | } 104 | 105 | func ShouldScheduleChaos(mtbf int) bool { 106 | if config.DebugEnabled() && config.DebugForceShouldKill() { 107 | return true 108 | } 109 | 110 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 111 | probability := 1 / float64(mtbf) 112 | return probability > r.Float64() 113 | } 114 | -------------------------------------------------------------------------------- /internal/pkg/schedule/schedule_test.go: -------------------------------------------------------------------------------- 1 | package schedule 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | "testing" 8 | "time" 9 | 10 | "kube-monkey/internal/pkg/chaos" 11 | "kube-monkey/internal/pkg/config/param" 12 | 13 | "github.com/spf13/viper" 14 | "github.com/stretchr/testify/assert" 15 | 16 | "kube-monkey/internal/pkg/config" 17 | ) 18 | 19 | func newSchedule() *Schedule { 20 | return &Schedule{} 21 | } 22 | 23 | func TestEntries(t *testing.T) { 24 | s := newSchedule() 25 | assert.Equal(t, s.Entries(), s.entries) 26 | assert.Len(t, s.Entries(), 0) 27 | } 28 | 29 | func TestAdd(t *testing.T) { 30 | e := chaos.NewMock() 31 | s := newSchedule() 32 | 33 | s.Add(e) 34 | assert.Len(t, s.entries, 1) 35 | 36 | } 37 | 38 | func 
TestStringNoEntries(t *testing.T) { 39 | s := newSchedule() 40 | 41 | schedString := []string{} 42 | schedString = append(schedString, fmt.Sprint(Today)) 43 | 44 | schedString = append(schedString, fmt.Sprint(NoTermination)) 45 | schedString = append(schedString, fmt.Sprint(End)) 46 | 47 | assert.Equal(t, strings.Join(schedString, "\n"), s.String()) 48 | } 49 | 50 | func TestStringNoEntriesWithID(t *testing.T) { 51 | 52 | id := "TestingID" 53 | os.Setenv("KUBE_MONKEY_ID", id) 54 | 55 | s := newSchedule() 56 | 57 | schedString := []string{} 58 | schedString = append(schedString, fmt.Sprint(Today)) 59 | schedString = append(schedString, fmt.Sprintf(KubeMonkeyID, id)) 60 | 61 | schedString = append(schedString, fmt.Sprint(NoTermination)) 62 | schedString = append(schedString, fmt.Sprint(End)) 63 | 64 | assert.Equal(t, strings.Join(schedString, "\n"), s.String()) 65 | 66 | os.Unsetenv("KUBE_MONKEY_ID") 67 | } 68 | 69 | func TestStringWithEntries(t *testing.T) { 70 | s := newSchedule() 71 | e1 := chaos.NewMock() 72 | e2 := chaos.NewMock() 73 | s.Add(e1) 74 | s.Add(e2) 75 | 76 | schedString := []string{} 77 | schedString = append(schedString, fmt.Sprint(Today)) 78 | schedString = append(schedString, fmt.Sprint(HeaderRow)) 79 | schedString = append(schedString, fmt.Sprint(SepRow)) 80 | for _, chaos := range s.entries { 81 | schedString = append(schedString, fmt.Sprintf(RowFormat, chaos.Victim().Kind(), chaos.Victim().Namespace(), chaos.Victim().Name(), chaos.KillAt().Format(DateFormat))) 82 | } 83 | schedString = append(schedString, fmt.Sprint(End)) 84 | 85 | assert.Equal(t, strings.Join(schedString, "\n"), s.String()) 86 | } 87 | 88 | func TestCalculateKillTimeRandom(t *testing.T) { 89 | config.SetDefaults() 90 | killtime := CalculateKillTime() 91 | 92 | scheduledTime := func() (success bool) { 93 | if killtime.Hour() >= config.StartHour() && killtime.Hour() <= config.EndHour() { 94 | success = true 95 | } 96 | return 97 | } 98 | 99 | assert.Equal(t, killtime.Location(), 
config.Timezone()) 100 | assert.Condition(t, scheduledTime) 101 | 102 | } 103 | 104 | func TestCalculateKillTimeNow(t *testing.T) { 105 | config.SetDefaults() 106 | viper.SetDefault(param.DebugEnabled, true) 107 | viper.SetDefault(param.DebugScheduleImmediateKill, true) 108 | killtime := CalculateKillTime() 109 | 110 | assert.Equal(t, killtime.Location(), config.Timezone()) 111 | assert.WithinDuration(t, killtime, time.Now(), time.Second*time.Duration(60)) 112 | config.SetDefaults() 113 | } 114 | 115 | func TestShouldScheduleChaosNow(t *testing.T) { 116 | config.SetDefaults() 117 | viper.SetDefault(param.DebugEnabled, true) 118 | viper.SetDefault(param.DebugForceShouldKill, true) 119 | assert.True(t, ShouldScheduleChaos(100000000000)) 120 | config.SetDefaults() 121 | } 122 | 123 | func TestShouldScheduleChaosMtbf(t *testing.T) { 124 | assert.False(t, ShouldScheduleChaos(100000000000)) 125 | assert.True(t, ShouldScheduleChaos(1)) 126 | } 127 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/daemonsets/daemonsets.go: -------------------------------------------------------------------------------- 1 | package daemonsets 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | 7 | "kube-monkey/internal/pkg/config" 8 | "kube-monkey/internal/pkg/victims" 9 | 10 | appsv1 "k8s.io/api/apps/v1" 11 | ) 12 | 13 | type DaemonSet struct { 14 | *victims.VictimBase 15 | } 16 | 17 | // New creates a new instance of DaemonSet 18 | func New(dep *appsv1.DaemonSet) (*DaemonSet, error) { 19 | ident, err := identifier(dep) 20 | if err != nil { 21 | return nil, err 22 | } 23 | mtbf, err := meanTimeBetweenFailures(dep) 24 | if err != nil { 25 | return nil, err 26 | } 27 | kind := fmt.Sprintf("%T", *dep) 28 | 29 | return &DaemonSet{VictimBase: victims.New(kind, dep.Name, dep.Namespace, ident, mtbf)}, nil 30 | } 31 | 32 | // Returns the value of the label defined by config.IdentLabelKey 33 | // from the DaemonSet labels 34 | // This label 
should be unique to a DaemonSet, and is used to 35 | // identify the pods that belong to this DaemonSet, as pods 36 | // inherit labels from the DaemonSet 37 | func identifier(kubekind *appsv1.DaemonSet) (string, error) { 38 | identifier, ok := kubekind.Labels[config.IdentLabelKey] 39 | if !ok { 40 | return "", fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.IdentLabelKey) 41 | } 42 | return identifier, nil 43 | } 44 | 45 | // Read the mean-time-between-failures value defined by the DaemonSet 46 | // in the label defined by config.MtbfLabelKey 47 | func meanTimeBetweenFailures(kubekind *appsv1.DaemonSet) (int, error) { 48 | mtbf, ok := kubekind.Labels[config.MtbfLabelKey] 49 | if !ok { 50 | return -1, fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.MtbfLabelKey) 51 | } 52 | 53 | mtbfInt, err := strconv.Atoi(mtbf) 54 | if err != nil { 55 | return -1, err 56 | } 57 | 58 | if !(mtbfInt > 0) { 59 | return -1, fmt.Errorf("Invalid value for label %s: %d", config.MtbfLabelKey, mtbfInt) 60 | } 61 | 62 | return mtbfInt, nil 63 | } 64 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/daemonsets/daemonsets_test.go: -------------------------------------------------------------------------------- 1 | package daemonsets 2 | 3 | import ( 4 | "testing" 5 | 6 | "kube-monkey/internal/pkg/config" 7 | 8 | "github.com/stretchr/testify/assert" 9 | appsv1 "k8s.io/api/apps/v1" 10 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11 | ) 12 | 13 | const ( 14 | IDENTIFIER = "kube-monkey-id" 15 | NAME = "daemonset_name" 16 | NAMESPACE = metav1.NamespaceDefault 17 | ) 18 | 19 | func newDaemonSet(name string, labels map[string]string) appsv1.DaemonSet { 20 | 21 | return appsv1.DaemonSet{ 22 | ObjectMeta: metav1.ObjectMeta{ 23 | Name: name, 24 | Namespace: NAMESPACE, 25 | Labels: labels, 26 | }, 27 | } 28 | } 29 | 30 | func TestNew(t *testing.T) { 31 | 32 | v1ds := newDaemonSet( 33 
| NAME, 34 | map[string]string{ 35 | config.IdentLabelKey: IDENTIFIER, 36 | config.MtbfLabelKey: "1", 37 | }, 38 | ) 39 | ds, err := New(&v1ds) 40 | 41 | assert.NoError(t, err) 42 | assert.Equal(t, "v1.DaemonSet", ds.Kind()) 43 | assert.Equal(t, NAME, ds.Name()) 44 | assert.Equal(t, NAMESPACE, ds.Namespace()) 45 | assert.Equal(t, IDENTIFIER, ds.Identifier()) 46 | assert.Equal(t, 1, ds.Mtbf()) 47 | } 48 | 49 | func TestInvalidIdentifier(t *testing.T) { 50 | v1ds := newDaemonSet( 51 | NAME, 52 | map[string]string{ 53 | config.MtbfLabelKey: "1", 54 | }, 55 | ) 56 | _, err := New(&v1ds) 57 | 58 | assert.Errorf(t, err, "Expected an error if "+config.IdentLabelKey+" label doesn't exist") 59 | } 60 | 61 | func TestInvalidMtbf(t *testing.T) { 62 | v1ds := newDaemonSet( 63 | NAME, 64 | map[string]string{ 65 | config.IdentLabelKey: IDENTIFIER, 66 | }, 67 | ) 68 | _, err := New(&v1ds) 69 | 70 | assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label doesn't exist") 71 | 72 | v1ds = newDaemonSet( 73 | NAME, 74 | map[string]string{ 75 | config.IdentLabelKey: IDENTIFIER, 76 | config.MtbfLabelKey: "string", 77 | }, 78 | ) 79 | _, err = New(&v1ds) 80 | 81 | assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label can't be converted a Int type") 82 | 83 | v1ds = newDaemonSet( 84 | NAME, 85 | map[string]string{ 86 | config.IdentLabelKey: IDENTIFIER, 87 | config.MtbfLabelKey: "0", 88 | }, 89 | ) 90 | _, err = New(&v1ds) 91 | 92 | assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label is lower than 1") 93 | } 94 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/daemonsets/eligible_daemonsets.go: -------------------------------------------------------------------------------- 1 | package daemonsets 2 | 3 | //All these functions require api access specific to the version of the app 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | "strconv" 9 | 10 | "github.com/golang/glog" 11 
| 12 | "kube-monkey/internal/pkg/config" 13 | "kube-monkey/internal/pkg/victims" 14 | 15 | kube "k8s.io/client-go/kubernetes" 16 | 17 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 18 | ) 19 | 20 | // EligibleDaemonSets gets all eligible daemonsets that opted in (filtered by config.EnabledLabel) 21 | func EligibleDaemonSets(clientset kube.Interface, namespace string, filter *metav1.ListOptions) (eligVictims []victims.Victim, err error) { 22 | enabledVictims, err := clientset.AppsV1().DaemonSets(namespace).List(context.TODO(), *filter) 23 | if err != nil { 24 | return nil, err 25 | } 26 | 27 | for _, vic := range enabledVictims.Items { 28 | victim, err := New(&vic) 29 | if err != nil { 30 | glog.Warningf("Skipping eligible %T %s because of error: %s", vic, vic.Name, err.Error()) 31 | continue 32 | } 33 | 34 | // TODO: After generating whitelisting ns list, this will move to factory. 35 | // IsBlacklisted will change to something like IsAllowedNamespace 36 | // and will only be used to verify at time of scheduled execution 37 | if victim.IsBlacklisted() { 38 | continue 39 | } 40 | 41 | eligVictims = append(eligVictims, victim) 42 | } 43 | 44 | return 45 | } 46 | 47 | /* Below methods are used to verify the victim's attributes have not changed at the scheduled time of termination */ 48 | 49 | // IsEnrolled checks if the daemonset is currently enrolled in kube-monkey 50 | func (d *DaemonSet) IsEnrolled(clientset kube.Interface) (bool, error) { 51 | daemonset, err := clientset.AppsV1().DaemonSets(d.Namespace()).Get(context.TODO(), d.Name(), metav1.GetOptions{}) 52 | if err != nil { 53 | return false, err 54 | } 55 | return daemonset.Labels[config.EnabledLabelKey] == config.EnabledLabelValue, nil 56 | } 57 | 58 | // KillType returns current killtype config label for update 59 | func (d *DaemonSet) KillType(clientset kube.Interface) (string, error) { 60 | daemonset, err := clientset.AppsV1().DaemonSets(d.Namespace()).Get(context.TODO(), d.Name(), metav1.GetOptions{}) 61 
| if err != nil { 62 | return "", err 63 | } 64 | 65 | killType, ok := daemonset.Labels[config.KillTypeLabelKey] 66 | if !ok { 67 | return "", fmt.Errorf("%s %s does not have %s label", d.Kind(), d.Name(), config.KillTypeLabelKey) 68 | } 69 | 70 | return killType, nil 71 | } 72 | 73 | // KillValue returns current killvalue config label for update 74 | func (d *DaemonSet) KillValue(clientset kube.Interface) (int, error) { 75 | daemonset, err := clientset.AppsV1().DaemonSets(d.Namespace()).Get(context.TODO(), d.Name(), metav1.GetOptions{}) 76 | if err != nil { 77 | return -1, err 78 | } 79 | 80 | killMode, ok := daemonset.Labels[config.KillValueLabelKey] 81 | if !ok { 82 | return -1, fmt.Errorf("%s %s does not have %s label", d.Kind(), d.Name(), config.KillValueLabelKey) 83 | } 84 | 85 | killModeInt, err := strconv.Atoi(killMode) 86 | if err != nil { 87 | return -1, fmt.Errorf("%s %s has an invalid killMode: %v", d.Kind(), d.Name(), killMode) 88 | } 89 | 90 | if !(killModeInt > 0) { 91 | return -1, fmt.Errorf("Invalid value for label %s: %d", config.KillValueLabelKey, killModeInt) 92 | } 93 | 94 | return killModeInt, nil 95 | } 96 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/daemonsets/eligible_daemonsets_test.go: -------------------------------------------------------------------------------- 1 | package daemonsets 2 | 3 | import ( 4 | "testing" 5 | 6 | "kube-monkey/internal/pkg/config" 7 | 8 | "github.com/stretchr/testify/assert" 9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | "k8s.io/client-go/kubernetes/fake" 11 | ) 12 | 13 | func TestEligibleDaemonSets(t *testing.T) { 14 | v1ds := newDaemonSet( 15 | NAME, 16 | map[string]string{ 17 | "kube-monkey/identifier": "1", 18 | "kube-monkey/mtbf": "1", 19 | }, 20 | ) 21 | 22 | client := fake.NewSimpleClientset(&v1ds) 23 | victims, _ := EligibleDaemonSets(client, NAMESPACE, &metav1.ListOptions{}) 24 | 25 | assert.Len(t, victims, 1) 26 | } 27 | 28 | 
func TestIsEnrolled(t *testing.T) { 29 | v1ds := newDaemonSet( 30 | NAME, 31 | map[string]string{ 32 | config.IdentLabelKey: "1", 33 | config.MtbfLabelKey: "1", 34 | config.EnabledLabelKey: config.EnabledLabelValue, 35 | }, 36 | ) 37 | 38 | depl, _ := New(&v1ds) 39 | 40 | client := fake.NewSimpleClientset(&v1ds) 41 | 42 | b, _ := depl.IsEnrolled(client) 43 | 44 | assert.Equal(t, b, true, "Expected daemonset to be enrolled") 45 | } 46 | 47 | func TestIsNotEnrolled(t *testing.T) { 48 | v1ds := newDaemonSet( 49 | NAME, 50 | map[string]string{ 51 | config.IdentLabelKey: "1", 52 | config.MtbfLabelKey: "1", 53 | config.EnabledLabelKey: "x", 54 | }, 55 | ) 56 | 57 | ds, _ := New(&v1ds) 58 | 59 | client := fake.NewSimpleClientset(&v1ds) 60 | 61 | b, _ := ds.IsEnrolled(client) 62 | 63 | assert.Equal(t, b, false, "Expected daemonset to not be enrolled") 64 | } 65 | 66 | func TestKillType(t *testing.T) { 67 | 68 | ident := "1" 69 | mtbf := "1" 70 | killMode := "kill-mode" 71 | 72 | v1ds := newDaemonSet( 73 | NAME, 74 | map[string]string{ 75 | config.IdentLabelKey: ident, 76 | config.MtbfLabelKey: mtbf, 77 | }, 78 | ) 79 | 80 | depl, _ := New(&v1ds) 81 | 82 | client := fake.NewSimpleClientset(&v1ds) 83 | 84 | _, err := depl.KillType(client) 85 | 86 | assert.EqualError(t, err, depl.Kind()+" "+depl.Name()+" does not have "+config.KillTypeLabelKey+" label") 87 | 88 | v1ds = newDaemonSet( 89 | NAME, 90 | map[string]string{ 91 | config.IdentLabelKey: ident, 92 | config.MtbfLabelKey: mtbf, 93 | config.KillTypeLabelKey: killMode, 94 | }, 95 | ) 96 | 97 | client = fake.NewSimpleClientset(&v1ds) 98 | 99 | kill, _ := depl.KillType(client) 100 | 101 | assert.Equal(t, kill, killMode, "Unexpected kill value, got %d", kill) 102 | } 103 | 104 | func TestKillValue(t *testing.T) { 105 | 106 | ident := "1" 107 | mtbf := "1" 108 | killValue := "0" 109 | 110 | v1ds := newDaemonSet( 111 | NAME, 112 | map[string]string{ 113 | config.IdentLabelKey: ident, 114 | config.MtbfLabelKey: mtbf, 115 | }, 
116 | ) 117 | 118 | depl, _ := New(&v1ds) 119 | 120 | client := fake.NewSimpleClientset(&v1ds) 121 | 122 | _, err := depl.KillValue(client) 123 | 124 | assert.EqualError(t, err, depl.Kind()+" "+depl.Name()+" does not have "+config.KillValueLabelKey+" label") 125 | 126 | v1ds = newDaemonSet( 127 | NAME, 128 | map[string]string{ 129 | config.IdentLabelKey: ident, 130 | config.MtbfLabelKey: mtbf, 131 | config.KillValueLabelKey: killValue, 132 | }, 133 | ) 134 | 135 | client = fake.NewSimpleClientset(&v1ds) 136 | 137 | _, err = depl.KillValue(client) 138 | 139 | assert.EqualError(t, err, "Invalid value for label "+config.KillValueLabelKey+": "+killValue) 140 | 141 | killValue = "1" 142 | 143 | v1ds = newDaemonSet( 144 | NAME, 145 | map[string]string{ 146 | config.IdentLabelKey: ident, 147 | config.MtbfLabelKey: mtbf, 148 | config.KillValueLabelKey: killValue, 149 | }, 150 | ) 151 | 152 | client = fake.NewSimpleClientset(&v1ds) 153 | 154 | kill, _ := depl.KillValue(client) 155 | 156 | assert.Equalf(t, kill, 1, "Unexpected a kill value, got %d", kill) 157 | } 158 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/deployments/deployments.go: -------------------------------------------------------------------------------- 1 | package deployments 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | 7 | "kube-monkey/internal/pkg/config" 8 | "kube-monkey/internal/pkg/victims" 9 | 10 | appsv1 "k8s.io/api/apps/v1" 11 | ) 12 | 13 | type Deployment struct { 14 | *victims.VictimBase 15 | } 16 | 17 | // New creates a new instance of Deployment 18 | func New(dep *appsv1.Deployment) (*Deployment, error) { 19 | ident, err := identifier(dep) 20 | if err != nil { 21 | return nil, err 22 | } 23 | mtbf, err := meanTimeBetweenFailures(dep) 24 | if err != nil { 25 | return nil, err 26 | } 27 | kind := fmt.Sprintf("%T", *dep) 28 | 29 | return &Deployment{VictimBase: victims.New(kind, dep.Name, dep.Namespace, ident, mtbf)}, nil 30 | } 31 | 
32 | // Returns the value of the label defined by config.IdentLabelKey 33 | // from the deployment labels 34 | // This label should be unique to a deployment, and is used to 35 | // identify the pods that belong to this deployment, as pods 36 | // inherit labels from the Deployment 37 | func identifier(kubekind *appsv1.Deployment) (string, error) { 38 | identifier, ok := kubekind.Labels[config.IdentLabelKey] 39 | if !ok { 40 | return "", fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.IdentLabelKey) 41 | } 42 | return identifier, nil 43 | } 44 | 45 | // Read the mean-time-between-failures value defined by the Deployment 46 | // in the label defined by config.MtbfLabelKey 47 | func meanTimeBetweenFailures(kubekind *appsv1.Deployment) (int, error) { 48 | mtbf, ok := kubekind.Labels[config.MtbfLabelKey] 49 | if !ok { 50 | return -1, fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.MtbfLabelKey) 51 | } 52 | 53 | mtbfInt, err := strconv.Atoi(mtbf) 54 | if err != nil { 55 | return -1, err 56 | } 57 | 58 | if !(mtbfInt > 0) { 59 | return -1, fmt.Errorf("Invalid value for label %s: %d", config.MtbfLabelKey, mtbfInt) 60 | } 61 | 62 | return mtbfInt, nil 63 | } 64 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/deployments/deployments_test.go: -------------------------------------------------------------------------------- 1 | package deployments 2 | 3 | import ( 4 | "testing" 5 | 6 | "kube-monkey/internal/pkg/config" 7 | 8 | "github.com/stretchr/testify/assert" 9 | appsv1 "k8s.io/api/apps/v1" 10 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11 | ) 12 | 13 | const ( 14 | IDENTIFIER = "kube-monkey-id" 15 | NAME = "deployment_name" 16 | NAMESPACE = metav1.NamespaceDefault 17 | ) 18 | 19 | func newDeployment(name string, labels map[string]string) appsv1.Deployment { 20 | 21 | return appsv1.Deployment{ 22 | ObjectMeta: metav1.ObjectMeta{ 23 | Name: name, 24 | 
Namespace: NAMESPACE, 25 | Labels: labels, 26 | }, 27 | } 28 | } 29 | 30 | func TestNew(t *testing.T) { 31 | 32 | v1depl := newDeployment( 33 | NAME, 34 | map[string]string{ 35 | config.IdentLabelKey: IDENTIFIER, 36 | config.MtbfLabelKey: "1", 37 | }, 38 | ) 39 | depl, err := New(&v1depl) 40 | 41 | assert.NoError(t, err) 42 | assert.Equal(t, "v1.Deployment", depl.Kind()) 43 | assert.Equal(t, NAME, depl.Name()) 44 | assert.Equal(t, NAMESPACE, depl.Namespace()) 45 | assert.Equal(t, IDENTIFIER, depl.Identifier()) 46 | assert.Equal(t, 1, depl.Mtbf()) 47 | } 48 | 49 | func TestInvalidIdentifier(t *testing.T) { 50 | v1depl := newDeployment( 51 | NAME, 52 | map[string]string{ 53 | config.MtbfLabelKey: "1", 54 | }, 55 | ) 56 | _, err := New(&v1depl) 57 | 58 | assert.Errorf(t, err, "Expected an error if "+config.IdentLabelKey+" label doesn't exist") 59 | } 60 | 61 | func TestInvalidMtbf(t *testing.T) { 62 | v1depl := newDeployment( 63 | NAME, 64 | map[string]string{ 65 | config.IdentLabelKey: IDENTIFIER, 66 | }, 67 | ) 68 | _, err := New(&v1depl) 69 | 70 | assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label doesn't exist") 71 | 72 | v1depl = newDeployment( 73 | NAME, 74 | map[string]string{ 75 | config.IdentLabelKey: IDENTIFIER, 76 | config.MtbfLabelKey: "string", 77 | }, 78 | ) 79 | _, err = New(&v1depl) 80 | 81 | assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label can't be converted a Int type") 82 | 83 | v1depl = newDeployment( 84 | NAME, 85 | map[string]string{ 86 | config.IdentLabelKey: IDENTIFIER, 87 | config.MtbfLabelKey: "0", 88 | }, 89 | ) 90 | _, err = New(&v1depl) 91 | 92 | assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label is lower than 1") 93 | } 94 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/deployments/eligible_deployments.go: -------------------------------------------------------------------------------- 1 | package 
deployments 2 | 3 | //All these functions require api access specific to the version of the app 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | "strconv" 9 | 10 | "github.com/golang/glog" 11 | 12 | "kube-monkey/internal/pkg/config" 13 | "kube-monkey/internal/pkg/victims" 14 | 15 | kube "k8s.io/client-go/kubernetes" 16 | 17 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 18 | ) 19 | 20 | // EligibleDeployments gets all eligible deployments that opted in (filtered by config.EnabledLabel) 21 | func EligibleDeployments(clientset kube.Interface, namespace string, filter *metav1.ListOptions) (eligVictims []victims.Victim, err error) { 22 | enabledVictims, err := clientset.AppsV1().Deployments(namespace).List(context.TODO(), *filter) 23 | if err != nil { 24 | return nil, err 25 | } 26 | 27 | for _, vic := range enabledVictims.Items { 28 | victim, err := New(&vic) 29 | if err != nil { 30 | glog.Warningf("Skipping eligible %T %s because of error: %s", vic, vic.Name, err.Error()) 31 | continue 32 | } 33 | 34 | // TODO: After generating whitelisting ns list, this will move to factory. 
35 | // IsBlacklisted will change to something like IsAllowedNamespace 36 | // and will only be used to verify at time of scheduled execution 37 | if victim.IsBlacklisted() { 38 | continue 39 | } 40 | 41 | eligVictims = append(eligVictims, victim) 42 | } 43 | 44 | return 45 | } 46 | 47 | /* Below methods are used to verify the victim's attributes have not changed at the scheduled time of termination */ 48 | 49 | // IsEnrolled checks if the deployment is currently enrolled in kube-monkey 50 | func (d *Deployment) IsEnrolled(clientset kube.Interface) (bool, error) { 51 | deployment, err := clientset.AppsV1().Deployments(d.Namespace()).Get(context.TODO(), d.Name(), metav1.GetOptions{}) 52 | if err != nil { 53 | return false, err 54 | } 55 | return deployment.Labels[config.EnabledLabelKey] == config.EnabledLabelValue, nil 56 | } 57 | 58 | // KillType returns current killtype config label for update 59 | func (d *Deployment) KillType(clientset kube.Interface) (string, error) { 60 | deployment, err := clientset.AppsV1().Deployments(d.Namespace()).Get(context.TODO(), d.Name(), metav1.GetOptions{}) 61 | if err != nil { 62 | return "", err 63 | } 64 | 65 | killType, ok := deployment.Labels[config.KillTypeLabelKey] 66 | if !ok { 67 | return "", fmt.Errorf("%s %s does not have %s label", d.Kind(), d.Name(), config.KillTypeLabelKey) 68 | } 69 | 70 | return killType, nil 71 | } 72 | 73 | // KillValue returns current killvalue config label for update 74 | func (d *Deployment) KillValue(clientset kube.Interface) (int, error) { 75 | deployment, err := clientset.AppsV1().Deployments(d.Namespace()).Get(context.TODO(), d.Name(), metav1.GetOptions{}) 76 | if err != nil { 77 | return -1, err 78 | } 79 | 80 | killMode, ok := deployment.Labels[config.KillValueLabelKey] 81 | if !ok { 82 | return -1, fmt.Errorf("%s %s does not have %s label", d.Kind(), d.Name(), config.KillValueLabelKey) 83 | } 84 | 85 | killModeInt, err := strconv.Atoi(killMode) 86 | if err != nil || !(killModeInt > 0) { 
87 | return -1, fmt.Errorf("Invalid value for label %s: %d", config.KillValueLabelKey, killModeInt) 88 | } 89 | 90 | return killModeInt, nil 91 | } 92 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/deployments/eligible_deployments_test.go: -------------------------------------------------------------------------------- 1 | package deployments 2 | 3 | import ( 4 | "testing" 5 | 6 | "kube-monkey/internal/pkg/config" 7 | 8 | "github.com/stretchr/testify/assert" 9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | "k8s.io/client-go/kubernetes/fake" 11 | ) 12 | 13 | func TestEligibleDeployments(t *testing.T) { 14 | v1depl := newDeployment( 15 | NAME, 16 | map[string]string{ 17 | "kube-monkey/identifier": "1", 18 | "kube-monkey/mtbf": "1", 19 | }, 20 | ) 21 | 22 | client := fake.NewSimpleClientset(&v1depl) 23 | victims, _ := EligibleDeployments(client, NAMESPACE, &metav1.ListOptions{}) 24 | 25 | assert.Len(t, victims, 1) 26 | } 27 | 28 | func TestIsEnrolled(t *testing.T) { 29 | v1depl := newDeployment( 30 | NAME, 31 | map[string]string{ 32 | config.IdentLabelKey: "1", 33 | config.MtbfLabelKey: "1", 34 | config.EnabledLabelKey: config.EnabledLabelValue, 35 | }, 36 | ) 37 | 38 | depl, _ := New(&v1depl) 39 | 40 | client := fake.NewSimpleClientset(&v1depl) 41 | 42 | b, _ := depl.IsEnrolled(client) 43 | 44 | assert.Equal(t, b, true, "Expected deployment to be enrolled") 45 | } 46 | 47 | func TestIsNotEnrolled(t *testing.T) { 48 | v1depl := newDeployment( 49 | NAME, 50 | map[string]string{ 51 | config.IdentLabelKey: "1", 52 | config.MtbfLabelKey: "1", 53 | config.EnabledLabelKey: "x", 54 | }, 55 | ) 56 | 57 | depl, _ := New(&v1depl) 58 | 59 | client := fake.NewSimpleClientset(&v1depl) 60 | 61 | b, _ := depl.IsEnrolled(client) 62 | 63 | assert.Equal(t, b, false, "Expected deployment to not be enrolled") 64 | } 65 | 66 | func TestKillType(t *testing.T) { 67 | 68 | ident := "1" 69 | mtbf := "1" 70 | killMode := 
"kill-mode" 71 | 72 | v1depl := newDeployment( 73 | NAME, 74 | map[string]string{ 75 | config.IdentLabelKey: ident, 76 | config.MtbfLabelKey: mtbf, 77 | }, 78 | ) 79 | 80 | depl, _ := New(&v1depl) 81 | 82 | client := fake.NewSimpleClientset(&v1depl) 83 | 84 | _, err := depl.KillType(client) 85 | 86 | assert.EqualError(t, err, depl.Kind()+" "+depl.Name()+" does not have "+config.KillTypeLabelKey+" label") 87 | 88 | v1depl = newDeployment( 89 | NAME, 90 | map[string]string{ 91 | config.IdentLabelKey: ident, 92 | config.MtbfLabelKey: mtbf, 93 | config.KillTypeLabelKey: killMode, 94 | }, 95 | ) 96 | 97 | client = fake.NewSimpleClientset(&v1depl) 98 | 99 | kill, _ := depl.KillType(client) 100 | 101 | assert.Equal(t, kill, killMode, "Unexpected kill value, got %d", kill) 102 | } 103 | 104 | func TestKillValue(t *testing.T) { 105 | 106 | ident := "1" 107 | mtbf := "1" 108 | killValue := "0" 109 | 110 | v1depl := newDeployment( 111 | NAME, 112 | map[string]string{ 113 | config.IdentLabelKey: ident, 114 | config.MtbfLabelKey: mtbf, 115 | }, 116 | ) 117 | 118 | depl, _ := New(&v1depl) 119 | 120 | client := fake.NewSimpleClientset(&v1depl) 121 | 122 | _, err := depl.KillValue(client) 123 | 124 | assert.EqualError(t, err, depl.Kind()+" "+depl.Name()+" does not have "+config.KillValueLabelKey+" label") 125 | 126 | v1depl = newDeployment( 127 | NAME, 128 | map[string]string{ 129 | config.IdentLabelKey: ident, 130 | config.MtbfLabelKey: mtbf, 131 | config.KillValueLabelKey: killValue, 132 | }, 133 | ) 134 | 135 | client = fake.NewSimpleClientset(&v1depl) 136 | 137 | _, err = depl.KillValue(client) 138 | 139 | assert.EqualError(t, err, "Invalid value for label "+config.KillValueLabelKey+": "+killValue) 140 | 141 | killValue = "1" 142 | 143 | v1depl = newDeployment( 144 | NAME, 145 | map[string]string{ 146 | config.IdentLabelKey: ident, 147 | config.MtbfLabelKey: mtbf, 148 | config.KillValueLabelKey: killValue, 149 | }, 150 | ) 151 | 152 | client = fake.NewSimpleClientset(&v1depl) 
153 | 154 | kill, _ := depl.KillValue(client) 155 | 156 | assert.Equalf(t, kill, 1, "Unexpected a kill value, got %d", kill) 157 | } 158 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/factory.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package factory is responsible for generating eligible victim kinds 3 | 4 | New types of kinds can be added easily 5 | */ 6 | package factory 7 | 8 | import ( 9 | "github.com/golang/glog" 10 | 11 | "kube-monkey/internal/pkg/config" 12 | "kube-monkey/internal/pkg/kubernetes" 13 | "kube-monkey/internal/pkg/victims" 14 | "kube-monkey/internal/pkg/victims/factory/daemonsets" 15 | "kube-monkey/internal/pkg/victims/factory/deployments" 16 | "kube-monkey/internal/pkg/victims/factory/statefulsets" 17 | 18 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19 | "k8s.io/apimachinery/pkg/labels" 20 | "k8s.io/apimachinery/pkg/selection" 21 | "k8s.io/apimachinery/pkg/util/sets" 22 | ) 23 | 24 | // EligibleVictims gathers list of enabled/enrolled kinds for judgement by 25 | // the scheduler 26 | // This checks against config.WhitelistedNamespaces but 27 | // each victim checks themselves against the ns blacklist 28 | // TODO: fetch all namespaces from k8 apiserver to check blacklist here 29 | func EligibleVictims() (eligibleVictims []victims.Victim, err error) { 30 | clientset, err := kubernetes.CreateClient() 31 | if err != nil { 32 | return nil, err 33 | } 34 | 35 | // Verify opt-in at scheduling time 36 | filter, err := enrollmentFilter() 37 | if err != nil { 38 | return nil, err 39 | } 40 | 41 | for _, namespace := range config.WhitelistedNamespaces().UnsortedList() { 42 | // Fetch deployments 43 | deployments, err := deployments.EligibleDeployments(clientset, namespace, filter) 44 | if err != nil { 45 | //allow pass through to schedule other kinds and namespaces 46 | glog.Warningf("Failed to fetch eligible deployments for 
namespace %s due to error: %s", namespace, err.Error()) 47 | continue 48 | } 49 | eligibleVictims = append(eligibleVictims, deployments...) 50 | 51 | // Fetch statefulsets 52 | statefulsets, err := statefulsets.EligibleStatefulSets(clientset, namespace, filter) 53 | if err != nil { 54 | //allow pass through to schedule other kinds and namespaces 55 | glog.Warningf("Failed to fetch eligible statefulsets for namespace %s due to error: %s", namespace, err.Error()) 56 | continue 57 | } 58 | eligibleVictims = append(eligibleVictims, statefulsets...) 59 | 60 | // Fetch daemonsets 61 | daemonsets, err := daemonsets.EligibleDaemonSets(clientset, namespace, filter) 62 | if err != nil { 63 | //allow pass through to schedule other kinds and namespaces 64 | glog.Warningf("Failed to fetch eligible daemonsets for namespace %s due to error: %s", namespace, err.Error()) 65 | continue 66 | } 67 | eligibleVictims = append(eligibleVictims, daemonsets...) 68 | } 69 | 70 | return 71 | } 72 | 73 | // Verifies opt-in of victims 74 | func enrollmentFilter() (*metav1.ListOptions, error) { 75 | req, err := enrollmentRequirement() 76 | if err != nil { 77 | return nil, err 78 | } 79 | return &metav1.ListOptions{ 80 | LabelSelector: labels.NewSelector().Add(*req).String(), 81 | }, nil 82 | } 83 | 84 | func enrollmentRequirement() (*labels.Requirement, error) { 85 | return labels.NewRequirement(config.EnabledLabelKey, selection.Equals, sets.NewString(config.EnabledLabelValue).UnsortedList()) 86 | } 87 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/statefulsets/eligible_statefulsets.go: -------------------------------------------------------------------------------- 1 | package statefulsets 2 | 3 | //All these functions require api access specific to the version of the app 4 | 5 | import ( 6 | "context" 7 | "fmt" 8 | "strconv" 9 | 10 | "github.com/golang/glog" 11 | 12 | "kube-monkey/internal/pkg/config" 13 | 
"kube-monkey/internal/pkg/victims" 14 | 15 | kube "k8s.io/client-go/kubernetes" 16 | 17 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 18 | ) 19 | 20 | // EligibleStatefulSets gets all eligible statefulsets that opted in (filtered by config.EnabledLabel) 21 | func EligibleStatefulSets(clientset kube.Interface, namespace string, filter *metav1.ListOptions) (eligVictims []victims.Victim, err error) { 22 | enabledVictims, err := clientset.AppsV1().StatefulSets(namespace).List(context.TODO(), *filter) 23 | if err != nil { 24 | return nil, err 25 | } 26 | 27 | for _, vic := range enabledVictims.Items { 28 | victim, err := New(&vic) 29 | if err != nil { 30 | glog.Warningf("Skipping eligible %T %s because of error: %s", vic, vic.Name, err.Error()) 31 | continue 32 | } 33 | 34 | // TODO: After generating whitelisting ns list, this will move to factory. 35 | // IsBlacklisted will change to something like IsAllowedNamespace 36 | // and will only be used to verify at time of scheduled execution 37 | if victim.IsBlacklisted() { 38 | continue 39 | } 40 | 41 | eligVictims = append(eligVictims, victim) 42 | } 43 | 44 | return 45 | } 46 | 47 | /* Below methods are used to verify the victim's attributes have not changed at the scheduled time of termination */ 48 | 49 | // IsEnrolled checks if the statefulset is currently enrolled in kube-monkey 50 | func (ss *StatefulSet) IsEnrolled(clientset kube.Interface) (bool, error) { 51 | statefulset, err := clientset.AppsV1().StatefulSets(ss.Namespace()).Get(context.TODO(), ss.Name(), metav1.GetOptions{}) 52 | if err != nil { 53 | return false, err 54 | } 55 | return statefulset.Labels[config.EnabledLabelKey] == config.EnabledLabelValue, nil 56 | } 57 | 58 | // KillType returns current killtype config label for update 59 | func (ss *StatefulSet) KillType(clientset kube.Interface) (string, error) { 60 | statefulset, err := clientset.AppsV1().StatefulSets(ss.Namespace()).Get(context.TODO(), ss.Name(), metav1.GetOptions{}) 61 | if err != nil 
{ 62 | return "", err 63 | } 64 | 65 | killType, ok := statefulset.Labels[config.KillTypeLabelKey] 66 | if !ok { 67 | return "", fmt.Errorf("%s %s does not have %s label", ss.Kind(), ss.Name(), config.KillTypeLabelKey) 68 | } 69 | 70 | return killType, nil 71 | } 72 | 73 | // KillValue returns current killvalue config label for update 74 | func (ss *StatefulSet) KillValue(clientset kube.Interface) (int, error) { 75 | statefulset, err := clientset.AppsV1().StatefulSets(ss.Namespace()).Get(context.TODO(), ss.Name(), metav1.GetOptions{}) 76 | if err != nil { 77 | return -1, err 78 | } 79 | 80 | killMode, ok := statefulset.Labels[config.KillValueLabelKey] 81 | if !ok { 82 | return -1, fmt.Errorf("%s %s does not have %s label", ss.Kind(), ss.Name(), config.KillValueLabelKey) 83 | } 84 | 85 | killModeInt, err := strconv.Atoi(killMode) 86 | if err != nil || !(killModeInt > 0) { 87 | return -1, fmt.Errorf("Invalid value for label %s: %d", config.KillValueLabelKey, killModeInt) 88 | } 89 | 90 | return killModeInt, nil 91 | } 92 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/statefulsets/eligible_statefulsets_test.go: -------------------------------------------------------------------------------- 1 | package statefulsets 2 | 3 | import ( 4 | "testing" 5 | 6 | "kube-monkey/internal/pkg/config" 7 | 8 | "github.com/stretchr/testify/assert" 9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | "k8s.io/client-go/kubernetes/fake" 11 | ) 12 | 13 | func TestEligibleStatefulSets(t *testing.T) { 14 | v1stfs := newStatefulSet( 15 | NAME, 16 | map[string]string{ 17 | "kube-monkey/identifier": "1", 18 | "kube-monkey/mtbf": "1", 19 | }, 20 | ) 21 | 22 | client := fake.NewSimpleClientset(&v1stfs) 23 | victims, _ := EligibleStatefulSets(client, NAMESPACE, &metav1.ListOptions{}) 24 | 25 | assert.Len(t, victims, 1) 26 | } 27 | 28 | func TestIsEnrolled(t *testing.T) { 29 | v1stfs := newStatefulSet( 30 | NAME, 31 | 
map[string]string{ 32 | config.IdentLabelKey: "1", 33 | config.MtbfLabelKey: "1", 34 | config.EnabledLabelKey: config.EnabledLabelValue, 35 | }, 36 | ) 37 | 38 | stfs, _ := New(&v1stfs) 39 | 40 | client := fake.NewSimpleClientset(&v1stfs) 41 | 42 | b, _ := stfs.IsEnrolled(client) 43 | 44 | assert.Equal(t, b, true, "Expected statefulset to be enrolled") 45 | } 46 | 47 | func TestIsNotEnrolled(t *testing.T) { 48 | v1stfs := newStatefulSet( 49 | NAME, 50 | map[string]string{ 51 | config.IdentLabelKey: "1", 52 | config.MtbfLabelKey: "1", 53 | config.EnabledLabelKey: "x", 54 | }, 55 | ) 56 | 57 | stfs, _ := New(&v1stfs) 58 | 59 | client := fake.NewSimpleClientset(&v1stfs) 60 | 61 | b, _ := stfs.IsEnrolled(client) 62 | 63 | assert.Equal(t, b, false, "Expected statefulset to not be enrolled") 64 | } 65 | 66 | func TestKillType(t *testing.T) { 67 | 68 | ident := "1" 69 | mtbf := "1" 70 | killMode := "kill-mode" 71 | 72 | v1stfs := newStatefulSet( 73 | NAME, 74 | map[string]string{ 75 | config.IdentLabelKey: ident, 76 | config.MtbfLabelKey: mtbf, 77 | }, 78 | ) 79 | 80 | stfs, _ := New(&v1stfs) 81 | 82 | client := fake.NewSimpleClientset(&v1stfs) 83 | 84 | _, err := stfs.KillType(client) 85 | 86 | assert.EqualError(t, err, stfs.Kind()+" "+stfs.Name()+" does not have "+config.KillTypeLabelKey+" label") 87 | 88 | v1stfs = newStatefulSet( 89 | NAME, 90 | map[string]string{ 91 | config.IdentLabelKey: ident, 92 | config.MtbfLabelKey: mtbf, 93 | config.KillTypeLabelKey: killMode, 94 | }, 95 | ) 96 | 97 | client = fake.NewSimpleClientset(&v1stfs) 98 | 99 | kill, _ := stfs.KillType(client) 100 | 101 | assert.Equal(t, kill, killMode, "Unexpected kill value, got %d", kill) 102 | } 103 | 104 | func TestKillValue(t *testing.T) { 105 | 106 | ident := "1" 107 | mtbf := "1" 108 | killValue := "0" 109 | 110 | v1stfs := newStatefulSet( 111 | NAME, 112 | map[string]string{ 113 | config.IdentLabelKey: ident, 114 | config.MtbfLabelKey: mtbf, 115 | }, 116 | ) 117 | 118 | stfs, _ := New(&v1stfs) 
119 | 120 | client := fake.NewSimpleClientset(&v1stfs) 121 | 122 | _, err := stfs.KillValue(client) 123 | 124 | assert.EqualError(t, err, stfs.Kind()+" "+stfs.Name()+" does not have "+config.KillValueLabelKey+" label") 125 | 126 | v1stfs = newStatefulSet( 127 | NAME, 128 | map[string]string{ 129 | config.IdentLabelKey: ident, 130 | config.MtbfLabelKey: mtbf, 131 | config.KillValueLabelKey: killValue, 132 | }, 133 | ) 134 | 135 | client = fake.NewSimpleClientset(&v1stfs) 136 | 137 | _, err = stfs.KillValue(client) 138 | 139 | assert.EqualError(t, err, "Invalid value for label "+config.KillValueLabelKey+": "+killValue) 140 | 141 | killValue = "1" 142 | 143 | v1stfs = newStatefulSet( 144 | NAME, 145 | map[string]string{ 146 | config.IdentLabelKey: ident, 147 | config.MtbfLabelKey: mtbf, 148 | config.KillValueLabelKey: killValue, 149 | }, 150 | ) 151 | 152 | client = fake.NewSimpleClientset(&v1stfs) 153 | 154 | kill, _ := stfs.KillValue(client) 155 | 156 | assert.Equalf(t, kill, 1, "Unexpected a kill value, got %d", kill) 157 | } 158 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/statefulsets/statefulset_test.go: -------------------------------------------------------------------------------- 1 | package statefulsets 2 | 3 | import ( 4 | "testing" 5 | 6 | "kube-monkey/internal/pkg/config" 7 | 8 | "github.com/stretchr/testify/assert" 9 | appsv1 "k8s.io/api/apps/v1" 10 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11 | ) 12 | 13 | const ( 14 | IDENTIFIER = "kube-monkey-id" 15 | NAME = "statefulset_name" 16 | NAMESPACE = metav1.NamespaceDefault 17 | ) 18 | 19 | func newStatefulSet(name string, labels map[string]string) appsv1.StatefulSet { 20 | 21 | return appsv1.StatefulSet{ 22 | ObjectMeta: metav1.ObjectMeta{ 23 | Name: name, 24 | Namespace: NAMESPACE, 25 | Labels: labels, 26 | }, 27 | } 28 | } 29 | 30 | func TestNew(t *testing.T) { 31 | 32 | v1stfs := newStatefulSet( 33 | NAME, 34 | map[string]string{ 
35 | config.IdentLabelKey: IDENTIFIER, 36 | config.MtbfLabelKey: "1", 37 | }, 38 | ) 39 | stfs, err := New(&v1stfs) 40 | 41 | assert.NoError(t, err) 42 | assert.Equal(t, "v1.StatefulSet", stfs.Kind()) 43 | assert.Equal(t, NAME, stfs.Name()) 44 | assert.Equal(t, NAMESPACE, stfs.Namespace()) 45 | assert.Equal(t, IDENTIFIER, stfs.Identifier()) 46 | assert.Equal(t, 1, stfs.Mtbf()) 47 | } 48 | 49 | func TestInvalidIdentifier(t *testing.T) { 50 | v1stfs := newStatefulSet( 51 | NAME, 52 | map[string]string{ 53 | config.MtbfLabelKey: "1", 54 | }, 55 | ) 56 | _, err := New(&v1stfs) 57 | 58 | assert.Errorf(t, err, "Expected an error if "+config.IdentLabelKey+" label doesn't exist") 59 | } 60 | 61 | func TestInvalidMtbf(t *testing.T) { 62 | v1stfs := newStatefulSet( 63 | NAME, 64 | map[string]string{ 65 | config.IdentLabelKey: IDENTIFIER, 66 | }, 67 | ) 68 | _, err := New(&v1stfs) 69 | 70 | assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label doesn't exist") 71 | 72 | v1stfs = newStatefulSet( 73 | NAME, 74 | map[string]string{ 75 | config.IdentLabelKey: IDENTIFIER, 76 | config.MtbfLabelKey: "string", 77 | }, 78 | ) 79 | _, err = New(&v1stfs) 80 | 81 | assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label can't be converted a Int type") 82 | 83 | v1stfs = newStatefulSet( 84 | NAME, 85 | map[string]string{ 86 | config.IdentLabelKey: IDENTIFIER, 87 | config.MtbfLabelKey: "0", 88 | }, 89 | ) 90 | _, err = New(&v1stfs) 91 | 92 | assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label is lower than 1") 93 | } 94 | -------------------------------------------------------------------------------- /internal/pkg/victims/factory/statefulsets/statefulsets.go: -------------------------------------------------------------------------------- 1 | package statefulsets 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | 7 | "kube-monkey/internal/pkg/config" 8 | "kube-monkey/internal/pkg/victims" 9 | 10 | corev1 "k8s.io/api/apps/v1" 11 | ) 12 | 
13 | type StatefulSet struct { 14 | *victims.VictimBase 15 | } 16 | 17 | // New creates a new instance of StatefulSet 18 | func New(ss *corev1.StatefulSet) (*StatefulSet, error) { 19 | ident, err := identifier(ss) 20 | if err != nil { 21 | return nil, err 22 | } 23 | mtbf, err := meanTimeBetweenFailures(ss) 24 | if err != nil { 25 | return nil, err 26 | } 27 | kind := fmt.Sprintf("%T", *ss) 28 | 29 | return &StatefulSet{VictimBase: victims.New(kind, ss.Name, ss.Namespace, ident, mtbf)}, nil 30 | } 31 | 32 | // Returns the value of the label defined by config.IdentLabelKey 33 | // from the statefulset labels 34 | // This label should be unique to a statefulset, and is used to 35 | // identify the pods that belong to this statefulset, as pods 36 | // inherit labels from the StatefulSet 37 | func identifier(kubekind *corev1.StatefulSet) (string, error) { 38 | identifier, ok := kubekind.Labels[config.IdentLabelKey] 39 | if !ok { 40 | return "", fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.IdentLabelKey) 41 | } 42 | return identifier, nil 43 | } 44 | 45 | // Read the mean-time-between-failures value defined by the StatefulSet 46 | // in the label defined by config.MtbfLabelKey 47 | func meanTimeBetweenFailures(kubekind *corev1.StatefulSet) (int, error) { 48 | mtbf, ok := kubekind.Labels[config.MtbfLabelKey] 49 | if !ok { 50 | return -1, fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.MtbfLabelKey) 51 | } 52 | 53 | mtbfInt, err := strconv.Atoi(mtbf) 54 | if err != nil { 55 | return -1, err 56 | } 57 | 58 | if !(mtbfInt > 0) { 59 | return -1, fmt.Errorf("Invalid value for label %s: %d", config.MtbfLabelKey, mtbfInt) 60 | } 61 | 62 | return mtbfInt, nil 63 | } 64 | -------------------------------------------------------------------------------- /internal/pkg/victims/victims.go: -------------------------------------------------------------------------------- 1 | package victims 2 | 3 | import ( 4 | "context" 5 | "fmt" 
// VictimBase holds the attributes shared by every victim kind and provides
// the getters and pod-level API calls defined on *VictimBase in this file.
// Kind-specific victims embed a *VictimBase.
type VictimBase struct {
	kind       string // kind name reported by Kind()
	name       string // object name reported by Name()
	namespace  string // namespace reported by Namespace()
	identifier string // identifier label value reported by Identifier()
	mtbf       int    // mean time between failures reported by Mtbf()

	// New leaves this embedded interface nil.
	// NOTE(review): a VictimBaseTemplate method that *VictimBase does not
	// define itself would be dispatched to this nil value — confirm every
	// interface method has a concrete implementation.
	VictimBaseTemplate
}
identifier string, mtbf int) *VictimBase { 76 | return &VictimBase{kind: kind, name: name, namespace: namespace, identifier: identifier, mtbf: mtbf} 77 | } 78 | 79 | func (v *VictimBase) Kind() string { 80 | return v.kind 81 | } 82 | 83 | func (v *VictimBase) Name() string { 84 | return v.name 85 | } 86 | 87 | func (v *VictimBase) Namespace() string { 88 | return v.namespace 89 | } 90 | 91 | func (v *VictimBase) Identifier() string { 92 | return v.identifier 93 | } 94 | 95 | func (v *VictimBase) Mtbf() int { 96 | return v.mtbf 97 | } 98 | 99 | // RunningPods returns a list of running pods for the victim 100 | func (v *VictimBase) RunningPods(clientset kube.Interface) (runningPods []corev1.Pod, err error) { 101 | pods, err := v.Pods(clientset) 102 | if err != nil { 103 | return nil, err 104 | } 105 | 106 | for _, pod := range pods { 107 | if pod.Status.Phase == corev1.PodRunning { 108 | runningPods = append(runningPods, pod) 109 | } 110 | } 111 | 112 | return runningPods, nil 113 | } 114 | 115 | // Pods returns a list of pods under the victim 116 | func (v *VictimBase) Pods(clientset kube.Interface) ([]corev1.Pod, error) { 117 | labelSelector, err := labelFilterForPods(v.identifier) 118 | if err != nil { 119 | return nil, err 120 | } 121 | 122 | podlist, err := clientset.CoreV1().Pods(v.namespace).List(context.TODO(), *labelSelector) 123 | if err != nil { 124 | return nil, err 125 | } 126 | return podlist.Items, nil 127 | } 128 | 129 | // DeletePod removes specified pod for victim 130 | func (v *VictimBase) DeletePod(clientset kube.Interface, podName string) error { 131 | if config.DryRun() { 132 | glog.Infof("[DryRun Mode] Terminated pod %s for %s/%s", podName, v.namespace, v.name) 133 | return nil 134 | } 135 | 136 | deleteOpts := v.GetDeleteOptsForPod() 137 | return clientset.CoreV1().Pods(v.namespace).Delete(context.TODO(), podName, *deleteOpts) 138 | } 139 | 140 | // Creates the DeleteOptions object 141 | // Grace period is derived from config 142 | func (v 
*VictimBase) GetDeleteOptsForPod() *metav1.DeleteOptions { 143 | gracePeriodSec := config.GracePeriodSeconds() 144 | 145 | return &metav1.DeleteOptions{ 146 | GracePeriodSeconds: gracePeriodSec, 147 | } 148 | } 149 | 150 | // DeleteRandomPods removes specified number of random pods for the victim 151 | func (v *VictimBase) DeleteRandomPods(clientset kube.Interface, killNum int) error { 152 | // Pick a target pod to delete 153 | pods, err := v.RunningPods(clientset) 154 | if err != nil { 155 | return err 156 | } 157 | 158 | numPods := len(pods) 159 | switch { 160 | case numPods == 0: 161 | return fmt.Errorf("%s %s has no running pods at the moment", v.kind, v.name) 162 | case killNum == 0: 163 | return fmt.Errorf("no terminations requested for %s %s", v.kind, v.name) 164 | case numPods < killNum: 165 | glog.Warningf("%s %s has only %d currently running pods, but %d terminations requested", v.kind, v.name, numPods, killNum) 166 | fallthrough 167 | case numPods == killNum: 168 | glog.V(6).Infof("Killing ALL %d running pods for %s %s", numPods, v.kind, v.name) 169 | case killNum < 0: 170 | return fmt.Errorf("cannot request negative terminations %d for %s %s", killNum, v.kind, v.name) 171 | case numPods > killNum: 172 | glog.V(6).Infof("Killing %d running pods for %s %s", killNum, v.kind, v.name) 173 | default: 174 | return fmt.Errorf("unexpected behavior for terminating %s %s", v.kind, v.name) 175 | } 176 | 177 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 178 | 179 | for i := 0; i < killNum; i++ { 180 | victimIndex := r.Intn(numPods) 181 | targetPod := pods[victimIndex].Name 182 | 183 | glog.V(6).Infof("Terminating pod %s for %s %s/%s\n", targetPod, v.kind, v.namespace, v.name) 184 | 185 | err = v.DeletePod(clientset, targetPod) 186 | if err != nil { 187 | return err 188 | } 189 | } 190 | 191 | // Successful termination 192 | return nil 193 | } 194 | 195 | // Deprecated for DeleteRandomPods(clientset, 1) 196 | // Remove a random pod for the victim 197 | func 
(v *VictimBase) DeleteRandomPod(clientset kube.Interface) error { 198 | // Pick a target pod to delete 199 | pods, err := v.RunningPods(clientset) 200 | if err != nil { 201 | return err 202 | } 203 | 204 | if len(pods) == 0 { 205 | return fmt.Errorf("%s %s has no running pods at the moment", v.kind, v.name) 206 | } 207 | 208 | targetPod := RandomPodName(pods) 209 | 210 | glog.V(6).Infof("Terminating pod %s for %s %s\n", targetPod, v.kind, v.name) 211 | return v.DeletePod(clientset, targetPod) 212 | } 213 | 214 | // IsBlacklisted checks if this victim is blacklisted 215 | func (v *VictimBase) IsBlacklisted() bool { 216 | if config.BlacklistEnabled() { 217 | blacklist := config.BlacklistedNamespaces() 218 | return blacklist.Has(v.namespace) 219 | } 220 | return false 221 | } 222 | 223 | // IsWhitelisted checks if this victim is whitelisted 224 | func (v *VictimBase) IsWhitelisted() bool { 225 | if config.WhitelistEnabled() { 226 | whitelist := config.WhitelistedNamespaces() 227 | return whitelist.Has(v.namespace) 228 | } 229 | return true 230 | } 231 | 232 | // Create a label filter to filter only for pods that belong to the this 233 | // victim. 
This is done using the identifier label 234 | func labelFilterForPods(identifier string) (*metav1.ListOptions, error) { 235 | req, err := labelRequirementForPods(identifier) 236 | if err != nil { 237 | return nil, err 238 | } 239 | labelFilter := &metav1.ListOptions{ 240 | LabelSelector: labels.NewSelector().Add(*req).String(), 241 | } 242 | return labelFilter, nil 243 | } 244 | 245 | // Create a labels.Requirement that can be used to build a filter 246 | func labelRequirementForPods(identifier string) (*labels.Requirement, error) { 247 | return labels.NewRequirement(config.IdentLabelKey, selection.Equals, sets.NewString(identifier).UnsortedList()) 248 | } 249 | 250 | // RandomPodName picks a random pod name from a list of Pods 251 | func RandomPodName(pods []corev1.Pod) string { 252 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 253 | randIndex := r.Intn(len(pods)) 254 | return pods[randIndex].Name 255 | } 256 | 257 | // KillNumberForKillingAll returns the number of pods to kill based on the number of all running pods 258 | func (v *VictimBase) KillNumberForKillingAll(clientset kube.Interface) (int, error) { 259 | killNum, err := v.numberOfRunningPods(clientset) 260 | if err != nil { 261 | return 0, err 262 | } 263 | 264 | return killNum, nil 265 | } 266 | 267 | // KillNumberForFixedPercentage returns the number of pods to kill based on a kill percentage and the number of running pods 268 | func (v *VictimBase) KillNumberForFixedPercentage(clientset kube.Interface, killPercentage int) (int, error) { 269 | if killPercentage == 0 { 270 | glog.V(6).Infof("Not terminating any pods for %s %s as kill percentage is 0\n", v.kind, v.name) 271 | // Report success 272 | return 0, nil 273 | } 274 | if killPercentage < 0 || killPercentage > 100 { 275 | return 0, fmt.Errorf("percentage value of %d is invalid. 
Must be [0-100]", killPercentage) 276 | } 277 | 278 | numRunningPods, err := v.numberOfRunningPods(clientset) 279 | if err != nil { 280 | return 0, err 281 | } 282 | 283 | numberOfPodsToKill := float64(numRunningPods) * float64(killPercentage) / 100 284 | killNum := int(math.Round(numberOfPodsToKill)) 285 | 286 | return killNum, nil 287 | } 288 | 289 | // KillNumberForMaxPercentage returns a number of pods to kill based on a a random kill percentage (between 0 and maxPercentage) and the number of running pods 290 | func (v *VictimBase) KillNumberForMaxPercentage(clientset kube.Interface, maxPercentage int) (int, error) { 291 | if maxPercentage == 0 { 292 | glog.V(6).Infof("Not terminating any pods for %s %s as kill percentage is 0", v.kind, v.name) 293 | // Report success 294 | return 0, nil 295 | } 296 | if maxPercentage < 0 || maxPercentage > 100 { 297 | return 0, fmt.Errorf("percentage value of %d is invalid. Must be [0-100]", maxPercentage) 298 | } 299 | 300 | numRunningPods, err := v.numberOfRunningPods(clientset) 301 | if err != nil { 302 | return 0, err 303 | } 304 | 305 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 306 | killPercentage := r.Intn(maxPercentage + 1) // + 1 because Intn works with half open interval [0,n) and we want [0,n] 307 | numberOfPodsToKill := float64(numRunningPods) * float64(killPercentage) / 100 308 | killNum := int(math.Round(numberOfPodsToKill)) 309 | 310 | return killNum, nil 311 | } 312 | 313 | // Returns the number of running pods or 0 if the operation fails 314 | func (v *VictimBase) numberOfRunningPods(clientset kube.Interface) (int, error) { 315 | pods, err := v.RunningPods(clientset) 316 | if err != nil { 317 | return 0, errors.Wrapf(err, "Failed to get running pods for victim %s %s", v.kind, v.name) 318 | } 319 | 320 | return len(pods), nil 321 | } 322 | -------------------------------------------------------------------------------- /internal/pkg/victims/victims_test.go: 
-------------------------------------------------------------------------------- 1 | package victims 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "strings" 7 | "testing" 8 | 9 | "k8s.io/apimachinery/pkg/runtime" 10 | 11 | "kube-monkey/internal/pkg/config" 12 | 13 | "github.com/stretchr/testify/assert" 14 | corev1 "k8s.io/api/core/v1" 15 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 16 | kube "k8s.io/client-go/kubernetes" 17 | "k8s.io/client-go/kubernetes/fake" 18 | ) 19 | 20 | const ( 21 | NAMESPACE = metav1.NamespaceDefault 22 | IDENTIFIER = "kube-monkey-id" 23 | KIND = "Pod" 24 | NAME = "name" 25 | ) 26 | 27 | func newPod(name string, status corev1.PodPhase) corev1.Pod { 28 | 29 | return corev1.Pod{ 30 | TypeMeta: metav1.TypeMeta{ 31 | Kind: "Pod", 32 | APIVersion: "v1", 33 | }, 34 | ObjectMeta: metav1.ObjectMeta{ 35 | Name: name, 36 | Namespace: NAMESPACE, 37 | Labels: map[string]string{ 38 | "kube-monkey/identifier": IDENTIFIER, 39 | }, 40 | }, 41 | Status: corev1.PodStatus{ 42 | Phase: status, 43 | }, 44 | } 45 | } 46 | 47 | func generateNPods(namePrefix string, n int, status corev1.PodPhase) []runtime.Object { 48 | var pods []runtime.Object 49 | for i := 0; i < n; i++ { 50 | pod := newPod(fmt.Sprintf("%s%d", namePrefix, i), status) 51 | pods = append(pods, &pod) 52 | } 53 | 54 | return pods 55 | } 56 | 57 | func generateNRunningPods(namePrefix string, n int) []runtime.Object { 58 | return generateNPods(namePrefix, n, corev1.PodRunning) 59 | } 60 | 61 | func newVictimBase() *VictimBase { 62 | return New(KIND, NAME, NAMESPACE, IDENTIFIER, 1) 63 | } 64 | 65 | func getPodList(client kube.Interface) *corev1.PodList { 66 | podList, _ := client.CoreV1().Pods(NAMESPACE).List(context.TODO(), metav1.ListOptions{}) 67 | return podList 68 | } 69 | 70 | func TestVictimBaseTemplateGetters(t *testing.T) { 71 | 72 | v := newVictimBase() 73 | 74 | assert.Equal(t, "Pod", v.Kind()) 75 | assert.Equal(t, "name", v.Name()) 76 | assert.Equal(t, NAMESPACE, v.Namespace()) 77 | 
assert.Equal(t, IDENTIFIER, v.Identifier()) 78 | assert.Equal(t, 1, v.Mtbf()) 79 | } 80 | 81 | func TestRunningPods(t *testing.T) { 82 | 83 | v := newVictimBase() 84 | pod1 := newPod("app1", corev1.PodRunning) 85 | pod2 := newPod("app2", corev1.PodPending) 86 | 87 | client := fake.NewSimpleClientset(&pod1, &pod2) 88 | 89 | podList, err := v.RunningPods(client) 90 | 91 | assert.NoError(t, err) 92 | assert.Lenf(t, podList, 1, "Expected 1 item in podList, got %d", len(podList)) 93 | 94 | name := podList[0].GetName() 95 | assert.Equal(t, name, "app1", "Unexpected pod name, got %s", name) 96 | } 97 | 98 | func TestPods(t *testing.T) { 99 | 100 | v := newVictimBase() 101 | pod1 := newPod("app1", corev1.PodRunning) 102 | pod2 := newPod("app2", corev1.PodPending) 103 | 104 | client := fake.NewSimpleClientset(&pod1, &pod2) 105 | 106 | podList, _ := v.Pods(client) 107 | 108 | assert.Lenf(t, podList, 2, "Expected 2 items in podList, got %d", len(podList)) 109 | } 110 | 111 | func TestDeletePod(t *testing.T) { 112 | 113 | v := newVictimBase() 114 | pod := newPod("app", corev1.PodRunning) 115 | 116 | client := fake.NewSimpleClientset(&pod) 117 | 118 | err := v.DeletePod(client, "app") 119 | assert.NoError(t, err) 120 | 121 | podList := getPodList(client).Items 122 | assert.Lenf(t, podList, 0, "Expected 0 items in podList, got %d", len(podList)) 123 | } 124 | 125 | func TestDeleteRandomPods(t *testing.T) { 126 | 127 | v := newVictimBase() 128 | pod1 := newPod("app1", corev1.PodRunning) 129 | pod2 := newPod("app2", corev1.PodPending) 130 | pod3 := newPod("app3", corev1.PodRunning) 131 | 132 | client := fake.NewSimpleClientset(&pod1, &pod2, &pod3) 133 | podList := getPodList(client).Items 134 | assert.Lenf(t, podList, 3, "Expected 3 items in podList, got %d", len(podList)) 135 | 136 | err := v.DeleteRandomPods(client, 0) 137 | assert.NotNil(t, err, "expected err for killNum=0 but got nil") 138 | 139 | err = v.DeleteRandomPods(client, -1) 140 | assert.NotNil(t, err, "expected err 
for negative terminations but got nil") 141 | 142 | _ = v.DeleteRandomPods(client, 1) 143 | podList = getPodList(client).Items 144 | assert.Lenf(t, podList, 2, "Expected 2 items in podList, got %d", len(podList)) 145 | 146 | _ = v.DeleteRandomPods(client, 2) 147 | podList = getPodList(client).Items 148 | assert.Lenf(t, podList, 1, "Expected 1 item in podList, got %d", len(podList)) 149 | name := podList[0].GetName() 150 | assert.Equalf(t, name, "app2", "Expected not running pods not be deleted") 151 | 152 | err = v.DeleteRandomPods(client, 2) 153 | assert.EqualError(t, err, KIND+" "+NAME+" has no running pods at the moment") 154 | } 155 | 156 | func TestKillNumberForMaxPercentage(t *testing.T) { 157 | 158 | v := newVictimBase() 159 | 160 | pods := generateNRunningPods("app", 100) 161 | 162 | client := fake.NewSimpleClientset(pods...) 163 | 164 | killNum, err := v.KillNumberForMaxPercentage(client, 50) // 50% means we kill between at most 50 pods of the 100 that are running 165 | assert.Nil(t, err, "Expected err to be nil but got %v", err) 166 | assert.Truef(t, killNum >= 0 && killNum <= 50, "Expected kill number between 0 and 50 pods, got %d", killNum) 167 | } 168 | 169 | func TestKillNumberForMaxPercentageInvalidValues(t *testing.T) { 170 | type TestCase struct { 171 | name string 172 | maxPercentage int 173 | expectedNum int 174 | expectedErr bool 175 | } 176 | 177 | tcs := []TestCase{ 178 | { 179 | name: "Negative value for maxPercentage", 180 | maxPercentage: -1, 181 | expectedNum: 0, 182 | expectedErr: true, 183 | }, 184 | { 185 | name: "0 value for maxPercentage", 186 | maxPercentage: 0, 187 | expectedNum: 0, 188 | expectedErr: false, 189 | }, 190 | { 191 | name: "maxPercentage greater than 100", 192 | maxPercentage: 110, 193 | expectedNum: 0, 194 | expectedErr: true, 195 | }, 196 | } 197 | 198 | for _, tc := range tcs { 199 | v := newVictimBase() 200 | client := fake.NewSimpleClientset() 201 | 202 | result, err := v.KillNumberForMaxPercentage(client, 
tc.maxPercentage) 203 | 204 | if tc.expectedErr { 205 | assert.NotNil(t, err, tc.name) 206 | } else { 207 | assert.Nil(t, err, tc.name) 208 | assert.Equal(t, result, tc.expectedNum, tc.name) 209 | } 210 | } 211 | } 212 | 213 | func TestDeletePodsFixedPercentage(t *testing.T) { 214 | type TestCase struct { 215 | name string 216 | killPercentage int 217 | pods []runtime.Object 218 | expectedNum int 219 | expectedErr bool 220 | } 221 | 222 | tcs := []TestCase{ 223 | { 224 | name: "negative value for killPercentage", 225 | killPercentage: -1, 226 | expectedNum: 0, 227 | expectedErr: true, 228 | }, 229 | { 230 | name: "0 value for killPercentage", 231 | killPercentage: 0, 232 | expectedNum: 0, 233 | expectedErr: false, 234 | }, 235 | { 236 | name: "killPercentage greater than 100", 237 | killPercentage: 110, 238 | expectedNum: 0, 239 | expectedErr: true, 240 | }, 241 | { 242 | name: "correctly calculates pods to kill based on killPercentage", 243 | killPercentage: 50, 244 | pods: generateNRunningPods("app", 10), 245 | expectedNum: 5, 246 | expectedErr: false, 247 | }, 248 | { 249 | name: "correctly floors fractional values for the number of pods to kill", 250 | killPercentage: 33, 251 | pods: generateNRunningPods("app", 10), 252 | expectedNum: 3, 253 | expectedErr: false, 254 | }, 255 | { 256 | name: "does not count pending pods when calculating num of pods to kill", 257 | killPercentage: 80, 258 | pods: append( 259 | generateNPods("running", 1, corev1.PodRunning), 260 | generateNPods("pending", 1, corev1.PodPending)...), 261 | expectedNum: 1, 262 | expectedErr: false, 263 | }, 264 | } 265 | 266 | for _, tc := range tcs { 267 | client := fake.NewSimpleClientset(tc.pods...) 
268 | v := newVictimBase() 269 | 270 | result, err := v.KillNumberForFixedPercentage(client, tc.killPercentage) 271 | 272 | if tc.expectedErr { 273 | assert.NotNil(t, err, tc.name) 274 | } else { 275 | assert.Nil(t, err, tc.name) 276 | assert.Equal(t, tc.expectedNum, result, tc.name) 277 | } 278 | } 279 | 280 | } 281 | 282 | func TestDeleteRandomPod(t *testing.T) { 283 | 284 | v := newVictimBase() 285 | pod1 := newPod("app1", corev1.PodRunning) 286 | pod2 := newPod("app2", corev1.PodPending) 287 | 288 | client := fake.NewSimpleClientset(&pod1, &pod2) 289 | 290 | _ = v.DeleteRandomPod(client) 291 | podList := getPodList(client).Items 292 | assert.Len(t, podList, 1) 293 | 294 | err := v.DeleteRandomPods(client, 2) 295 | assert.EqualError(t, err, KIND+" "+NAME+" has no running pods at the moment") 296 | } 297 | 298 | func TestIsBlacklisted(t *testing.T) { 299 | 300 | v := newVictimBase() 301 | 302 | config.SetDefaults() 303 | 304 | b := v.IsBlacklisted() 305 | assert.False(t, b, "%s namespace should not be blacklisted", NAMESPACE) 306 | 307 | v = New("Pod", "name", metav1.NamespaceSystem, IDENTIFIER, 1) 308 | b = v.IsBlacklisted() 309 | assert.True(t, b, "%s namespace should be blacklisted", metav1.NamespaceSystem) 310 | 311 | } 312 | 313 | func TestIsWhitelisted(t *testing.T) { 314 | 315 | v := newVictimBase() 316 | 317 | config.SetDefaults() 318 | 319 | b := v.IsWhitelisted() 320 | assert.True(t, b, "%s namespace should be whitelisted", NAMESPACE) 321 | } 322 | 323 | func TestRandomPodName(t *testing.T) { 324 | 325 | pod1 := newPod("app1", corev1.PodRunning) 326 | pod2 := newPod("app2", corev1.PodPending) 327 | 328 | name := RandomPodName([]corev1.Pod{pod1, pod2}) 329 | assert.Truef(t, strings.HasPrefix(name, "app"), "Pod name %s should start with 'app'", name) 330 | } 331 | 332 | func TestGetDeleteOptsForPod(t *testing.T) { 333 | configuredGracePeriod := config.GracePeriodSeconds() 334 | 335 | v := newVictimBase() 336 | deleteOpts := v.GetDeleteOptsForPod() 337 | 
338 | assert.Equal(t, deleteOpts.GracePeriodSeconds, configuredGracePeriod) 339 | 340 | } 341 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | 8 | "github.com/golang/glog" 9 | 10 | "kube-monkey/internal/pkg/config" 11 | "kube-monkey/internal/pkg/kubemonkey" 12 | ) 13 | 14 | func glogUsage() { 15 | fmt.Fprintf(os.Stderr, "usage: example -stderrthreshold=[INFO|WARN|FATAL] -log_dir=[string]\n") 16 | flag.PrintDefaults() 17 | os.Exit(2) 18 | } 19 | 20 | func initLogging() { 21 | // Check commandline options or "flags" for glog parameters 22 | // to be picked up by the glog module 23 | flag.Usage = glogUsage 24 | flag.Parse() 25 | 26 | if _, err := os.Stat(flag.Lookup("log_dir").Value.String()); os.IsNotExist(err) { 27 | err = os.MkdirAll(flag.Lookup("log_dir").Value.String(), os.ModePerm) 28 | if err != nil { 29 | glog.Errorf("Failed to open custom log directory at %s; defaulting to /tmp! Error: %v", flag.Lookup("log_dir").Value, err) 30 | } else { 31 | glog.V(5).Infof("Created custom logging %s directory!", flag.Lookup("log_dir").Value) 32 | } 33 | } 34 | // Since km runs as a k8 pod, log everything to stderr (stdout not supported) 35 | // this takes advantage of k8's logging driver allowing kubectl logs kube-monkey 36 | if err := flag.Lookup("alsologtostderr").Value.Set("true"); err != nil { 37 | glog.Errorf("Failed to set alsologtostderr. 
Error: %v", err) 38 | } 39 | } 40 | 41 | func initConfig() { 42 | if err := config.Init(); err != nil { 43 | glog.Fatal(err.Error()) 44 | } 45 | } 46 | 47 | func main() { 48 | // Initialize logging 49 | initLogging() 50 | 51 | // Initialize configs 52 | initConfig() 53 | 54 | glog.V(1).Infof("Starting kube-monkey with v logging level %v and local log directory %s", flag.Lookup("v").Value, flag.Lookup("log_dir").Value) 55 | 56 | if err := kubemonkey.Run(); err != nil { 57 | glog.Fatal(err.Error()) 58 | } 59 | } 60 | --------------------------------------------------------------------------------