├── .gitignore ├── Dockerfile ├── Gopkg.toml ├── LICENSE ├── Makefile ├── README.md ├── cmd └── operator │ └── main.go ├── controller └── crd.go ├── cruisecontrol ├── cruise-control.go └── cruise-control_test.go ├── deploy └── kafka-operator.yaml ├── e2e-tests ├── 01-test-basic-setup.bats ├── 02-test-kafka-setup.bats ├── files │ ├── 02-basic-cluster.yaml │ ├── kafka-operator.yaml │ └── manual-zookeeper.yaml ├── hack │ ├── delete-all.sh │ ├── delete-gcloud-cluster.sh │ ├── kubernetes_helper.bash │ └── setup-gcloud-cluster.sh └── run-tests.sh ├── example ├── busybox.yaml ├── kafka-cluster.yaml ├── kafka-operator.yaml ├── kafkaClient.yaml ├── kafkaFullOptions.yaml ├── manual-zookeeper.yaml ├── manualKafka.yaml ├── operator.yaml ├── prometheus.yaml ├── test.json └── test.yaml ├── images ├── cruise-control │ ├── Dockerfile │ ├── capacity.json │ ├── cruisecontrol.properties.tpl │ └── setup-cruise-control.sh └── kafka │ └── Dockerfile ├── kafka └── kafka.go ├── kube ├── cruisecontrol │ ├── cruise-control.go │ └── cruise-control_test.go ├── deploy-helper.go ├── exporter │ ├── exporter.go │ └── exporter_test.go ├── kafka │ ├── kafka.go │ ├── kafka_test.go │ ├── options.go │ └── options_test.go ├── kube.go ├── service-helper.go └── statefulset-helper.go ├── processor ├── metrics.go ├── processor.go └── processor_test.go ├── spec ├── register.go ├── spec.go └── spec_test.go └── util ├── util.go └── util_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | .idea/* 27 | 28 | #Do we need to push vendor? manifest.json should be determinist enough. 
29 | vendor/ 30 | 31 | .vscode/ 32 | bin/ 33 | Gopkg.lock -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.6 2 | MAINTAINER Jakob Karalus 3 | 4 | ADD bin/kafka_operator /bin/usr/sbin/kafka_operator 5 | 6 | CMD ["/bin/usr/sbin/kafka_operator"] -------------------------------------------------------------------------------- /Gopkg.toml: -------------------------------------------------------------------------------- 1 | 2 | required = ["k8s.io/code-generator/cmd/client-gen"] 3 | 4 | 5 | #[[constraint]] 6 | # name = "github.com/Shopify/sarama" 7 | # version = "1.12.0" 8 | 9 | #[[constraint]] 10 | # name = "github.com/Sirupsen/logrus" 11 | # version = "1.0.2" 12 | 13 | #[[constraint]] 14 | # branch = "master" 15 | # name = "github.com/azer/snakecase" 16 | 17 | #[[constraint]] 18 | # branch = "master" 19 | # name = "github.com/google/gofuzz" 20 | 21 | #[[constraint]] 22 | # branch = "master" 23 | # name = "github.com/krallistic/kazoo-go" 24 | 25 | [[constraint]] 26 | name = "k8s.io/apimachinery" 27 | branch = "release-1.8" 28 | 29 | [[constraint]] 30 | name = "k8s.io/client-go" 31 | version = "5.0.0" 32 | 33 | [[constraint]] 34 | name = "k8s.io/api" 35 | branch = "release-1.8" 36 | 37 | [[constraint]] 38 | name = "k8s.io/code-generator" 39 | branch = "release-1.8" 40 | 41 | # goland/dep does not follow k8s.io/code-generator's Godeps.json and gengo's master 42 | # version does not work with release-1.8 k8s.io/code-generator. So we have to 43 | # override it. 44 | #[[override]] 45 | # name = "k8s.io/gengo" 46 | # revision = "9e661e9308f078838e266cca1c673922088c0ea4" 47 | 48 | 49 | [[constraint]] 50 | name = "github.com/kylelemons/godebug" 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for the Docker image 2 | # MAINTAINER: Jakob Karalus 3 | 4 | .PHONY: all build container push deploy clean test 5 | 6 | TAG ?= v0.2.5-dirty-5 7 | PREFIX ?= krallistic 8 | 9 | all: push images 10 | 11 | build: test 12 | GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o bin/kafka_operator cmd/operator/main.go 13 | 14 | container: build 15 | docker build -t $(PREFIX)/kafka-operator:$(TAG) . 
16 | docker build -t $(PREFIX)/kafka-operator:latest .
17 | 
18 | image-cc:
19 | docker build -t $(PREFIX)/cruise-control:latest images/cruise-control/
20 | docker push $(PREFIX)/cruise-control:latest
21 | 
22 | 
23 | images:
24 | docker build -t $(PREFIX)/kafka-cc-reporter:latest images/kafka/
25 | docker push $(PREFIX)/kafka-cc-reporter:latest
26 | 
27 | push: container
28 | docker push $(PREFIX)/kafka-operator:$(TAG)
29 | docker push $(PREFIX)/kafka-operator:latest
30 | 
31 | 
32 | deploy: container
33 | docker build -t $(PREFIX)/kafka-operator:latest .
34 | docker push $(PREFIX)/kafka-operator:latest
35 | docker push $(PREFIX)/kafka-operator:$(TAG)
36 | kubectl delete -f deploy/kafka-operator.yaml
37 | sleep 5
38 | # export TAG=$(TAG)
39 | # TAG=$(TAG) envsubst < deploy/kafka-operator.yaml.tpl > deploy/kafka-operator.yaml
40 | # sed -e "s/\${TAG}/$(TAG)/" template.txt deploy/kafka-operator.yaml.tpl
41 | kubectl apply -f deploy/kafka-operator.yaml
42 | 
43 | clean:
44 | rm -f bin/kafka_operator
45 | kubectl delete -f deploy/kafka-operator.yaml
46 | 
47 | test:
48 | go test $$(go list ./... | grep -v /vendor/)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Project inactive
2 | 
3 | 
4 | # kafka-operator - A Kafka Operator for Kubernetes
5 | 
6 | A Kubernetes Operator for Apache Kafka, which deploys, configures and manages your Kafka cluster through its lifecycle.
7 | Features:
8 | - Fixed deployment of a Cluster, Services and PersistentVolumes
9 | - Upscaling of a Cluster (e.g. adding a Broker)
10 | - Downscaling a Cluster without data loss (partitions are moved off the broker first; under development)
11 | 
12 | 
13 | Upcoming Features/Ideas:
14 | - [ ] Vertical Pod Autoscaling
15 | - [ ] Managed Topics and hot partition detection/shuffling
16 | - [ ] Advanced Partition Shuffling (based on rate/size of incoming messages)
17 | 
18 | 
19 | Currently the Operator is under development.
20 | If you want to run Kafka in Kubernetes, a better option would be to look at the Helm Chart https://github.com/kubernetes/charts/blob/master/incubator/kafka/README.md or, alternatively, this: https://github.com/Yolean/kubernetes-kafka
21 | 
22 | # How to use it:
23 | 
24 | ## 1) Deploy the Operator
25 | First we deploy the Operator inside our cluster:
26 | ```bash
27 | # kubectl apply -f example/kafka-operator.yaml
28 | deployment "kafka-operator" created
29 | ```
30 | 
31 | The Operator then creates a custom resource definition (CRD) "KafkaCluster" inside Kubernetes, which behaves like a normal k8s object.
32 | The only difference is that no internal k8s component reacts to it; only our operator has a watch on it.
33 | 
34 | ## 2) Deploy Zookeeper
35 | Currently you need to deploy ZooKeeper yourself (managing ZooKeeper is not an easy topic, so it is out of scope for now). As a starting point you can find an example for a single-node ZooKeeper under `example/manual-zookeeper.yaml`.
36 | ```bash
37 | # kubectl apply -f example/manual-zookeeper.yaml
38 | service "zk-headless" created
39 | configmap "zk-config" created
40 | statefulset "zk" created
41 | ```
42 | 
43 | ## 3) Create a KafkaCluster spec and deploy
44 | To deploy a Kafka cluster we create a spec (`example/kafka-cluster.yaml`):
45 | 
46 | ```yaml
47 | apiVersion: "krallistic.github.com/v1"
48 | kind: "Kafkacluster"
49 | metadata:
50 | name: test-cluster-1
51 | spec:
52 | brokerCount: 3
53 | topics:
54 | - name: "test1"
55 | replicationFactor: 1
56 | partitions: 1
57 | - name: "test2"
58 | replicationFactor: 2
59 | partitions: 2
60 | kafkaOptions:
61 | logRetentionHours: 24
62 | autoCreateTopics: false
63 | compressionType: "gzip"
64 | zookeeperConnect: zk-headless.default.svc.cluster.local
65 | image: confluentinc/cp-kafka:latest
66 | leaderImbalanceRatio: 0.1
67 | leaderImbalanceInterval: 600
68 | storageClass: emptyDir
69 | minimumGracePeriod: 1200
70 | jmxSidecar: false
71 | resources:
72 | cpu: "1"
73 | memory: "1Gi"
74 | diskSpace: "50G"
75 | 
76 | ```
77 | We can then deploy this YAML into Kubernetes via kubectl:
78 | ```bash
79 | # kubectl apply -f example/kafka-cluster.yaml
80 | kafkacluster "test-cluster-1" created
81 | ```
82 | This creates a ```kafkacluster``` object inside the API server. We can check this with:
83 | ```bash
84 | # kubectl get kafkacluster
85 | NAME KIND
86 | test-cluster-1 Kafkacluster.v1.krallistic.github.com
87 | ```
88 | 
89 | The operator then picks up the newly created object and creates the pods needed for the specified Kafka cluster.
90 | Creating the whole cluster can take a while, but after a bit you should see every broker running, along with services to reach each broker directly or all brokers load-balanced:
91 | ```bash
92 | # kubectl get pods,service
93 | NAME READY STATUS RESTARTS AGE
94 | po/kafka-offset-checker-test-cluster-1-3029848613-z8rtd 1/1 Running 3 1m
95 | po/kafka-operator-767603131-zcnt0 1/1 Running 0 1m
96 | po/test-cluster-1-0 1/1 Running 0 1m
97 | po/test-cluster-1-1 1/1 Running 0 54s
98 | po/test-cluster-1-2 1/1 Running 0 40s
99 | po/zk-0 1/1 Running 0 1m
100 | 
101 | NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE
102 | svc/kubernetes 10.7.240.1 443/TCP 5h
103 | svc/test-cluster-1 None 9092/TCP 1m
104 | svc/test-cluster-1-broker-0 10.7.243.30 9092:31545/TCP 1m
105 | svc/test-cluster-1-broker-1 10.7.250.215 9092:31850/TCP 1m
106 | svc/test-cluster-1-broker-2 10.7.249.221 9092:32653/TCP 1m
107 | svc/zk-headless None 2888/TCP,3888/TCP 1m
108 | ```
109 | 
110 | ## 4) Resize the cluster
111 | If we want to upscale the cluster we can just change the ```brokerCount``` value.
112 | After changing it (for example to `5`) we run ```kubectl apply -f example/kafka-cluster.yaml```.
113 | The operator should then pick up the change and start upsizing the cluster:
114 | ```bash
115 | # kubectl apply -f example/kafka-cluster.yaml
116 | kafkacluster "test-cluster-1" configured
117 | kubectl get pods
118 | NAME READY STATUS RESTARTS AGE
119 | kafka-offset-checker-test-cluster-1-3029848613-z8rtd 1/1 Running 3 4m
120 | kafka-operator-767603131-zcnt0 1/1 Running 0 4m
121 | test-cluster-1-0 1/1 Running 0 4m
122 | test-cluster-1-1 1/1 Running 0 4m
123 | test-cluster-1-2 1/1 Running 0 3m
124 | test-cluster-1-3 0/1 Pending 0 35s
125 | zk-0 1/1 Running 0 4m
126 | ```
127 | NOTE: Currently the operator does not automatically rebalance topics onto the new broker.
128 | 
129 | ### 4.b) Downscaling:
130 | While downscaling the cluster is possible, and a simple rebalancing is done to prevent data loss, this is currently under heavy development and considered unstable.
131 | 
132 | ## 5) Delete the cluster
133 | When we are done, we can do a
134 | ```bash
135 | # kubectl delete -f example/kafka-cluster.yaml
136 | kafkacluster "test-cluster-1" deleted
137 | ```
138 | to delete the `kafkaCluster` object.
139 | The operator then detects the deletion and shuts down all running components:
140 | ```bash
141 | # kubectl get pods
142 | NAME READY STATUS RESTARTS AGE
143 | kafka-operator-767603131-tv3ck 1/1 Running 0 1m
144 | test-cluster-1-0 0/1 Terminating 0 8m
145 | zk-0 1/1 Running 0 9m
146 | ```
147 | 
148 | # Known Issues / Open Tasks
149 | There are a couple of open tasks/issues; this is mainly just for me to track progress:
150 | 
151 | - [ ] Resize Clusters (without Data Rebalancing)
152 | - [x] Delete Cluster
153 | - [ ] Documentation, Vendoring and Testing
154 | - [ ] Use Resources (K8s and Kafka Options)
155 | - [ ] Monitoring with JMX Sidecar
156 | - [ ] Automatic Rebalancing
157 | - [ ] Investigate Data Gravity
158 | 
159 | 
160 | ### Zookeeper
161 | To get Kafka running, a ZooKeeper is needed. A simple one-node ZooKeeper example is provided in the example folder, but for any usage beyond testing/development a proper ZooKeeper setup should be used. A good example is the ZooKeeper chart in the official Helm repo.
162 | 
163 | 
164 | # Details
165 | 
166 | ## Differences vs Helm Chart
167 | While Helm is a great tool, and the provided Kafka chart is also pretty dope, Helm only manages the deployment of a cluster. Since Kafka is a stateful application, its needs go beyond the normal capabilities of vanilla Kubernetes. For example, downsizing/upsizing the cluster requires moving partitions/replicas off/onto brokers. To automate that, the operator looks at the current cluster state and takes the necessary actions to reach the desired state.
168 | 
169 | ## Images:
170 | Currently the supported images are the official images from Confluent (https://github.com/confluentinc/cp-docker-images). While it is possible to specify other images, due to instrumentation most other images won't work.
171 | 
172 | # Development
173 | 
174 | ## Dependency Management
175 | 
176 | ```dep``` is used for dependency management.
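As a quick sketch of the typical `dep` workflow (assuming `dep` is installed and the repository is checked out on the `GOPATH`; the dependency name below is only a placeholder):

```bash
# Recreate vendor/ so it matches Gopkg.toml and Gopkg.lock
dep ensure

# Add a new dependency and record it in Gopkg.toml / Gopkg.lock
dep ensure -add github.com/example/somelib
```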
177 | 178 | Under `e2e-test/hack` are a couple of 179 | -------------------------------------------------------------------------------- /cmd/operator/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "os/signal" 6 | 7 | "os" 8 | "syscall" 9 | 10 | "net/http" 11 | 12 | log "github.com/Sirupsen/logrus" 13 | 14 | "github.com/prometheus/client_golang/prometheus/promhttp" 15 | 16 | "github.com/krallistic/kafka-operator/kube" 17 | "github.com/krallistic/kafka-operator/processor" 18 | 19 | "github.com/krallistic/kafka-operator/controller" 20 | ) 21 | 22 | var ( 23 | version = "0.0.1" 24 | kubeConfigFile string 25 | print bool 26 | masterHost string 27 | image string 28 | zookeeperConnect string 29 | 30 | metricListenAddress string 31 | metricListenPath string 32 | 33 | logLevel string 34 | 35 | namespace string 36 | 37 | logger = log.WithFields(log.Fields{ 38 | "package": "main", 39 | }) 40 | ) 41 | 42 | func init() { 43 | flag.BoolVar(&print, "print", false, "Show basic information and quit - debug") 44 | flag.StringVar(&kubeConfigFile, "kubeconfig", "", "Location of kubecfg file for access to kubernetes master service; --kube_master_url overrides the URL part of this; if neither this nor --kube_master_url are provided, defaults to service account tokens") 45 | flag.StringVar(&masterHost, "masterhost", "http://localhost:8080", "Full url to kubernetes api server") 46 | flag.StringVar(&image, "image", "confluentinc/cp-kafka:latest", "Image to use for Brokers") 47 | //flag.StringVar(&zookeerConnect, "zookeeperConnect", "zk-0.zk-headless.default.svc.cluster.local:2181", "Connect String to zK, if no string is give a custom zookeeper ist deployed") 48 | 49 | flag.StringVar(&logLevel, "log-level", "debug", "log level, one of debug, info, warn, error") 50 | 51 | flag.StringVar(&metricListenAddress, "listen-address", ":9090", "The address to listen on for HTTP requests.") 52 | flag.StringVar(&metricListenPath, "metric-path", "/metrics", "Path under which the the prometheus metrics can be found") 53 | flag.StringVar(&namespace, "namespace", "", "Namespace on which the operator listens to CR, if not set then all Namespaces will be used") 54 | flag.Parse() 55 | } 56 | 57 | func Main() int { 58 | //TODO make cmd-line flag 59 | level, err := log.ParseLevel(logLevel) 60 | if err != nil { 61 | log.WithField("error", err).Error("Error cant parse log-level, defaulting to info") 62 | level = log.InfoLevel 63 | } 64 | log.SetLevel(level) 65 | 66 | logger.WithFields(log.Fields{ 67 | "version": version, 68 | "masterHost": masterHost, 69 | "kubeconfig": kubeConfigFile, 70 | "image": image, 71 | "metric-listen-address": metricListenAddress, 72 | "metric-listen-path": metricListenPath, 73 | }).Info("Started kafka-operator with args") 74 | if print { 75 | logger.WithFields(log.Fields{"version": version}).Print("Operator Version") 76 | return 0 77 | } 78 | 79 | //Creating osSignals first so we can exit at any time. 80 | osSignals := make(chan os.Signal, 2) 81 | signal.Notify(osSignals, syscall.SIGINT, syscall.SIGKILL, os.Interrupt) 82 | 83 | controlChannel := make(chan int, 2) //TODO allows more finegranular Object? maybe a Struct? Replace with just osSignals? 
84 | 85 | go func() { 86 | for { 87 | select { 88 | case sig := <-osSignals: 89 | logger.WithFields(log.Fields{"signal": sig}).Info("Got Signal from OS shutting Down: ") 90 | controlChannel <- 1 91 | //TODO Cleanup 92 | os.Exit(1) 93 | } 94 | } 95 | }() 96 | 97 | kube, err := kube.New(kubeConfigFile, masterHost) 98 | if err != nil { 99 | logger.WithFields(log.Fields{ 100 | "error": err, 101 | "configFile": kubeConfigFile, 102 | "masterHost": masterHost, 103 | }).Fatal("Error initilizing kubernetes client ") 104 | return 1 105 | } 106 | 107 | cdrClient, err := controller.New(kubeConfigFile, masterHost, namespace) 108 | if err != nil { 109 | logger.WithFields(log.Fields{ 110 | "error": err, 111 | "configFile": kubeConfigFile, 112 | "masterHost": masterHost, 113 | }).Fatal("Error initilizing ThirdPartyRessource (KafkaClusters) client ") 114 | return 1 115 | } 116 | 117 | cdrClient.CreateCustomResourceDefinition() 118 | 119 | processor, err := processor.New(image, *cdrClient, controlChannel, *kube) 120 | processor.Run() 121 | 122 | http.Handle(metricListenPath, promhttp.Handler()) 123 | //Blocking ListenAndServer, so we dont exit 124 | logger.Fatal(http.ListenAndServe(metricListenAddress, nil)) 125 | logger.Info("Exiting now") 126 | 127 | return 0 128 | } 129 | 130 | func main() { 131 | os.Exit(Main()) 132 | } 133 | -------------------------------------------------------------------------------- /controller/crd.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "time" 7 | 8 | "github.com/krallistic/kafka-operator/spec" 9 | "github.com/krallistic/kafka-operator/util" 10 | 11 | log "github.com/Sirupsen/logrus" 12 | 13 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 | "k8s.io/apimachinery/pkg/runtime" 15 | "k8s.io/apimachinery/pkg/runtime/serializer" 16 | "k8s.io/apimachinery/pkg/util/errors" 17 | "k8s.io/apimachinery/pkg/util/wait" 18 | //"k8s.io/client-go/kubernetes" 19 | //"k8s.io/client-go/pkg/api" 20 | 21 | "k8s.io/client-go/rest" 22 | 23 | apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" 24 | 25 | "k8s.io/api/core/v1" 26 | 27 | apiextensionsv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1" 28 | 29 | "k8s.io/apimachinery/pkg/fields" 30 | "k8s.io/client-go/tools/cache" 31 | ) 32 | 33 | var ( 34 | logger = log.WithFields(log.Fields{ 35 | "package": "controller/crd", 36 | }) 37 | ) 38 | 39 | const ( 40 | //tprShortName = "kafka-cluster" 41 | //tprSuffix = "incubator.test.com" 42 | //tprFullName = tprShortName + "." + tprSuffix 43 | //API Name is used in the watch of the API, it defined as tprShorName, removal of -, and suffix s 44 | //tprApiName = "kafkaclusters" 45 | //tprVersion = "v1" 46 | 47 | ) 48 | 49 | type CustomResourceController struct { 50 | ApiExtensionsClient *apiextensionsclient.Clientset 51 | DefaultOption metav1.GetOptions 52 | crdClient *rest.RESTClient 53 | namespace string 54 | } 55 | 56 | func New(kubeConfigFile, masterHost string, namespace string) (*CustomResourceController, error) { 57 | methodLogger := logger.WithFields(log.Fields{"method": "New"}) 58 | 59 | // Create the client config. Use kubeconfig if given, otherwise assume in-cluster. 
60 | config, err := util.BuildConfig(kubeConfigFile) 61 | 62 | apiextensionsclientset, err := apiextensionsclient.NewForConfig(config) 63 | if err != nil { 64 | methodLogger.WithFields(log.Fields{ 65 | "error": err, 66 | "config": config, 67 | "client": apiextensionsclientset, 68 | }).Error("could not init Kubernetes client") 69 | return nil, err 70 | } 71 | 72 | crdClient, err := newCRDClient(config) 73 | if err != nil { 74 | methodLogger.WithFields(log.Fields{ 75 | "Error": err, 76 | "Client": crdClient, 77 | "Config": config, 78 | }).Error("Could not initialize CustomResourceDefinition Kafkacluster cLient") 79 | return nil, err 80 | } 81 | var ns string 82 | if namespace == "" { 83 | ns = v1.NamespaceAll 84 | } else { 85 | ns = namespace 86 | } 87 | 88 | k := &CustomResourceController{ 89 | crdClient: crdClient, 90 | ApiExtensionsClient: apiextensionsclientset, 91 | namespace: ns, 92 | } 93 | methodLogger.Info("Initilized CustomResourceDefinition Kafkacluster cLient") 94 | 95 | return k, nil 96 | } 97 | 98 | func (c *CustomResourceController) Watch(client *rest.RESTClient, eventsChannel chan spec.KafkaclusterWatchEvent, signalChannel chan int) { 99 | methodLogger := logger.WithFields(log.Fields{"method": "Watch"}) 100 | 101 | stop := make(chan struct{}, 1) 102 | source := cache.NewListWatchFromClient( 103 | client, 104 | spec.CRDRessourcePlural, 105 | c.namespace, 106 | fields.Everything()) 107 | 108 | store, controller := cache.NewInformer( 109 | source, 110 | 111 | &spec.Kafkacluster{}, 112 | 113 | // resyncPeriod 114 | // Every resyncPeriod, all resources in the cache will retrigger events. 115 | // Set to 0 to disable the resync. 116 | 0, 117 | 118 | cache.ResourceEventHandlerFuncs{ 119 | AddFunc: func(obj interface{}) { 120 | cluster := obj.(*spec.Kafkacluster) 121 | methodLogger.WithFields(log.Fields{"watchFunction": "ADDED"}).Info(spec.PrintCluster(cluster)) 122 | var event spec.KafkaclusterWatchEvent 123 | //TODO 124 | event.Type = "ADDED" 125 | event.Object = *cluster 126 | eventsChannel <- event 127 | }, 128 | 129 | UpdateFunc: func(old, new interface{}) { 130 | oldCluster := old.(*spec.Kafkacluster) 131 | newCluster := new.(*spec.Kafkacluster) 132 | methodLogger.WithFields(log.Fields{ 133 | "eventType": "UPDATED", 134 | "old": spec.PrintCluster(oldCluster), 135 | "new": spec.PrintCluster(newCluster), 136 | }).Debug("Recieved Update Event") 137 | var event spec.KafkaclusterWatchEvent 138 | //TODO refactor this. 
use old/new in EventChannel 139 | event.Type = "UPDATED" 140 | event.Object = *newCluster 141 | event.OldObject = *oldCluster 142 | eventsChannel <- event 143 | }, 144 | 145 | DeleteFunc: func(obj interface{}) { 146 | cluster := obj.(*spec.Kafkacluster) 147 | var event spec.KafkaclusterWatchEvent 148 | event.Type = "DELETED" 149 | event.Object = *cluster 150 | eventsChannel <- event 151 | }, 152 | }) 153 | 154 | // the controller run starts the event processing loop 155 | go controller.Run(stop) 156 | methodLogger.Info(store) 157 | 158 | go func() { 159 | select { 160 | case <-signalChannel: 161 | methodLogger.Warn("recieved shutdown signal, stopping informer") 162 | close(stop) 163 | } 164 | }() 165 | } 166 | 167 | func (c *CustomResourceController) MonitorKafkaEvents(eventsChannel chan spec.KafkaclusterWatchEvent, signalChannel chan int) { 168 | methodLogger := logger.WithFields(log.Fields{"method": "MonitorKafkaEvents"}) 169 | methodLogger.Info("Starting Watch") 170 | c.Watch(c.crdClient, eventsChannel, signalChannel) 171 | } 172 | 173 | func configureConfig(cfg *rest.Config) error { 174 | scheme := runtime.NewScheme() 175 | 176 | if err := spec.AddToScheme(scheme); err != nil { 177 | return err 178 | } 179 | 180 | cfg.GroupVersion = &spec.SchemeGroupVersion 181 | cfg.APIPath = "/apis" 182 | cfg.ContentType = runtime.ContentTypeJSON 183 | cfg.NegotiatedSerializer = serializer.DirectCodecFactory{CodecFactory: serializer.NewCodecFactory(scheme)} 184 | 185 | return nil 186 | } 187 | 188 | func newCRDClient(config *rest.Config) (*rest.RESTClient, error) { 189 | 190 | var cdrconfig *rest.Config 191 | cdrconfig = config 192 | configureConfig(cdrconfig) 193 | 194 | crdClient, err := rest.RESTClientFor(cdrconfig) 195 | if err != nil { 196 | panic(err) 197 | } 198 | 199 | return crdClient, nil 200 | } 201 | 202 | func (c *CustomResourceController) CreateCustomResourceDefinition() (*apiextensionsv1beta1.CustomResourceDefinition, error) { 203 | methodLogger := logger.WithFields(log.Fields{"method": "CreateCustomResourceDefinition"}) 204 | 205 | crd := &apiextensionsv1beta1.CustomResourceDefinition{ 206 | ObjectMeta: metav1.ObjectMeta{ 207 | Name: spec.CRDFullName, 208 | }, 209 | Spec: apiextensionsv1beta1.CustomResourceDefinitionSpec{ 210 | Group: spec.CRDGroupName, 211 | Version: spec.CRDVersion, 212 | Scope: apiextensionsv1beta1.NamespaceScoped, 213 | Names: apiextensionsv1beta1.CustomResourceDefinitionNames{ 214 | Plural: spec.CRDRessourcePlural, 215 | Kind: reflect.TypeOf(spec.Kafkacluster{}).Name(), 216 | }, 217 | }, 218 | } 219 | _, err := c.ApiExtensionsClient.ApiextensionsV1beta1().CustomResourceDefinitions().Create(crd) 220 | if err != nil { 221 | methodLogger.WithFields(log.Fields{ 222 | "error": err, 223 | "crd": crd, 224 | }).Error("Error while creating CRD") 225 | return nil, err 226 | } 227 | 228 | // wait for CRD being established 229 | methodLogger.Debug("Created CRD, wating till its established") 230 | err = wait.Poll(500*time.Millisecond, 60*time.Second, func() (bool, error) { 231 | crd, err = c.ApiExtensionsClient.ApiextensionsV1beta1().CustomResourceDefinitions().Get(spec.CRDFullName, metav1.GetOptions{}) 232 | if err != nil { 233 | return false, err 234 | } 235 | for _, cond := range crd.Status.Conditions { 236 | switch cond.Type { 237 | case apiextensionsv1beta1.Established: 238 | if cond.Status == apiextensionsv1beta1.ConditionTrue { 239 | return true, err 240 | } 241 | case apiextensionsv1beta1.NamesAccepted: 242 | if cond.Status == apiextensionsv1beta1.ConditionFalse { 243 
| fmt.Printf("Name conflict: %v\n", cond.Reason) 244 | methodLogger.WithFields(log.Fields{ 245 | "error": err, 246 | "crd": crd, 247 | "reason": cond.Reason, 248 | }).Error("Naming Conflict with created CRD") 249 | } 250 | } 251 | } 252 | return false, err 253 | }) 254 | if err != nil { 255 | deleteErr := c.ApiExtensionsClient.ApiextensionsV1beta1().CustomResourceDefinitions().Delete(spec.CRDFullName, nil) 256 | if deleteErr != nil { 257 | return nil, errors.NewAggregate([]error{err, deleteErr}) 258 | } 259 | return nil, err 260 | } 261 | return crd, nil 262 | } 263 | 264 | func (c *CustomResourceController) GetKafkaClusters() ([]spec.Kafkacluster, error) { 265 | methodLogger := logger.WithFields(log.Fields{"method": "GetKafkaClusters"}) 266 | 267 | exampleList := spec.KafkaclusterList{} 268 | err := c.crdClient.Get().Resource(spec.CRDRessourcePlural).Do().Into(&exampleList) 269 | 270 | if err != nil { 271 | methodLogger.WithFields(log.Fields{ 272 | "response": exampleList, 273 | "error": err, 274 | }).Error("Error response from API") 275 | return nil, err 276 | } 277 | methodLogger.WithFields(log.Fields{ 278 | "response": exampleList, 279 | }).Info("KafkaCluster received") 280 | 281 | return exampleList.Items, nil 282 | } 283 | 284 | func (c *CustomResourceController) SetKafkaclusterState(cluster spec.Kafkacluster) error { 285 | methodLogger := logger.WithFields(log.Fields{ 286 | "method": "SetKafkaclusterState", 287 | "name": cluster.ObjectMeta.Name, 288 | "namespace": cluster.ObjectMeta.Namespace, 289 | }) 290 | 291 | methodLogger.Debug("setting state for cluster") 292 | 293 | var result spec.Kafkacluster 294 | err := c.crdClient.Put(). //TODO check if PATCH is maybe better 295 | Resource(spec.CRDRessourcePlural). 296 | Namespace(cluster.ObjectMeta.Namespace). 297 | Body(cluster). 
298 | Do().Into(&result) 299 | 300 | if err != nil { 301 | methodLogger.Error("Cant set state on CRD") 302 | } 303 | 304 | methodLogger.WithField("result", result).Debug("Set state for CRD") 305 | return err 306 | } 307 | -------------------------------------------------------------------------------- /cruisecontrol/cruise-control.go: -------------------------------------------------------------------------------- 1 | package cruisecontrol 2 | 3 | import ( 4 | "net/http" 5 | 6 | "io/ioutil" 7 | 8 | "errors" 9 | 10 | "github.com/krallistic/kafka-operator/spec" 11 | 12 | cruisecontrol_kube "github.com/krallistic/kafka-operator/kube/cruisecontrol" 13 | 14 | log "github.com/Sirupsen/logrus" 15 | ) 16 | 17 | type CruiseControlExecuterState int32 18 | type CruiseControlMonitorState int32 19 | 20 | type CruiseControlState struct { 21 | MonitorState CruiseControlMonitorState 22 | ExecuterState CruiseControlExecuterState 23 | ReplicasToMove int 24 | FinshedReplicas int 25 | BootstrapProgress int 26 | ValidPartitions int 27 | TotalPartitions int 28 | ProposalReady bool 29 | } 30 | 31 | const ( 32 | basePath = "kafkacruisecontrol" 33 | statusBasePath = "status" 34 | 35 | removeBrokerAction = "remove_broker" 36 | addBrokerAction = "add_broker" 37 | 38 | NO_TASK = iota + 1 39 | EXECUTION_STARTED 40 | REPLICA_MOVEMENT_IN_PROGRESS 41 | LEADER_MOVEMENT_IN_PROGRESS 42 | ) 43 | 44 | func GetCruiseControlStatus(url string) (string, error) { 45 | methodLogger := log.WithFields(log.Fields{ 46 | "method": "GetCruiseControlStatus", 47 | }) 48 | methodLogger.Info("Getting Cruise Control Status") 49 | 50 | requestURl := url + "/" + statusBasePath 51 | rsp, err := http.Get(requestURl) 52 | if err != nil { 53 | return "nil", err 54 | } 55 | defer rsp.Body.Close() 56 | if rsp.StatusCode != 200 { 57 | methodLogger.WithField("response", rsp).Warn("Got non 200 response from CruiseControl while reading state") 58 | return "nil", errors.New("Non 200 error code from cruise-control while reading state: " + rsp.Status) 59 | } 60 | body, err := ioutil.ReadAll(rsp.Body) 61 | if err != nil { 62 | methodLogger.WithField("error", err).Error("Error while reading response body from cruisecontrol state") 63 | return "nil", err 64 | } 65 | sBody := string(body) 66 | 67 | //TODO parse sBody Respone 68 | 69 | return sBody, nil 70 | } 71 | 72 | func DownsizeCluster(cluster spec.Kafkacluster, brokerToDelete string) error { 73 | methodLogger := log.WithFields(log.Fields{ 74 | "method": "DownsizeCluster", 75 | "name": cluster.ObjectMeta.Name, 76 | "namespace": cluster.ObjectMeta.Namespace, 77 | }) 78 | //TODO generate Cluster CruiseControl Service URL 79 | // 80 | 81 | cruiseControlURL := "http://" + cruisecontrol_kube.GetCruiseControlName(cluster) + "." + cluster.ObjectMeta.Namespace + ".svc.cluster.local:9095" 82 | 83 | options := map[string]string{ 84 | "brokerid": brokerToDelete, 85 | "dryrun": "false", 86 | } 87 | 88 | rsp, err := postCruiseControl(cruiseControlURL, removeBrokerAction, options) 89 | if err != nil { 90 | methodLogger.Error("Cant downsize cluster since post to cc failed") 91 | //TODO do-something? 
92 | return err 93 | } 94 | methodLogger.WithField("response", rsp).Info("Initiated Dowsize to cruise control") 95 | return nil 96 | } 97 | 98 | func postCruiseControl(url string, action string, options map[string]string) (*http.Response, error) { 99 | methodLogger := log.WithFields(log.Fields{ 100 | "method": "callCruiseControl", 101 | "request-url": url, 102 | "request_values": options, 103 | "action": action, 104 | }) 105 | methodLogger.Info("Calling Cruise Control") 106 | 107 | optionURL := "" 108 | for option, value := range options { 109 | optionURL = optionURL + option + "=" + value + "&&" 110 | } 111 | 112 | requestURl := url + "/" + basePath + "/" + action + "?" + optionURL 113 | rsp, err := http.Post(requestURl, "text/plain", nil) 114 | if err != nil { 115 | methodLogger.WithField("error", err).Error("Error while talking to cruise-control") 116 | return nil, err 117 | } 118 | if rsp.StatusCode != 200 { 119 | methodLogger.WithField("response", rsp).Warn("Got non 200 response from CruiseControl") 120 | return nil, errors.New("Non 200 response from cruise-control: " + rsp.Status) 121 | } 122 | 123 | return rsp, nil 124 | } 125 | -------------------------------------------------------------------------------- /cruisecontrol/cruise-control_test.go: -------------------------------------------------------------------------------- 1 | package cruisecontrol 2 | 3 | import ( 4 | "fmt" 5 | "net/http" 6 | "net/http/httptest" 7 | "testing" 8 | ) 9 | 10 | // func TestClientUtil_DownsizeCluster_removeBroker(t *testing.T) { 11 | 12 | // testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 13 | // w.WriteHeader(http.StatusOK) 14 | // fmt.Println(r.URL) 15 | // if r.Method != "POST" { 16 | // t.Errorf("Expected 'POST' request, got ‘%s’", r.Method) 17 | // } 18 | // if r.URL.EscapedPath() != "/kafkacruisecontrol/remove_broker" { 19 | // t.Errorf("Expected request to '/kafkacruisecontrol/remove_broker', got ‘%s’", r.URL.EscapedPath()) 20 | // } 21 | // r.ParseForm() 22 | // brokerID := r.Form.Get("brokerid") 23 | // if brokerID != "1" { 24 | // t.Errorf("Expected request to have brokerid=1’, got: ‘%s’", brokerID) 25 | // } 26 | // dryrun := r.Form.Get("dryrun") 27 | // if dryrun != "false" { 28 | // t.Errorf("Expected request to have dryrun=false’, got: ‘%s’", dryrun) 29 | // } 30 | // })) 31 | 32 | // defer testServer.Close() 33 | 34 | // spec := spec.Kafkacluster{ 35 | // ObjectMeta: metav1.ObjectMeta{ 36 | // Name: "test-cluster", 37 | // Namespace: "test", 38 | // }, 39 | // Spec: spec.KafkaclusterSpec{ 40 | // Image: "testImage", 41 | // BrokerCount: 1, 42 | // JmxSidecar: false, 43 | // ZookeeperConnect: "testZookeeperConnect", 44 | // }, 45 | // } 46 | 47 | // err := DownsizeCluster(spec, "2") 48 | // if err != nil { 49 | // t.Errorf("Unexcepted returned Error", err) 50 | // } 51 | 52 | // } 53 | 54 | func TestClientUtil_postCruiseControl_removeBroker(t *testing.T) { 55 | 56 | testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 57 | w.WriteHeader(http.StatusOK) 58 | fmt.Println(r) 59 | if r.Method != "POST" { 60 | t.Errorf("Expected 'POST' request, got ‘%s’", r.Method) 61 | } 62 | if r.URL.EscapedPath() != "/kafkacruisecontrol/remove_broker" { 63 | t.Errorf("Expected request to '/kafkacruisecontrol/remove_broker', got ‘%s’", r.URL.EscapedPath()) 64 | } 65 | r.ParseForm() 66 | brokerID := r.Form.Get("brokerid") 67 | if brokerID != "1" { 68 | t.Errorf("Expected request to have brokerid=1’, got: ‘%s’", 
brokerID) 69 | } 70 | dryrun := r.Form.Get("dryrun") 71 | if dryrun != "false" { 72 | t.Errorf("Expected request to have dryrun=false’, got: ‘%s’", dryrun) 73 | } 74 | })) 75 | defer testServer.Close() 76 | requestUrl := testServer.URL 77 | 78 | values := map[string]string{ 79 | "brokerid": "1", 80 | "dryrun": "false", 81 | } 82 | 83 | _, err := postCruiseControl(requestUrl, "remove_broker", values) 84 | if err != nil { 85 | t.Errorf("Unexcepted Error returned", err) 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /deploy/kafka-operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: kafka-operator 5 | spec: 6 | replicas: 1 7 | template: 8 | metadata: 9 | labels: 10 | name: kafka-operator 11 | type: operator 12 | spec: 13 | containers: 14 | - name: operator 15 | image: krallistic/kafka-operator:latest -------------------------------------------------------------------------------- /e2e-tests/01-test-basic-setup.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "hack/kubernetes_helper" 4 | 5 | 6 | setup() { 7 | echo "Setup" 8 | kubectl apply -f files/kafka-operator.yaml 9 | wait_for_operator_running_or_fail 10 | } 11 | 12 | teardown() { 13 | echo "Teardown" 14 | kubectl delete -f files/kafka-operator.yaml 15 | 16 | } 17 | 18 | @test "Test if operator is running" { 19 | wait_for_operator_running_or_fail 20 | } 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /e2e-tests/02-test-kafka-setup.bats: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bats 2 | 3 | load "hack/kubernetes_helper" 4 | 5 | 6 | #Global Suite setup, bats dont support these, so hack with BATS_TESTNumer 7 | suite-setup() { 8 | if [ "$BATS_TEST_NUMBER" -eq 1 ]; then 9 | echo "Global Setup" 10 | kubectl apply -f files/manual-zookeeper.yaml 11 | kubectl apply -f files/kafka-operator.yaml 12 | wait_for_operator_running_or_fail 13 | wait_for_zookeeper_running_or_fail 14 | kubectl apply -f files/02-basic-cluster.yaml 15 | wait_for_brokers_running_or_fail 3 16 | fi 17 | } 18 | 19 | suite-teardown() { 20 | if [ "$BATS_TEST_NUMBER" -eq 3 ]; then 21 | echo "Global Teardown" 22 | kubectl delete -f files/02-basic-cluster.yaml 23 | kubectl delete -f files/manual-zookeeper.yaml 24 | #TODO wait till sts is fully deleted 25 | kubectl delete -f files/kafka-operator.yaml 26 | kubectl delete statefulset test-cluster-1 27 | fi 28 | } 29 | 30 | setup() { 31 | #Empty 32 | suite-setup 33 | echo "Setup" 34 | } 35 | 36 | teardown() { 37 | #Empty for now since we want a global setup 38 | echo "Teardown" 39 | suite-teardown 40 | } 41 | 42 | @test "Test if headless service is created" { 43 | run kubectl get svc test-cluster-1 44 | [ "$status" -eq 0 ] 45 | } 46 | 47 | @test "Test if direct Broker are created" { 48 | run kubectl get svc test-cluster-1-broker-0 49 | [ "$status" -eq 0 ] 50 | run kubectl get svc test-cluster-1-broker-1 51 | [ "$status" -eq 0 ] 52 | run kubectl get svc test-cluster-1-broker-2 53 | [ "$status" -eq 0 ] 54 | } 55 | 56 | @test "test brokers are created and running" { 57 | wait_for_brokers_running_or_fail 3 58 | } 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /e2e-tests/files/02-basic-cluster.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: "krallistic.github.com/v1" 2 | kind: "Kafkacluster" 3 | metadata: 4 | name: test-cluster-1 5 | spec: 6 | brokerCount: 3 7 | topics: 8 | - name: "test1" 9 | replicationFactor: 1 10 | partitions: 1 11 | - name: "test2" 12 | replicationFactor: 2 13 | partitions: 2 14 | kafkaOptions: 15 | logRetentionHours: 24 16 | autoCreateTopics: false 17 | compressionType: "gzip" 18 | zookeeperConnect: zk-headless.default.svc.cluster.local 19 | image: confluentinc/cp-kafka:latest 20 | leaderImbalanceRatio: 0.1 21 | leaderImbalanceInterval: 600 22 | storageClass: emptyDir 23 | minimumGracePeriod: 1200 24 | jmxSidecar: false 25 | resources: 26 | cpu: "1" 27 | memory: "1Gi" 28 | diskSpace: "50G" 29 | 30 | -------------------------------------------------------------------------------- /e2e-tests/files/kafka-operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: kafka-operator 5 | spec: 6 | replicas: 1 7 | template: 8 | metadata: 9 | labels: 10 | name: kafka-operator 11 | type: operator 12 | spec: 13 | containers: 14 | - name: operator 15 | image: krallistic/kafka-operator:latest -------------------------------------------------------------------------------- /e2e-tests/files/manual-zookeeper.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: zk-headless 5 | labels: 6 | app: zk-headless 7 | spec: 8 | ports: 9 | - port: 2888 10 | name: server 11 | - port: 3888 12 | name: leader-election 13 | clusterIP: None 14 | selector: 15 | app: zk 16 | --- 17 | apiVersion: v1 18 | kind: ConfigMap 19 | metadata: 20 | name: zk-config 21 | data: 22 | ensemble: "zk-0" 23 | jvm.heap: "2G" 24 | tick: "2000" 25 | init: "10" 26 | sync: "5" 27 | client.cnxns: "60" 28 | snap.retain: "3" 29 | purge.interval: "1" 30 | 31 | --- 32 | apiVersion: apps/v1beta1 33 | kind: StatefulSet 34 | metadata: 35 | name: zk 36 | spec: 37 | serviceName: zk-headless 38 | replicas: 1 39 | template: 40 | metadata: 41 | labels: 42 | app: zk 43 | annotations: 44 | pod.alpha.kubernetes.io/initialized: "true" 45 | spec: 46 | containers: 47 | - name: k8szk 48 | imagePullPolicy: Always 49 | image: gcr.io/google_samples/k8szk:v1 50 | ports: 51 | - containerPort: 2181 52 | name: client 53 | - containerPort: 2888 54 | name: server 55 | - containerPort: 3888 56 | name: leader-election 57 | env: 58 | - name : ZK_ENSEMBLE 59 | valueFrom: 60 | configMapKeyRef: 61 | name: zk-config 62 | key: ensemble 63 | - name : ZK_HEAP_SIZE 64 | valueFrom: 65 | configMapKeyRef: 66 | name: zk-config 67 | key: jvm.heap 68 | - name : ZK_TICK_TIME 69 | valueFrom: 70 | configMapKeyRef: 71 | name: zk-config 72 | key: tick 73 | - name : ZK_INIT_LIMIT 74 | valueFrom: 75 | configMapKeyRef: 76 | name: zk-config 77 | key: init 78 | - name : ZK_SYNC_LIMIT 79 | valueFrom: 80 | configMapKeyRef: 81 | name: zk-config 82 | key: tick 83 | - name : ZK_MAX_CLIENT_CNXNS 84 | valueFrom: 85 | configMapKeyRef: 86 | name: zk-config 87 | key: client.cnxns 88 | - name: ZK_SNAP_RETAIN_COUNT 89 | valueFrom: 90 | configMapKeyRef: 91 | name: zk-config 92 | key: snap.retain 93 | - name: ZK_PURGE_INTERVAL 94 | valueFrom: 95 | configMapKeyRef: 96 | name: zk-config 97 | key: purge.interval 98 | - name: ZK_CLIENT_PORT 99 | value: "2181" 100 | - name: ZK_SERVER_PORT 101 | value: "2888" 102 | - name: 
ZK_ELECTION_PORT 103 | value: "3888" 104 | 105 | command: 106 | - sh 107 | - -c 108 | - zkGenConfig.sh && zkServer.sh start-foreground 109 | readinessProbe: 110 | exec: 111 | command: 112 | - "zkOk.sh" 113 | initialDelaySeconds: 15 114 | timeoutSeconds: 5 115 | livenessProbe: 116 | exec: 117 | command: 118 | - "zkOk.sh" 119 | initialDelaySeconds: 15 120 | timeoutSeconds: 5 121 | volumeMounts: 122 | - name: datadir 123 | mountPath: /var/lib/zookeeper 124 | securityContext: 125 | runAsUser: 1000 126 | fsGroup: 1000 127 | volumes: 128 | - name: datadir 129 | emptyDir: {} -------------------------------------------------------------------------------- /e2e-tests/hack/delete-all.sh: -------------------------------------------------------------------------------- 1 | kubectl delete -f files/02-basic-cluster.yaml 2 | kubectl delete -f files/02-basic-cluster.yaml 3 | kubectl delete -f files/manual-zookeeper.yaml 4 | kubectl delete -f files/kafka-operator.yaml 5 | kubectl delete statefulset test-cluster-1 6 | kubectl delete service test-cluster-1 7 | kubectl delete service test-cluster-1-broker-0 8 | kubectl delete service test-cluster-1-broker-1 9 | kubectl delete service test-cluster-1-broker-2 -------------------------------------------------------------------------------- /e2e-tests/hack/delete-gcloud-cluster.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | gcloud container clusters delete kafka-operator-test-cluster --no-async 4 | -------------------------------------------------------------------------------- /e2e-tests/hack/kubernetes_helper.bash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | is_operator_running() { 4 | PHASE=$(kubectl get pod -l name=kafka-operator,type=operator -ojson | jq -r '.items[] | .status.phase') 5 | EXPECTED='Running' 6 | if [ "$EXPECTED" = "$PHASE" ] 7 | then 8 | return true 9 | else 10 | return false 11 | fi 12 | } 13 | 14 | wait_for_operator_running_or_fail() { 15 | for try in {1..10} ; do 16 | PHASE=$(kubectl get pod -l name=kafka-operator,type=operator -ojson | jq -r '.items[] | .status.phase') 17 | EXPECTED='Running' 18 | if [ "$EXPECTED" = "$PHASE" ] 19 | then 20 | return 0 21 | else 22 | sleep 10 23 | fi 24 | done 25 | echo "Waited for 100 seconds, operator not ready" 26 | return 1 27 | } 28 | 29 | wait_for_zookeeper_running_or_fail() { 30 | for try in {1..10} ; do 31 | PHASE=$(kubectl get pod -l app=zk -ojson | jq -r '.items[] | .status.phase') 32 | EXPECTED='Running' 33 | if [ "$EXPECTED" = "$PHASE" ] 34 | then 35 | return 0 36 | else 37 | sleep 10 38 | fi 39 | done 40 | echo "Waited for 100 seconds, zookeeper not ready" 41 | return 1 42 | } 43 | 44 | wait_for_broker_X_running_or_fail() { 45 | echo "Waiting till broker $1 is ready" 46 | START_=0 47 | END=18 48 | while [[ $i -le $END ]] 49 | do 50 | PHASE=$(kubectl get pod -l creator=kafka-operator,kafka_broker_id=$1 -ojson | jq -r '.items[] | .status.phase') 51 | EXPECTED='Running' 52 | if [ "$EXPECTED" = "$PHASE" ] 53 | then 54 | return 0 55 | else 56 | echo "Sleeping 10" 57 | sleep 10 58 | fi 59 | ((i = i + 1)) 60 | done 61 | echo "Waited for 180 seconds, Broker not ready" 62 | return 1 63 | } 64 | 65 | wait_for_brokers_running_or_fail() { 66 | START_INDEX=0 67 | END_INDEX=2 68 | ## save $START, just in case if we need it later ## 69 | index=$START_INDEX 70 | while [[ $index -le $END_INDEX ]] 71 | do 72 | wait_for_broker_X_running_or_fail "$index" 73 | if [ "$?" 
-eq 1 ] 74 | then 75 | echo "Waited to long for Broker $index" 76 | return 1 77 | fi 78 | ((index = index + 1)) 79 | done 80 | return 0 81 | } 82 | -------------------------------------------------------------------------------- /e2e-tests/hack/setup-gcloud-cluster.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | gcloud config set compute/zone europe-west1-d 4 | 5 | 6 | gcloud container clusters create kafka-operator-test-cluster \ 7 | --num-nodes 3 \ 8 | --machine-type n1-standard-2 \ 9 | --scopes storage-rw \ 10 | --preemptible \ 11 | --cluster-version=1.7.6-gke.1 \ 12 | --no-async \ 13 | --enable-kubernetes-alpha 14 | 15 | gcloud container clusters get-credentials kafka-operator-test-cluster 16 | -------------------------------------------------------------------------------- /e2e-tests/run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "Setting Up Cluster" 3 | source hack/setup-gcloud-cluster.sh 4 | 5 | echo "Running BATS Tests" 6 | bats 01-test-basic-setup.bats 7 | bats 02-test-kafka-setup.bats 8 | 9 | echo "Destroying Cluster" 10 | source hack/delete-gcloud-cluser.sh -------------------------------------------------------------------------------- /example/busybox.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: busybox 5 | namespace: default 6 | spec: 7 | containers: 8 | - image: busybox 9 | command: 10 | - sleep 11 | - "3600" 12 | imagePullPolicy: IfNotPresent 13 | name: busybox 14 | restartPolicy: Always -------------------------------------------------------------------------------- /example/kafka-cluster.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "krallistic.github.com/v1" 2 | kind: "Kafkacluster" 3 | metadata: 4 | name: test-cluster-1 5 | spec: 6 | brokerCount: 5 7 | topics: 8 | - name: "test1" 9 | replicationFactor: 1 10 | partitions: 1 11 | - name: "test2" 12 | replicationFactor: 2 13 | partitions: 2 14 | kafkaOptions: 15 | logRetentionHours: 24 16 | autoCreateTopics: false 17 | compressionType: "gzip" 18 | zookeeperConnect: zk-headless.default.svc.cluster.local 19 | image: confluentinc/cp-kafka:latest 20 | leaderImbalanceRatio: 0.1 21 | leaderImbalanceInterval: 600 22 | storageClass: emptyDir 23 | minimumGracePeriod: 1200 24 | jmxSidecar: false 25 | resources: 26 | cpu: "1" 27 | memory: "1Gi" 28 | diskSpace: "50G" 29 | -------------------------------------------------------------------------------- /example/kafka-operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: kafka-operator 5 | spec: 6 | replicas: 1 7 | template: 8 | metadata: 9 | labels: 10 | name: kafka-operator 11 | type: operator 12 | spec: 13 | containers: 14 | - name: operator 15 | image: krallistic/kafka-operator:latest -------------------------------------------------------------------------------- /example/kafkaClient.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: testclient 5 | spec: 6 | containers: 7 | - name: kafkac 8 | image: solsson/kafka-persistent:0.10.1 9 | command: 10 | - sh 11 | - -c 12 | - "exec tail -f /dev/null" 
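Not part of the repository files: purely as an illustrative sketch, the `testclient` pod above can be used to talk to a cluster created from `example/kafka-cluster.yaml`. This assumes the Kafka CLI tools are on the `PATH` inside the image and that the load-balanced `test-cluster-1` service exists (see the README output); adjust paths and service names as needed:

```bash
# Produce a few test messages to the managed topic "test1" through the cluster service
kubectl exec -it testclient -- kafka-console-producer.sh \
  --broker-list test-cluster-1.default.svc.cluster.local:9092 --topic test1

# Consume them back from the beginning
kubectl exec -it testclient -- kafka-console-consumer.sh \
  --bootstrap-server test-cluster-1.default.svc.cluster.local:9092 --topic test1 --from-beginning
```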
-------------------------------------------------------------------------------- /example/kafkaFullOptions.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "incubator.test.com/v1" 2 | kind: "KafkaCluster" 3 | metadata: 4 | name: hello-world-cluster-2 5 | spec: 6 | brokerCount: 2 7 | managedTopics: 8 | - name: "test1" 9 | replicationFactor: 1 10 | partitions: 1 11 | - name: "test2" 12 | replicationFactor: 2 13 | partitions: 2 14 | kafkaOptions: 15 | logRetentionHours: 24 16 | autoCreateTopicsEnable: false 17 | autoLeaderRebalanceEnable: true 18 | backgroudThreads: 10 19 | compressionType: "gzip" 20 | deleteTopicEnable: false 21 | leaderImbalanceCheckIntervalSeconds: 300 22 | leaderImbalanceBrokerPercentage: 10 23 | logFlushIntervalMessages: 10 24 | zookeeperConnect: zk-headless.default.svc.cluster.local 25 | image: confluentinc/cp-kafka:latest 26 | leaderImbalanceRatio: 0.1 27 | leaderImbalanceInterval: 600 28 | storageClass: emptyDir 29 | minimumGracePeriod: 1200 30 | jmxSidecar: false 31 | resources: 32 | cpu: "0.1" 33 | memory: "2Mi" 34 | diskSpace: "50G" 35 | 36 | -------------------------------------------------------------------------------- /example/manual-zookeeper.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: zk-headless 5 | labels: 6 | app: zk-headless 7 | spec: 8 | ports: 9 | - port: 2888 10 | name: server 11 | - port: 3888 12 | name: leader-election 13 | - port: 2181 14 | name: client 15 | clusterIP: None 16 | selector: 17 | app: zk 18 | --- 19 | apiVersion: v1 20 | kind: ConfigMap 21 | metadata: 22 | name: zk-config 23 | data: 24 | ensemble: "zk-0" 25 | jvm.heap: "2G" 26 | tick: "2000" 27 | init: "10" 28 | sync: "5" 29 | client.cnxns: "60" 30 | snap.retain: "3" 31 | purge.interval: "1" 32 | 33 | --- 34 | apiVersion: apps/v1beta1 35 | kind: StatefulSet 36 | metadata: 37 | name: zk 38 | spec: 39 | serviceName: zk-headless 40 | replicas: 1 41 | template: 42 | metadata: 43 | labels: 44 | app: zk 45 | annotations: 46 | pod.alpha.kubernetes.io/initialized: "true" 47 | spec: 48 | containers: 49 | - name: k8szk 50 | imagePullPolicy: Always 51 | image: gcr.io/google_samples/k8szk:v1 52 | ports: 53 | - containerPort: 2181 54 | name: client 55 | - containerPort: 2888 56 | name: server 57 | - containerPort: 3888 58 | name: leader-election 59 | env: 60 | - name : ZK_ENSEMBLE 61 | valueFrom: 62 | configMapKeyRef: 63 | name: zk-config 64 | key: ensemble 65 | - name : ZK_HEAP_SIZE 66 | valueFrom: 67 | configMapKeyRef: 68 | name: zk-config 69 | key: jvm.heap 70 | - name : ZK_TICK_TIME 71 | valueFrom: 72 | configMapKeyRef: 73 | name: zk-config 74 | key: tick 75 | - name : ZK_INIT_LIMIT 76 | valueFrom: 77 | configMapKeyRef: 78 | name: zk-config 79 | key: init 80 | - name : ZK_SYNC_LIMIT 81 | valueFrom: 82 | configMapKeyRef: 83 | name: zk-config 84 | key: tick 85 | - name : ZK_MAX_CLIENT_CNXNS 86 | valueFrom: 87 | configMapKeyRef: 88 | name: zk-config 89 | key: client.cnxns 90 | - name: ZK_SNAP_RETAIN_COUNT 91 | valueFrom: 92 | configMapKeyRef: 93 | name: zk-config 94 | key: snap.retain 95 | - name: ZK_PURGE_INTERVAL 96 | valueFrom: 97 | configMapKeyRef: 98 | name: zk-config 99 | key: purge.interval 100 | - name: ZK_CLIENT_PORT 101 | value: "2181" 102 | - name: ZK_SERVER_PORT 103 | value: "2888" 104 | - name: ZK_ELECTION_PORT 105 | value: "3888" 106 | 107 | command: 108 | - sh 109 | - -c 110 | - zkGenConfig.sh && zkServer.sh start-foreground 
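          # Both probes below run zkOk.sh from the k8szk sample image, which is assumed to issue a
          # ZooKeeper "ruok" check against the client port and exit non-zero until it answers "imok".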
111 | readinessProbe: 112 | exec: 113 | command: 114 | - "zkOk.sh" 115 | initialDelaySeconds: 15 116 | timeoutSeconds: 5 117 | livenessProbe: 118 | exec: 119 | command: 120 | - "zkOk.sh" 121 | initialDelaySeconds: 15 122 | timeoutSeconds: 5 123 | volumeMounts: 124 | - name: datadir 125 | mountPath: /var/lib/zookeeper 126 | securityContext: 127 | runAsUser: 1000 128 | fsGroup: 1000 129 | volumes: 130 | - name: datadir 131 | emptyDir: {} -------------------------------------------------------------------------------- /example/manualKafka.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: kafka-headless 5 | labels: 6 | app: kafka-headless 7 | spec: 8 | ports: 9 | - port: 9092 10 | name: client 11 | clusterIP: None 12 | selector: 13 | app: kafka 14 | --- 15 | apiVersion: apps/v1beta1 16 | kind: StatefulSet 17 | metadata: 18 | name: kafka 19 | spec: 20 | serviceName: kafka-headless 21 | replicas: 1 22 | template: 23 | metadata: 24 | labels: 25 | app: kafka 26 | annotations: 27 | pod.alpha.kubernetes.io/initialized: "true" 28 | spec: 29 | containers: 30 | - name: kafka 31 | imagePullPolicy: Always 32 | image: confluentinc/cp-kafka:latest 33 | ports: 34 | - containerPort: 9092 35 | name: client 36 | env: 37 | - name : KAFKA_ZOOKEEPER_CONNECT 38 | value: zk-headless.default.svc.cluster.local 39 | - name : KAFKA_ADVERTISED_LISTENERS 40 | value: "kafka-0.kafka-broker-svc.cluster.local:9092" 41 | - name : KAFKA_BROKER_ID 42 | value: "1" 43 | volumes: 44 | - name: datadir 45 | emptyDir: {} -------------------------------------------------------------------------------- /example/operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: kafka-operator 5 | spec: 6 | replicas: 1 7 | template: 8 | metadata: 9 | labels: 10 | name: kafka-operator 11 | type: operator 12 | spec: 13 | containers: 14 | - name: operator 15 | image: krallistic/kafka-operator:v0.0.2 16 | 17 | # imagePullPolicy: Always 18 | - name: kubectl 19 | image: gcr.io/google_containers/hyperkube:v1.5.2 20 | command: 21 | - "/hyperkube" 22 | args: 23 | - "kubectl" 24 | - "proxy" 25 | - "-p" 26 | - "8080" -------------------------------------------------------------------------------- /example/prometheus.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: extensions/v1beta1 3 | kind: Deployment 4 | metadata: 5 | labels: 6 | name: prometheus-deployment 7 | name: prometheus 8 | spec: 9 | replicas: 1 10 | template: 11 | metadata: 12 | labels: 13 | app: prometheus 14 | spec: 15 | containers: 16 | - image: quay.io/prometheus/prometheus:v1.0.1 17 | name: prometheus 18 | command: 19 | - "/bin/prometheus" 20 | args: 21 | - "-config.file=/etc/prometheus/prometheus.yml" 22 | - "-storage.local.path=/prometheus" 23 | - "-storage.local.retention=24h" 24 | ports: 25 | - containerPort: 9090 26 | protocol: TCP 27 | volumeMounts: 28 | - mountPath: "/prometheus" 29 | name: data 30 | - mountPath: "/etc/prometheus" 31 | name: config-volume 32 | resources: 33 | requests: 34 | cpu: 100m 35 | memory: 100Mi 36 | limits: 37 | cpu: 500m 38 | memory: 2500Mi 39 | volumes: 40 | - emptyDir: {} 41 | name: data 42 | - configMap: 43 | name: prometheus-config 44 | name: config-volume 45 | --- 46 | apiVersion: v1 47 | kind: ConfigMap 48 | metadata: 49 | name: prometheus-config 50 | data: 51 | prometheus.yml: | 
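      # Prometheus 1.x style configuration (matching the prometheus:v1.0.1 image above): the
      # kubernetes_sd_configs jobs below discover the API server, nodes, service endpoints,
      # services (probed through a blackbox exporter expected to be reachable as "blackbox"),
      # and pods, keeping only targets annotated with prometheus.io/scrape or prometheus.io/probe.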
52 | global: 53 | scrape_interval: 30s 54 | scrape_timeout: 30s 55 | scrape_configs: 56 | - job_name: 'prometheus' 57 | static_configs: 58 | - targets: ['localhost:9090'] 59 | - job_name: 'kubernetes-cluster' 60 | scheme: https 61 | tls_config: 62 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 63 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 64 | kubernetes_sd_configs: 65 | - api_servers: 66 | - 'https://kubernetes.default.svc' 67 | in_cluster: true 68 | role: apiserver 69 | - job_name: 'kubernetes-nodes' 70 | scheme: https 71 | tls_config: 72 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 73 | insecure_skip_verify: true 74 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 75 | kubernetes_sd_configs: 76 | - api_servers: 77 | - 'https://kubernetes.default.svc' 78 | in_cluster: true 79 | role: node 80 | relabel_configs: 81 | - action: labelmap 82 | regex: __meta_kubernetes_node_label_(.+) 83 | - job_name: 'kubernetes-service-endpoints' 84 | scheme: https 85 | kubernetes_sd_configs: 86 | - api_servers: 87 | - 'https://kubernetes.default.svc' 88 | in_cluster: true 89 | role: endpoint 90 | relabel_configs: 91 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] 92 | action: keep 93 | regex: true 94 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] 95 | action: replace 96 | target_label: __scheme__ 97 | regex: (https?) 98 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] 99 | action: replace 100 | target_label: __metrics_path__ 101 | regex: (.+) 102 | - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] 103 | action: replace 104 | target_label: __address__ 105 | regex: (.+)(?::\d+);(\d+) 106 | replacement: $1:$2 107 | - action: labelmap 108 | regex: __meta_kubernetes_service_label_(.+) 109 | - source_labels: [__meta_kubernetes_service_namespace] 110 | action: replace 111 | target_label: kubernetes_namespace 112 | - source_labels: [__meta_kubernetes_service_name] 113 | action: replace 114 | target_label: kubernetes_name 115 | - job_name: 'kubernetes-services' 116 | scheme: https 117 | metrics_path: /probe 118 | params: 119 | module: [http_2xx] 120 | kubernetes_sd_configs: 121 | - api_servers: 122 | - 'https://kubernetes.default.svc' 123 | in_cluster: true 124 | role: service 125 | relabel_configs: 126 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe] 127 | action: keep 128 | regex: true 129 | - source_labels: [__address__] 130 | target_label: __param_target 131 | - target_label: __address__ 132 | replacement: blackbox 133 | - source_labels: [__param_target] 134 | target_label: instance 135 | - action: labelmap 136 | regex: __meta_kubernetes_service_label_(.+) 137 | - source_labels: [__meta_kubernetes_service_namespace] 138 | target_label: kubernetes_namespace 139 | - source_labels: [__meta_kubernetes_service_name] 140 | target_label: kubernetes_name 141 | - job_name: 'kubernetes-pods' 142 | scheme: https 143 | kubernetes_sd_configs: 144 | - api_servers: 145 | - 'https://kubernetes.default.svc' 146 | in_cluster: true 147 | role: pod 148 | relabel_configs: 149 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] 150 | action: keep 151 | regex: true 152 | - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] 153 | action: replace 154 | target_label: __metrics_path__ 155 | regex: (.+) 156 | - source_labels: [__address__, 
__meta_kubernetes_pod_annotation_prometheus_io_port] 157 | action: replace 158 | regex: (.+):(?:\d+);(\d+) 159 | replacement: ${1}:${2} 160 | target_label: __address__ 161 | - action: labelmap 162 | regex: __meta_kubernetes_pod_label_(.+) 163 | - source_labels: [__meta_kubernetes_pod_namespace] 164 | action: replace 165 | target_label: kubernetes_namespace 166 | - source_labels: [__meta_kubernetes_pod_name] 167 | action: replace 168 | target_label: kubernetes_pod_name -------------------------------------------------------------------------------- /example/test.json: -------------------------------------------------------------------------------- 1 | { 2 | "apiVersion": "incubator.test.com/v1", 3 | "kind": "KafkaCluster", 4 | "metadata": { 5 | "name": "testkafkacluster" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /example/test.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: "kafka.operator.test/v1" 2 | kind: "KafkaCluster" 3 | metadata: 4 | name: "example-kafka-cluster" 5 | spec: 6 | BrokerNodes: 3 7 | ZookeeperConnect: "zookeeper:123" 8 | -------------------------------------------------------------------------------- /images/cruise-control/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8-alpine AS BUILD_IMAGE 2 | 3 | ENV APP_HOME=/root/dev/ 4 | WORKDIR $APP_HOME 5 | 6 | RUN apk --no-cache add gettext git bash 7 | #RUN apt-get update && apt-get install -y gettext git 8 | RUN git clone https://github.com/linkedin/cruise-control.git 9 | WORKDIR $APP_HOME/cruise-control 10 | 11 | COPY cruisecontrol.properties.tpl config/cruisecontrol.properties.tpl 12 | COPY setup-cruise-control.sh setup-cruise-control.sh 13 | RUN chmod +x setup-cruise-control.sh 14 | RUN ./gradlew jar copyDependantLibs 15 | RUN chmod +x kafka-cruise-control-start.sh 16 | 17 | CMD ["./setup-cruise-control.sh"] -------------------------------------------------------------------------------- /images/cruise-control/capacity.json: -------------------------------------------------------------------------------- 1 | { 2 | "brokerCapacities":[ 3 | { 4 | "brokerId": "-1", 5 | "capacity": { 6 | "DISK": "100000", 7 | "CPU": "100", 8 | "NW_IN": "10000", 9 | "NW_OUT": "10000" 10 | }, 11 | "doc": "This is the default capacity." 12 | }, 13 | { 14 | "brokerId": "0", 15 | "capacity": { 16 | "DISK": "500000", 17 | "CPU": "100", 18 | "NW_IN": "50000", 19 | "NW_OUT": "50000" 20 | }, 21 | "doc": "This overrides the capacity for broker 0" 22 | } 23 | ] 24 | } -------------------------------------------------------------------------------- /images/cruise-control/cruisecontrol.properties.tpl: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2017 LinkedIn Corp. Licensed under the BSD 2-Clause License (the "License"). See License in the project root for license information. 3 | # 4 | 5 | # This is an example property file for Kafka Cruise Control. See KafkaCruiseControlConfig for more details. 6 | 7 | # Configuration for the metadata client. 8 | # ======================================= 9 | 10 | # The Kafka cluster to control. 11 | bootstrap.servers=${BOOTSTRAP_BROKER} 12 | 13 | # The maximum interval in milliseconds between two metadata refreshes. 14 | #metadata.max.age.ms=300000 15 | 16 | # Client id for the cruise control. It is used for the metadata client. 
17 | #clients.id=kafka-cruise-control 18 | 19 | # The size of TCP send buffer bytes for the metadata client. 20 | #send.buffer.bytes=131072 21 | 22 | # The size of TCP receive buffer size for the metadata client. 23 | #receive.buffer.bytes=131072 24 | 25 | # The time to wait before disconnect an idle TCP connection. 26 | #connections.max.idle.ms=540000 27 | 28 | # The time to wait before reconnect to a given host. 29 | #reconnect.backoff.ms=50 30 | 31 | # The time to wait for a response from a host after sending a request. 32 | #request.timeout.ms=30000 33 | 34 | 35 | # Configurations for the load monitor 36 | # ======================================= 37 | 38 | # The number of metric fetcher thread to fetch metrics for the Kafka cluster 39 | num.metric.fetchers=1 40 | 41 | # The metric sampler class 42 | metric.sampler.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.CruiseControlMetricsReporterSampler 43 | # Configurations for CruiseControlMetricsReporterSampler 44 | metric.reporter.topic.pattern=__CruiseControlMetrics 45 | 46 | # The sample store class name 47 | sample.store.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.KafkaSampleStore 48 | 49 | # The config for the Kafka sample store to save the partition metric samples. 50 | partition.metric.sample.store.topic=__KafkaCruiseControlPartitionMetricSamples 51 | 52 | # The config for the Kafka sample store to save the model training samples. 53 | broker.metric.sample.store.topic=__KafkaCruiseControlModelTrainingSamples 54 | 55 | # The partition assignor class for the metric samplers. 56 | metric.sampler.partition.assignor.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.DefaultMetricSamplerPartitionAssignor 57 | 58 | # The metric sampling interval in milliseconds 59 | metric.sampling.interval.ms=120000 60 | 61 | # The load snapshot window size in milliseconds 62 | load.snapshot.window.ms=300000 63 | 64 | # The number of load snapshot windows to keep in memory 65 | num.load.snapshots=1 66 | 67 | # The minimum samples required for a partition in each load snapshot window 68 | min.samples.per.load.snapshot=1 69 | 70 | # The configuration for the BrokerCapacityConfigFileResolver 71 | capacity.config.file=config/capacity.json 72 | 73 | # Configurations for the analyzer 74 | # ======================================= 75 | 76 | # The list of goals to optimize the Kafka cluster for 77 | goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareCapacityGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.PotentialNwOutGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoals,com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal 78 | 79 | # The broker's disk capacity in megabytes. 80 | broker.disk.capacity.mb=1000000 81 | 82 | # The broker's CPU capacity in percentage. 
83 | broker.cpu.capacity=100.0 84 | 85 | # The broker's network inbound bandwidth in KB/s 86 | broker.network.inbound.capacity=100000 87 | 88 | # The broker's network output bandwidth in KB/s 89 | broker.network.outbound.capacity=100000 90 | 91 | # The minimum percentage of well monitored partitions out of all the partitions 92 | min.monitored.partition.percentage=0.95 93 | 94 | # The balance threshold for CPU 95 | cpu.balance.threshold=1.1 96 | 97 | # The balance threshold for disk 98 | disk.balance.threshold=1.1 99 | 100 | # The balance threshold for network inbound utilization 101 | network.inbound.balance.threshold=1.1 102 | 103 | # The balance threshold for network outbound utilizatoin 104 | network.outbound.balance.threshold=1.1 105 | 106 | # The capacity threshold for CPU in percentage 107 | cpu.capacity.threshold=0.8 108 | 109 | # The capacity threshold for disk in percentage 110 | disk.capacity.threshold=0.8 111 | 112 | # The capacity threshold for network inbound utilization in percentage 113 | network.inbound.capacity.threshold=0.8 114 | 115 | # The capacity threshold for network outbound utilization in percentage 116 | network.outbound.capacity.threshold=0.8 117 | 118 | # The threshold to define the cluster to be in a low CPU utilization state 119 | cpu.low.utilization.threshold=0.0 120 | 121 | # The threshold to define the cluster to be in a low disk utilization state 122 | disk.low.utilization.threshold=0.0 123 | 124 | # The threshold to define the cluster to be in a low network inbound utilization state 125 | network.inbound.low.utilization.threshold=0.0 126 | 127 | # The threshold to define the cluster to be in a low disk utilization state 128 | network.outbound.low.utilization.threshold=0.0 129 | 130 | # The maximum number of optimization proposal candidates should the analyzer precompute. 131 | max.proposal.candidates=10 132 | 133 | # How often should the cached proposal be expired and recalculated if necessary 134 | proposal.expiration.ms=60000 135 | 136 | # The number of threads to use for proposal candidate precomputing. 137 | num.proposal.precompute.threads=1 138 | 139 | # the topics that should be excluded from the partition movement. 140 | #topics.excluded.from.partition.movement 141 | 142 | # Configurations for the executor 143 | # ======================================= 144 | 145 | # The zookeeper connect of the Kafka cluster 146 | zookeeper.connect=${ZOOKEEPER_CONNECT} 147 | 148 | # The max number of partitions to move in/out on a given broker at a given time. 149 | num.concurrent.partition.movements.per.broker=10 150 | 151 | # The interval between two execution progress checks. 152 | execution.progress.check.interval.ms=10000 153 | 154 | 155 | # Configurations for anomaly detector 156 | # ======================================= 157 | 158 | # The goal violation notifier class 159 | goal.violation.notifier.class=com.linkedin.kafka.cruisecontrol.detector.notifier.SelfHealingNotifier 160 | 161 | # The anomaly detection interval 162 | anomaly.detection.interval.ms=10000 163 | 164 | # The goal violation to detect. 165 | anomaly.detection.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareCapacityGoal 166 | 167 | # The zk path to store failed broker information. 
168 | failed.brokers.zk.path=/CruiseControlBrokerList 169 | 170 | # Self-healing enabled 171 | self.healing.enabled=false -------------------------------------------------------------------------------- /images/cruise-control/setup-cruise-control.sh: -------------------------------------------------------------------------------- 1 | envsubst < config/cruisecontrol.properties.tpl > config/cruisecontrol.properties 2 | cat config/cruisecontrol.properties 3 | echo "Rendered Config" 4 | ./kafka-cruise-control-start.sh "$@" -------------------------------------------------------------------------------- /images/kafka/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM openjdk:8 AS BUILD_IMAGE 2 | ENV APP_HOME=/root/dev/cruise-control 3 | WORKDIR $APP_HOME 4 | RUN git clone https://github.com/linkedin/cruise-control.git 5 | WORKDIR $APP_HOME/cruise-control 6 | RUN git checkout ff461d1288c76c4ab8c41d21e6303fef06872e04 . 7 | RUN ./gradlew jar 8 | 9 | FROM confluentinc/cp-kafka:latest 10 | WORKDIR /root/ 11 | COPY --from=BUILD_IMAGE /root/dev/cruise-control/cruise-control/cruise-control-metrics-reporter/build/libs/cruise-control-metrics-reporter.jar /usr/share/java/kafka/ 12 | CMD ["/etc/confluent/docker/run"] -------------------------------------------------------------------------------- /kafka/kafka.go: -------------------------------------------------------------------------------- 1 | package kafka 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/Shopify/sarama" 7 | log "github.com/Sirupsen/logrus" 8 | "github.com/krallistic/kafka-operator/spec" 9 | "github.com/krallistic/kafka-operator/util" 10 | kazoo "github.com/krallistic/kazoo-go" 11 | ) 12 | 13 | var ( 14 | logger = log.WithFields(log.Fields{ 15 | "package": "kafka", 16 | }) 17 | ) 18 | 19 | type KafkaUtil struct { 20 | KafkaClient sarama.Client 21 | BrokerList []string 22 | ClusterName string 23 | KazooClient *kazoo.Kazoo 24 | } 25 | 26 | func New(clusterSpec spec.Kafkacluster) (*KafkaUtil, error) { 27 | brokerList := util.GetBrokerAdressess(clusterSpec) 28 | 29 | methodLogger := log.WithFields(log.Fields{ 30 | "method": "new", 31 | "clusterName": clusterSpec.ObjectMeta.Name, 32 | "brokers": brokerList, 33 | }) 34 | config := sarama.NewConfig() 35 | 36 | methodLogger.Info("Creating KafkaUtil") 37 | 38 | kz, err := kazoo.NewKazooFromConnectionString(clusterSpec.Spec.ZookeeperConnect, nil) 39 | if err != nil { 40 | methodLogger.WithFields(log.Fields{ 41 | "error": err, 42 | "zookeeperConnect": clusterSpec.Spec.ZookeeperConnect, 43 | }).Error("Cant create kazoo client") 44 | return nil, err 45 | } 46 | 47 | brokers, err := kz.BrokerList() 48 | if err != nil { 49 | methodLogger.WithFields(log.Fields{ 50 | "error": err, 51 | }).Error("Error reading brokers from zk") 52 | return nil, err 53 | } 54 | 55 | client, err := sarama.NewClient(brokers, config) 56 | if err != nil { 57 | methodLogger.WithFields(log.Fields{ 58 | "error": err, 59 | "brokers": brokers, 60 | }).Error("Error creating sarama kafka Client") 61 | return nil, err 62 | } 63 | 64 | k := &KafkaUtil{ 65 | KafkaClient: client, 66 | ClusterName: clusterSpec.ObjectMeta.Name, 67 | BrokerList: brokerList, 68 | KazooClient: kz, 69 | } 70 | 71 | methodLogger.Info("Initilized Kafka CLient, KazooClient and created KafkaUtil") 72 | k.ListTopics() 73 | return k, nil 74 | } 75 | 76 | func (k *KafkaUtil) ListTopics() ([]string, error) { 77 | fmt.Println("Listing KafkaTopics") 78 | topics, err := k.KafkaClient.Topics() 79 | if err != nil { 80 | 
return nil, err 81 | } 82 | 83 | for _, t := range topics { 84 | fmt.Println("Current topic:", t) 85 | } 86 | return topics, nil 87 | } 88 | 89 | func (k *KafkaUtil) GetPartitions(topic string) ([]int32, error) { 90 | partitions, err := k.KafkaClient.Partitions(topic) 91 | if err != nil { 92 | return nil, err 93 | } 94 | return partitions, nil 95 | } 96 | 97 | func (k *KafkaUtil) PrintFullStats() error { 98 | topics, err := k.ListTopics() 99 | if err != nil { 100 | return err 101 | } 102 | for _, topic := range topics { 103 | partitions, err := k.GetPartitions(topic) 104 | if err != nil { 105 | return err 106 | } 107 | fmt.Println("Topic: %s, Partitions %s", topic, partitions) 108 | } 109 | 110 | return nil 111 | } 112 | 113 | func (k *KafkaUtil) GetTopicsOnBroker(cluster spec.Kafkacluster, brokerId int32) ([]string, error) { 114 | methodLogger := log.WithFields(log.Fields{ 115 | "method": "GetTopicsOnBroker", 116 | "clusterName": cluster.ObjectMeta.Name, 117 | }) 118 | topicConfiguration, err := k.GetTopicConfiguration(cluster) 119 | if err != nil { 120 | return nil, err 121 | } 122 | topicOnBroker := make([]string, 0) 123 | 124 | for _, topic := range topicConfiguration { 125 | partitionLoop: 126 | for _, partition := range topic.Partitions { 127 | for _, replica := range partition.Replicas { 128 | if replica == brokerId { 129 | topicOnBroker = append(topicOnBroker, topic.Topic) 130 | break partitionLoop 131 | } 132 | } 133 | } 134 | } 135 | methodLogger.WithFields(log.Fields{ 136 | "topics": topicOnBroker, 137 | }).Debug("Topics on Broker") 138 | return topicOnBroker, nil 139 | } 140 | 141 | func (k *KafkaUtil) GetTopicConfiguration(cluster spec.Kafkacluster) ([]spec.KafkaTopic, error) { 142 | methodLogger := log.WithFields(log.Fields{ 143 | "method": "GetTopicConfiguration", 144 | "clusterName": cluster.ObjectMeta.Name, 145 | }) 146 | topics, err := k.KafkaClient.Topics() 147 | if err != nil { 148 | methodLogger.Error("Error Listing Topics") 149 | return nil, err 150 | } 151 | configuration := make([]spec.KafkaTopic, len(topics)) 152 | for i, topic := range topics { 153 | 154 | partitions, err := k.KafkaClient.Partitions(topic) 155 | if err != nil { 156 | methodLogger.Error("Error Listing Partitions") 157 | return nil, err 158 | } 159 | t := spec.KafkaTopic{ 160 | Topic: topic, 161 | PartitionFactor: int32(len(partitions)), 162 | ReplicationFactor: 3, 163 | Partitions: make([]spec.KafkaPartition, len(partitions)), 164 | } 165 | for j, partition := range partitions { 166 | replicas, err := k.KafkaClient.Replicas(topic, partition) 167 | if err != nil { 168 | methodLogger.Error("Error listing partitions") 169 | return nil, err 170 | } 171 | t.Partitions[j] = spec.KafkaPartition{ 172 | Partition: int32(j), 173 | Replicas: replicas, 174 | } 175 | } 176 | configuration[i] = t 177 | } 178 | return configuration, nil 179 | } 180 | 181 | func (k *KafkaUtil) RemoveTopicFromBrokers(cluster spec.Kafkacluster, brokerToDelete int32, topic string) error { 182 | methodLogger := log.WithFields(log.Fields{ 183 | "method": "RemoveTopicFromBrokers", 184 | "clusterName": cluster.ObjectMeta.Name, 185 | "brokerToDelte": brokerToDelete, 186 | "topic": topic, 187 | }) 188 | 189 | brokersToDelete := []int32{brokerToDelete} 190 | err := k.KazooClient.RemoveTopicFromBrokers(topic, brokersToDelete) 191 | if err != nil { 192 | methodLogger.Warn("Error removing topic from Broker", err) 193 | return err 194 | } 195 | return nil 196 | } 197 | 198 | func (k *KafkaUtil) RemoveTopicsFromBrokers(cluster spec.Kafkacluster, 
brokerToDelete int32) error { 199 | methodLogger := log.WithFields(log.Fields{ 200 | "method": "RemoveTopicsFromBrokers", 201 | "clusterName": cluster.ObjectMeta.Name, 202 | "brokerToDelte": brokerToDelete, 203 | }) 204 | topics, err := k.KafkaClient.Topics() 205 | if err != nil { 206 | methodLogger.Error("Error Listing Topics") 207 | return err 208 | } 209 | 210 | //TODO it should be possible to Delete multiple Brokers 211 | for _, topic := range topics { 212 | //TODO what do in cases where ReplicationFactor > remaining broker count 213 | k.RemoveTopicFromBrokers(cluster, brokerToDelete, topic) 214 | } 215 | 216 | return nil 217 | } 218 | 219 | func (k *KafkaUtil) AllTopicsInSync() (bool, error) { 220 | topics, err := k.KazooClient.Topics() 221 | if err != nil { 222 | return false, err 223 | } 224 | for _, topic := range topics { 225 | partitions, err := topic.Partitions() 226 | if err != nil { 227 | return false, err 228 | } 229 | for _, partition := range partitions { 230 | underReplicated, err := partition.UnderReplicated() 231 | if err != nil { 232 | return false, err 233 | } 234 | if underReplicated { 235 | return false, nil 236 | } 237 | } 238 | } 239 | return true, nil 240 | } 241 | 242 | func (k *KafkaUtil) CreateTopic(topicSpec spec.KafkaTopicSpec) error { 243 | fmt.Println("Creating Kafka Topics: ", topicSpec) 244 | broker, _ := k.KafkaClient.Coordinator("operatorConsumerGroup") 245 | request := sarama.MetadataRequest{Topics: []string{topicSpec.Name}} 246 | metadataPartial, err := broker.GetMetadata(&request) 247 | if err != nil { 248 | return err 249 | } 250 | 251 | replicas := []int32{0} 252 | isr := []int32{0} 253 | 254 | metadataResponse := &sarama.MetadataResponse{} 255 | metadataResponse.AddBroker(broker.Addr(), broker.ID()) 256 | 257 | metadataPartial.AddTopic(topicSpec.Name, sarama.ErrNoError) 258 | //TODO dynamic partitions 259 | metadataPartial.AddTopicPartition(topicSpec.Name, 0, broker.ID(), replicas, isr, sarama.ErrNoError) 260 | metadataPartial.AddTopicPartition(topicSpec.Name, 1, broker.ID(), replicas, isr, sarama.ErrNoError) 261 | 262 | return nil 263 | } 264 | -------------------------------------------------------------------------------- /kube/cruisecontrol/cruise-control.go: -------------------------------------------------------------------------------- 1 | package cruisecontrol 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/krallistic/kafka-operator/kube" 7 | "github.com/krallistic/kafka-operator/spec" 8 | util "github.com/krallistic/kafka-operator/util" 9 | 10 | appsv1Beta1 "k8s.io/api/apps/v1beta1" 11 | "k8s.io/api/core/v1" 12 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 13 | ) 14 | 15 | const ( 16 | cc_deplyomentPrefix = "cruise-control" 17 | cc_image = "krallistic/cruise-control" //TODO 18 | cc_version = "latest" //TODO make version cmd arg 19 | 20 | ) 21 | 22 | func GetCruiseControlName(cluster spec.Kafkacluster) string { 23 | return cc_deplyomentPrefix + "-" + cluster.ObjectMeta.Name 24 | } 25 | 26 | func generateCruiseControlDeployment(cluster spec.Kafkacluster) *appsv1Beta1.Deployment { 27 | replicas := int32(1) 28 | 29 | objectMeta := metav1.ObjectMeta{ 30 | Name: GetCruiseControlName(cluster), 31 | Labels: map[string]string{ 32 | "component": "kafka", 33 | "name": cluster.ObjectMeta.Name, 34 | "role": "data", 35 | "type": "cruise-control", 36 | }, 37 | } 38 | 39 | podObjectMeta := metav1.ObjectMeta{ 40 | Name: GetCruiseControlName(cluster), 41 | Labels: map[string]string{ 42 | "component": "kafka", 43 | "name": cluster.ObjectMeta.Name, 44 | 
"role": "data", 45 | "type": "cruise-control", 46 | }, 47 | } 48 | brokerList := strings.Join(util.GetBrokerAdressess(cluster), ",") 49 | 50 | deploy := &appsv1Beta1.Deployment{ 51 | ObjectMeta: objectMeta, 52 | Spec: appsv1Beta1.DeploymentSpec{ 53 | Replicas: &replicas, 54 | Template: v1.PodTemplateSpec{ 55 | ObjectMeta: podObjectMeta, 56 | Spec: v1.PodSpec{ 57 | Containers: []v1.Container{ 58 | v1.Container{ 59 | Name: "cruise-control", 60 | Image: "krallistic/cruise-control:latest", 61 | Command: []string{"/bin/sh", "./setup-cruise-control.sh", "config/cruisecontrol.properties", "9095"}, 62 | Env: []v1.EnvVar{ 63 | v1.EnvVar{ 64 | Name: "ZOOKEEPER_CONNECT", 65 | Value: cluster.Spec.ZookeeperConnect, 66 | }, 67 | v1.EnvVar{ 68 | Name: "BOOTSTRAP_BROKER", 69 | Value: brokerList, 70 | }, 71 | }, 72 | Ports: []v1.ContainerPort{ 73 | v1.ContainerPort{ 74 | Name: "rest", 75 | ContainerPort: 9095, 76 | }, 77 | }, 78 | }, 79 | }, 80 | }, 81 | }, 82 | }, 83 | } 84 | 85 | return deploy 86 | } 87 | 88 | func generateCruiseControlService(cluster spec.Kafkacluster) *v1.Service { 89 | obejctMeta := metav1.ObjectMeta{ 90 | Name: GetCruiseControlName(cluster), 91 | Labels: map[string]string{ 92 | "component": "kafka", 93 | "name": cluster.ObjectMeta.Name, 94 | "role": "data", 95 | "type": "cruise-control", 96 | }, 97 | } 98 | 99 | svc := &v1.Service{ 100 | ObjectMeta: obejctMeta, 101 | Spec: v1.ServiceSpec{ 102 | Selector: map[string]string{ 103 | "component": "kafka", 104 | "name": cluster.ObjectMeta.Name, 105 | "role": "data", 106 | "type": "cruise-control", 107 | }, 108 | Ports: []v1.ServicePort{ 109 | v1.ServicePort{ 110 | Name: "rest", 111 | Port: 9095, 112 | }, 113 | }, 114 | }, 115 | } 116 | 117 | return svc 118 | } 119 | 120 | // Deploys the OffsetMonitor as an extra Pod inside the Cluster 121 | func DeployCruiseControl(cluster spec.Kafkacluster, client kube.Kubernetes) error { 122 | deployment := generateCruiseControlDeployment(cluster) 123 | svc := generateCruiseControlService(cluster) 124 | 125 | err := client.CreateOrUpdateDeployment(deployment) 126 | if err != nil { 127 | return err 128 | } 129 | err = client.CreateOrUpdateService(svc) 130 | if err != nil { 131 | return err 132 | } 133 | return nil 134 | } 135 | 136 | func DeleteCruiseControl(cluster spec.Kafkacluster, client kube.Kubernetes) error { 137 | deployment := generateCruiseControlDeployment(cluster) 138 | svc := generateCruiseControlService(cluster) 139 | 140 | err := client.DeleteDeployment(deployment) 141 | if err != nil { 142 | return err 143 | } 144 | err = client.DeleteService(svc) 145 | if err != nil { 146 | return err 147 | } 148 | 149 | return nil 150 | } 151 | -------------------------------------------------------------------------------- /kube/cruisecontrol/cruise-control_test.go: -------------------------------------------------------------------------------- 1 | package cruisecontrol 2 | -------------------------------------------------------------------------------- /kube/deploy-helper.go: -------------------------------------------------------------------------------- 1 | package kube 2 | 3 | import ( 4 | "k8s.io/apimachinery/pkg/api/errors" 5 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 6 | appsv1Beta1 "k8s.io/api/apps/v1beta1" 7 | 8 | log "github.com/Sirupsen/logrus" 9 | ) 10 | 11 | func (k *Kubernetes) updateDeployment(deployment *appsv1Beta1.Deployment) error { 12 | _, err := k.Client.AppsV1beta1().Deployments(deployment.ObjectMeta.Namespace).Update(deployment) 13 | return err 14 | } 15 | 16 | func (k 
*Kubernetes) createDeployment(deployment *appsv1Beta1.Deployment) error { 17 | _, err := k.Client.AppsV1beta1().Deployments(deployment.ObjectMeta.Namespace).Create(deployment) 18 | return err 19 | } 20 | 21 | func (k *Kubernetes) deleteDeployment(deployment *appsv1Beta1.Deployment) error { 22 | var gracePeriod int64 23 | gracePeriod = 10 24 | 25 | deleteOption := metav1.DeleteOptions{ 26 | GracePeriodSeconds: &gracePeriod, 27 | } 28 | err := k.Client.AppsV1beta1().Deployments(deployment.ObjectMeta.Namespace).Delete(deployment.ObjectMeta.Name, &deleteOption) 29 | return err 30 | } 31 | 32 | func (k *Kubernetes) deploymentExists(deployment *appsv1Beta1.Deployment) (bool, error) { 33 | methodLogger := logger.WithFields(log.Fields{ 34 | "method": "deploymentExists", 35 | "name": deployment.ObjectMeta.Name, 36 | "namespace": deployment.ObjectMeta.Namespace, 37 | }) 38 | namespace := deployment.ObjectMeta.Namespace 39 | depl, err := k.Client.AppsV1beta1().Deployments(namespace).Get(deployment.ObjectMeta.Name, k.DefaultOption) 40 | 41 | if err != nil { 42 | if errors.IsNotFound(err) { 43 | methodLogger.Debug("Deployment doesn't exist") 44 | return false, nil 45 | } else { 46 | methodLogger.WithFields(log.Fields{ 47 | "error": err, 48 | }).Error("Can't get Deployment info from API") 49 | return false, err 50 | } 51 | 52 | } 53 | if len(depl.Name) == 0 { 54 | methodLogger.Debug("Deployment.Name is empty, therefore it doesn't exist") 55 | return false, nil 56 | } 57 | return true, nil 58 | } 59 | 60 | // CreateOrUpdateDeployment deploys the given deployment into Kubernetes; an error is returned if a non-recoverable error happens 61 | func (k *Kubernetes) CreateOrUpdateDeployment(deployment *appsv1Beta1.Deployment) error { 62 | methodLogger := logger.WithFields(log.Fields{ 63 | "method": "CreateOrUpdateDeployment", 64 | "name": deployment.ObjectMeta.Name, 65 | "namespace": deployment.ObjectMeta.Namespace, 66 | }) 67 | 68 | exists, err := k.deploymentExists(deployment) 69 | if err != nil { 70 | methodLogger.WithField("error", err).Error("Error while checking if deployment exists") 71 | return err 72 | } 73 | if exists { 74 | err = k.updateDeployment(deployment) 75 | } else { 76 | err = k.createDeployment(deployment) 77 | } 78 | if err != nil { 79 | methodLogger.WithField("error", err).Error("Error while creating or updating deployment") 80 | } 81 | return err 82 | } 83 | 84 | func (k *Kubernetes) DeleteDeployment(deployment *appsv1Beta1.Deployment) error { 85 | methodLogger := logger.WithFields(log.Fields{ 86 | "method": "DeleteDeployment", 87 | "name": deployment.ObjectMeta.Name, 88 | "namespace": deployment.ObjectMeta.Namespace, 89 | }) 90 | exists, err := k.deploymentExists(deployment) 91 | if err != nil { 92 | methodLogger.WithField("error", err).Error("Error while checking if deployment exists") 93 | return err 94 | } 95 | if exists { 96 | replicas := int32(0) 97 | deployment.Spec.Replicas = &replicas 98 | err = k.updateDeployment(deployment) 99 | if err != nil { 100 | methodLogger.WithField("error", err).Warn("Error while scaling deployment down to 0, ignoring since deleting afterwards") 101 | } 102 | 103 | err = k.deleteDeployment(deployment) 104 | if err != nil { 105 | methodLogger.WithField("error", err).Error("Can't delete deployment") 106 | return err 107 | } 108 | } else { 109 | methodLogger.Debug("Trying to delete but Deployment doesn't exist.") 110 | return nil 111 | 112 | } 113 | return nil 114 | } 115 | -------------------------------------------------------------------------------- /kube/exporter/exporter.go:
-------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/krallistic/kafka-operator/kube" 7 | "github.com/krallistic/kafka-operator/spec" 8 | util "github.com/krallistic/kafka-operator/util" 9 | 10 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11 | "k8s.io/api/core/v1" 12 | appsv1Beta1 "k8s.io/api/apps/v1beta1" 13 | ) 14 | 15 | const ( 16 | deplyomentPrefix = "kafka-offset-checker" 17 | offsetExporterImage = "braedon/prometheus-kafka-consumer-group-exporter" //TODO 18 | offsetExporterVersion = "0.2.0" //TODO make version cmd arg 19 | 20 | prometheusScrapeAnnotation = "prometheus.io/scrape" 21 | prometheusPortAnnotation = "prometheus.io/port" 22 | prometheusPathAnnotation = "prometheus.io/path" 23 | 24 | metricPath = "/metrics" 25 | metricsPort = "8080" 26 | metricsScrape = "true" 27 | ) 28 | 29 | func getOffsetExporterName(cluster spec.Kafkacluster) string { 30 | return deplyomentPrefix + "-" + cluster.ObjectMeta.Name 31 | } 32 | 33 | func generateExporterLabels(cluster spec.Kafkacluster) map[string]string { 34 | return map[string]string{ 35 | "component": "kafka", 36 | "name": cluster.ObjectMeta.Name, 37 | "role": "data", 38 | "type": "exporter", 39 | } 40 | } 41 | 42 | func generateExporterService(cluster spec.Kafkacluster) *v1.Service { 43 | objectMeta := metav1.ObjectMeta{ 44 | Name: getOffsetExporterName(cluster), 45 | Labels: generateExporterLabels(cluster), 46 | } 47 | 48 | svc := &v1.Service{ 49 | ObjectMeta: objectMeta, 50 | Spec: v1.ServiceSpec{ 51 | Selector: generateExporterLabels(cluster), 52 | Ports: []v1.ServicePort{ 53 | v1.ServicePort{ 54 | Port: 8080, 55 | }, 56 | }, 57 | }, 58 | } 59 | 60 | return svc 61 | } 62 | 63 | func generateExporterDeployment(cluster spec.Kafkacluster) *appsv1Beta1.Deployment { 64 | replicas := int32(1) 65 | 66 | objectMeta := metav1.ObjectMeta{ 67 | Name: getOffsetExporterName(cluster), 68 | Labels: generateExporterLabels(cluster), 69 | } 70 | podObjectMeta := metav1.ObjectMeta{ 71 | Name: getOffsetExporterName(cluster), 72 | Annotations: map[string]string{ 73 | 74 | prometheusScrapeAnnotation: metricsScrape, 75 | prometheusPortAnnotation: metricsPort, 76 | prometheusPathAnnotation: metricPath, 77 | }, 78 | Labels: generateExporterLabels(cluster), 79 | } 80 | brokerList := strings.Join(util.GetBrokerAdressess(cluster), ",") 81 | 82 | deploy := &appsv1Beta1.Deployment{ 83 | ObjectMeta: objectMeta, 84 | Spec: appsv1Beta1.DeploymentSpec{ 85 | Replicas: &replicas, 86 | Template: v1.PodTemplateSpec{ 87 | ObjectMeta: podObjectMeta, 88 | Spec: v1.PodSpec{ 89 | Containers: []v1.Container{ 90 | v1.Container{ 91 | Name: "offset-exporter", 92 | Image: offsetExporterImage + ":" + offsetExporterVersion, 93 | //Command: ["python", "-u", "/usr/local/bin/prometheus-kafka-consumer-group-exporter"], 94 | Args: []string{ 95 | "--port=8080", 96 | "--bootstrap-brokers=" + brokerList, 97 | }, 98 | Ports: []v1.ContainerPort{ 99 | v1.ContainerPort{ 100 | Name: "prometheus", 101 | ContainerPort: 8080, 102 | }, 103 | }, 104 | }, 105 | }, 106 | }, 107 | }, 108 | }, 109 | } 110 | 111 | return deploy 112 | 113 | } 114 | 115 | // Deploys the OffsetMonitor as an extra Pod inside the Cluster 116 | func DeployOffsetMonitor(cluster spec.Kafkacluster, client kube.Kubernetes) error { 117 | deployment := generateExporterDeployment(cluster) 118 | svc := generateExporterService(cluster) 119 | 120 | //Deploy Offset Monitor Deplyoment 121 | err := client.CreateOrUpdateDeployment(deployment) 
122 | if err != nil { 123 | return err 124 | } 125 | //Deploy Offset Monitor Service 126 | err = client.CreateOrUpdateService(svc) 127 | 128 | return err 129 | } 130 | func DeleteOffsetMonitor(cluster spec.Kafkacluster, client kube.Kubernetes) error { 131 | deployment := generateExporterDeployment(cluster) 132 | svc := generateExporterService(cluster) 133 | 134 | client.DeleteDeployment(deployment) 135 | client.DeleteService(svc) 136 | 137 | return nil 138 | } 139 | -------------------------------------------------------------------------------- /kube/exporter/exporter_test.go: -------------------------------------------------------------------------------- 1 | package exporter 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "testing" 7 | 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/api/core/v1" 10 | appsv1Beta1 "k8s.io/api/apps/v1beta1" 11 | 12 | "github.com/krallistic/kafka-operator/spec" 13 | ) 14 | 15 | func TestGenerateExporterService(t *testing.T) { 16 | 17 | spec := spec.Kafkacluster{ 18 | ObjectMeta: metav1.ObjectMeta{ 19 | Name: "test-cluster", 20 | Namespace: "test", 21 | }, 22 | Spec: spec.KafkaclusterSpec{ 23 | Image: "testImage", 24 | BrokerCount: 1, 25 | JmxSidecar: false, 26 | ZookeeperConnect: "testZookeeperConnect", 27 | }, 28 | } 29 | expectedResult := &v1.Service{ 30 | ObjectMeta: metav1.ObjectMeta{ 31 | Name: "kafka-offset-checker" + "-" + "test-cluster", 32 | Labels: map[string]string{ 33 | "component": "kafka", 34 | "name": "test-cluster", 35 | "role": "data", 36 | "type": "exporter", 37 | }, 38 | }, 39 | Spec: v1.ServiceSpec{ 40 | Selector: map[string]string{ 41 | "component": "kafka", 42 | "name": "test-cluster", 43 | "role": "data", 44 | "type": "exporter", 45 | }, 46 | Ports: []v1.ServicePort{ 47 | v1.ServicePort{ 48 | Port: 8080, 49 | }, 50 | }, 51 | }, 52 | } 53 | 54 | result := generateExporterService(spec) 55 | if result == nil { 56 | t.Fatalf("return value should not be nil", result) 57 | } 58 | if !reflect.DeepEqual(result, expectedResult) { 59 | fmt.Println(result) 60 | fmt.Println("expected") 61 | fmt.Println(expectedResult) 62 | t.Fatalf("results were not equal", result, expectedResult) 63 | } 64 | } 65 | 66 | func TestGenerateExporterDeployment(t *testing.T) { 67 | 68 | spec := spec.Kafkacluster{ 69 | ObjectMeta: metav1.ObjectMeta{ 70 | Name: "test-cluster", 71 | Namespace: "test", 72 | }, 73 | Spec: spec.KafkaclusterSpec{ 74 | Image: "testImage", 75 | BrokerCount: 1, 76 | JmxSidecar: false, 77 | ZookeeperConnect: "testZookeeperConnect", 78 | }, 79 | } 80 | replicas := int32(1) 81 | 82 | expectedResult := &appsv1Beta1.Deployment{ 83 | ObjectMeta: metav1.ObjectMeta{ 84 | Name: "kafka-offset-checker" + "-" + "test-cluster", 85 | Labels: map[string]string{ 86 | "component": "kafka", 87 | "name": "test-cluster", 88 | "role": "data", 89 | "type": "exporter", 90 | }, 91 | }, 92 | Spec: appsv1Beta1.DeploymentSpec{ 93 | Replicas: &replicas, 94 | Template: v1.PodTemplateSpec{ 95 | ObjectMeta: metav1.ObjectMeta{ 96 | Name: "kafka-offset-checker" + "-" + "test-cluster", 97 | Annotations: map[string]string{ 98 | 99 | prometheusScrapeAnnotation: metricsScrape, 100 | prometheusPortAnnotation: metricsPort, 101 | prometheusPathAnnotation: metricPath, 102 | }, 103 | Labels: map[string]string{ 104 | "component": "kafka", 105 | "name": "test-cluster", 106 | "role": "data", 107 | "type": "exporter", 108 | }, 109 | }, 110 | Spec: v1.PodSpec{ 111 | Containers: []v1.Container{ 112 | v1.Container{ 113 | Name: "offset-exporter", 114 | Image: offsetExporterImage + 
":" + offsetExporterVersion, 115 | Args: []string{ 116 | "--port=8080", 117 | "--bootstrap-brokers=" + "test-cluster-0.test-cluster.test.svc.cluster.local:9092", 118 | }, 119 | Ports: []v1.ContainerPort{ 120 | v1.ContainerPort{ 121 | Name: "prometheus", 122 | //TODO configPort 123 | ContainerPort: 8080, 124 | }, 125 | }, 126 | }, 127 | }, 128 | }, 129 | }, 130 | }, 131 | } 132 | 133 | result := generateExporterDeployment(spec) 134 | if result == nil { 135 | t.Fatalf("return value should not be nil", result) 136 | } 137 | if !reflect.DeepEqual(result, expectedResult) { 138 | fmt.Println(result) 139 | fmt.Println("expected") 140 | fmt.Println(expectedResult) 141 | t.Fatalf("results were not equal", result, expectedResult) 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /kube/kafka/kafka.go: -------------------------------------------------------------------------------- 1 | package kafka 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | 7 | "github.com/krallistic/kafka-operator/kube" 8 | "github.com/krallistic/kafka-operator/spec" 9 | 10 | appsv1Beta1 "k8s.io/api/apps/v1beta1" 11 | "k8s.io/api/core/v1" 12 | "k8s.io/apimachinery/pkg/api/resource" 13 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 | ) 15 | 16 | const ( 17 | defaultCPU = "1" 18 | defaultDisk = "100G" 19 | defaultMemory = "4Gi" 20 | ) 21 | 22 | func createLabels(cluster spec.Kafkacluster) map[string]string { 23 | labels := map[string]string{ 24 | "component": "kafka", 25 | "creator": "kafka-operator", 26 | "role": "data", 27 | "name": cluster.ObjectMeta.Name, 28 | } 29 | return labels 30 | } 31 | 32 | func generateKafkaStatefulset(cluster spec.Kafkacluster) *appsv1Beta1.StatefulSet { 33 | 34 | name := cluster.ObjectMeta.Name 35 | replicas := cluster.Spec.BrokerCount 36 | image := cluster.Spec.Image 37 | 38 | storageClass := "standard" 39 | if cluster.Spec.StorageClass != "" { 40 | storageClass = cluster.Spec.StorageClass 41 | } 42 | 43 | //TODO error handling, default value? 44 | cpus, err := resource.ParseQuantity(cluster.Spec.Resources.CPU) 45 | if err != nil { 46 | cpus, _ = resource.ParseQuantity(defaultCPU) 47 | } 48 | 49 | memory, err := resource.ParseQuantity(cluster.Spec.Resources.Memory) 50 | if err != nil { 51 | memory, _ = resource.ParseQuantity(defaultMemory) 52 | } 53 | 54 | heapsize := int64(float64(memory.ScaledValue(resource.Mega)) * 0.6) 55 | if heapsize > 4096 { 56 | heapsize = 4096 57 | } 58 | 59 | fmt.Println(memory) 60 | 61 | diskSpace, err := resource.ParseQuantity(cluster.Spec.Resources.DiskSpace) 62 | if err != nil { 63 | diskSpace, _ = resource.ParseQuantity(defaultDisk) 64 | } 65 | 66 | options := GenerateKafkaOptions(cluster) 67 | 68 | statefulSet := &appsv1Beta1.StatefulSet{ 69 | ObjectMeta: metav1.ObjectMeta{ 70 | Name: name, 71 | Labels: createLabels(cluster), 72 | }, 73 | Spec: appsv1Beta1.StatefulSetSpec{ 74 | Replicas: &replicas, 75 | 76 | ServiceName: cluster.ObjectMeta.Name, 77 | Template: v1.PodTemplateSpec{ 78 | ObjectMeta: metav1.ObjectMeta{ 79 | Labels: createLabels(cluster), 80 | }, 81 | Spec: v1.PodSpec{ 82 | Affinity: &v1.Affinity{ 83 | PodAntiAffinity: &v1.PodAntiAffinity{ 84 | PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ 85 | v1.WeightedPodAffinityTerm{ 86 | Weight: 50, //TODO flexible weihgt? anti affinity with zK? 
87 | PodAffinityTerm: v1.PodAffinityTerm{ 88 | Namespaces: []string{cluster.ObjectMeta.Namespace}, 89 | LabelSelector: &metav1.LabelSelector{ 90 | MatchLabels: createLabels(cluster), 91 | }, 92 | TopologyKey: "kubernetes.io/hostname", //TODO topologieKey defined somehwere in k8s? 93 | }, 94 | }, 95 | }, 96 | }, 97 | }, 98 | Tolerations: []v1.Toleration{ 99 | v1.Toleration{ 100 | Key: "node.alpha.kubernetes.io/unreachable", 101 | Operator: v1.TolerationOpExists, 102 | Effect: v1.TaintEffectNoExecute, 103 | TolerationSeconds: &cluster.Spec.MinimumGracePeriod, 104 | }, 105 | v1.Toleration{ 106 | Key: "node.alpha.kubernetes.io/notReady", 107 | Operator: v1.TolerationOpExists, 108 | Effect: v1.TaintEffectNoExecute, 109 | TolerationSeconds: &cluster.Spec.MinimumGracePeriod, 110 | }, 111 | }, 112 | InitContainers: []v1.Container{ 113 | v1.Container{ 114 | Name: "labeler", 115 | Image: "devth/k8s-labeler", //TODO fullName, config 116 | Command: []string{"/bin/bash", 117 | "-c", 118 | fmt.Sprintf( 119 | "set -ex\n" + 120 | "[[ `hostname` =~ -([0-9]+)$ ]] || exit 1\n" + 121 | "export KUBE_LABEL_kafka_broker_id=${BASH_REMATCH[1]}\n" + 122 | "/run.sh"), 123 | }, 124 | Env: []v1.EnvVar{ 125 | v1.EnvVar{ 126 | Name: "KUBE_NAMESPACE", 127 | ValueFrom: &v1.EnvVarSource{ 128 | FieldRef: &v1.ObjectFieldSelector{ 129 | FieldPath: "metadata.namespace", 130 | }, 131 | }, 132 | }, 133 | v1.EnvVar{ 134 | Name: "KUBE_LABEL_hostname", 135 | ValueFrom: &v1.EnvVarSource{ 136 | FieldRef: &v1.ObjectFieldSelector{ 137 | FieldPath: "metadata.name", 138 | }, 139 | }, 140 | }, 141 | v1.EnvVar{ 142 | Name: "KUBE_LABEL_kafka_broker_id", 143 | Value: "thisshouldbeoverwritten", 144 | }, 145 | }, 146 | }, 147 | v1.Container{ 148 | Name: "zookeeper-ready", 149 | Image: "busybox", //TODO full Name, config 150 | Command: []string{"sh", "-c", fmt.Sprintf( 151 | "until nslookup %s; do echo waiting for myservice; sleep 2; done;", 152 | cluster.Spec.ZookeeperConnect)}, 153 | }, 154 | }, 155 | 156 | Containers: []v1.Container{ 157 | v1.Container{ 158 | Name: "kafka", 159 | Image: image, 160 | //TODO String replace operator etc 161 | Command: []string{"/bin/bash", 162 | "-c", 163 | fmt.Sprintf("export KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://$(hostname).%s.$(NAMESPACE).svc.cluster.local:9092; \n"+ 164 | "set -ex\n"+ 165 | "[[ `hostname` =~ -([0-9]+)$ ]] || exit 1\n"+ 166 | "export KAFKA_BROKER_ID=${BASH_REMATCH[1]}\n"+ 167 | "/etc/confluent/docker/run", name), 168 | }, 169 | Env: options, 170 | Ports: []v1.ContainerPort{ 171 | v1.ContainerPort{ 172 | Name: "kafka", 173 | //TODO configPort 174 | ContainerPort: 9092, 175 | }, 176 | }, 177 | Resources: v1.ResourceRequirements{ 178 | Requests: v1.ResourceList{ 179 | v1.ResourceCPU: cpus, 180 | v1.ResourceMemory: *GetMaxHeap(cluster), 181 | }, 182 | Limits: v1.ResourceList{ 183 | v1.ResourceMemory: memory, 184 | }, 185 | }, 186 | }, 187 | }, 188 | }, 189 | }, 190 | VolumeClaimTemplates: []v1.PersistentVolumeClaim{ 191 | v1.PersistentVolumeClaim{ 192 | ObjectMeta: metav1.ObjectMeta{ 193 | Name: "kafka-data", 194 | Annotations: map[string]string{ 195 | //TODO storagClass field in never Versions. 
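							// volume.beta.kubernetes.io/storage-class is the legacy annotation form;
							// newer API versions expose this as PersistentVolumeClaimSpec.StorageClassName.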
196 | "volume.beta.kubernetes.io/storage-class": storageClass, 197 | }, 198 | }, 199 | Spec: v1.PersistentVolumeClaimSpec{ 200 | AccessModes: []v1.PersistentVolumeAccessMode{ 201 | v1.ReadWriteOnce, 202 | }, 203 | Resources: v1.ResourceRequirements{ 204 | Requests: v1.ResourceList{ 205 | v1.ResourceStorage: diskSpace, 206 | }, 207 | }, 208 | }, 209 | }, 210 | }, 211 | }, 212 | } 213 | return statefulSet 214 | } 215 | 216 | func generateHeadlessService(cluster spec.Kafkacluster) *v1.Service { 217 | labelSelectors := createLabels(cluster) 218 | 219 | objectMeta := metav1.ObjectMeta{ 220 | Name: cluster.ObjectMeta.Name, 221 | Annotations: labelSelectors, 222 | } 223 | 224 | objectMeta.Labels = map[string]string{ 225 | "service.alpha.kubernetes.io/tolerate-unready-endpoints": "true", 226 | } 227 | 228 | service := &v1.Service{ 229 | ObjectMeta: objectMeta, 230 | 231 | Spec: v1.ServiceSpec{ 232 | Selector: labelSelectors, 233 | Ports: []v1.ServicePort{ 234 | v1.ServicePort{ 235 | Name: "broker", 236 | Port: 9092, 237 | }, 238 | }, 239 | ClusterIP: "None", 240 | }, 241 | } 242 | 243 | return service 244 | } 245 | 246 | func generateDirectBrokerServices(cluster spec.Kafkacluster) []*v1.Service { 247 | var services []*v1.Service 248 | 249 | for i := 0; i < int(cluster.Spec.BrokerCount); i++ { 250 | serviceName := cluster.ObjectMeta.Name + "-broker-" + strconv.Itoa(i) 251 | 252 | labelSelectors := createLabels(cluster) 253 | labelSelectors["kafka_broker_id"] = strconv.Itoa(i) 254 | objectMeta := metav1.ObjectMeta{ 255 | Name: serviceName, 256 | Namespace: cluster.ObjectMeta.Namespace, 257 | Annotations: labelSelectors, 258 | } 259 | 260 | service := &v1.Service{ 261 | ObjectMeta: objectMeta, 262 | Spec: v1.ServiceSpec{ 263 | Type: v1.ServiceTypeClusterIP, 264 | Selector: labelSelectors, 265 | Ports: []v1.ServicePort{ 266 | v1.ServicePort{ 267 | Name: "broker", 268 | Port: 9092, 269 | //NodePort: 30920, 270 | }, 271 | }, 272 | }, 273 | } 274 | services = append(services, service) 275 | } 276 | 277 | return services 278 | } 279 | 280 | func DeleteCluster(cluster spec.Kafkacluster, client kube.Kubernetes) error { 281 | cluster.Spec.BrokerCount = 0 282 | sts := generateKafkaStatefulset(cluster) 283 | //Downsize Statefulset to 0 284 | err := client.CreateOrUpdateStatefulSet(sts) 285 | if err != nil { 286 | return err 287 | } 288 | //Delete Headless SVC 289 | headlessSVC := generateHeadlessService(cluster) 290 | err = client.DeleteService(headlessSVC) 291 | if err != nil { 292 | return err 293 | } 294 | 295 | //Delete Direct Broker SVCs 296 | svcs := generateDirectBrokerServices(cluster) 297 | for _, svc := range svcs { 298 | err = client.DeleteService(svc) 299 | if err != nil { 300 | return err 301 | } 302 | } 303 | 304 | //Force Delete of Statefulset 305 | err = client.DeleteStatefulset(sts) 306 | if err != nil { 307 | return err 308 | } 309 | 310 | return nil 311 | } 312 | 313 | func CreateCluster(cluster spec.Kafkacluster, client kube.Kubernetes) error { 314 | //Create Headless SVC 315 | headlessSVC := generateHeadlessService(cluster) 316 | err := client.CreateOrUpdateService(headlessSVC) 317 | if err != nil { 318 | return err 319 | } 320 | 321 | sts := generateKafkaStatefulset(cluster) 322 | //Create Broker Cluster 323 | err = client.CreateOrUpdateStatefulSet(sts) 324 | if err != nil { 325 | return err 326 | } 327 | 328 | //CreateDelete Direct Broker SVCs 329 | svcs := generateDirectBrokerServices(cluster) 330 | for _, svc := range svcs { 331 | err = client.CreateOrUpdateService(svc) 332 | if err != 
nil { 333 | return err 334 | } 335 | } 336 | 337 | return nil 338 | } 339 | 340 | func UpsizeCluster(cluster spec.Kafkacluster, client kube.Kubernetes) error { 341 | return nil 342 | } 343 | 344 | func DownsizeCluster(cluster spec.Kafkacluster, client kube.Kubernetes) error { 345 | return nil 346 | } 347 | 348 | func UpdateStatus(cluster spec.Kafkacluster, client kube.Kubernetes) error { 349 | return nil 350 | } 351 | -------------------------------------------------------------------------------- /kube/kafka/kafka_test.go: -------------------------------------------------------------------------------- 1 | package kafka 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | 7 | "github.com/krallistic/kafka-operator/spec" 8 | "github.com/kylelemons/godebug/pretty" 9 | appsv1Beta1 "k8s.io/api/apps/v1beta1" 10 | "k8s.io/api/core/v1" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | ) 13 | 14 | func TestCreateStsFromSpec(t *testing.T) { 15 | 16 | spec := spec.Kafkacluster{ 17 | ObjectMeta: metav1.ObjectMeta{ 18 | Name: "test-cluster", 19 | Namespace: "test", 20 | }, 21 | Spec: spec.KafkaclusterSpec{ 22 | Image: "testImage", 23 | BrokerCount: 3, 24 | JmxSidecar: false, 25 | ZookeeperConnect: "testZookeeperConnect", 26 | StorageClass: "testStorageClass", 27 | }, 28 | } 29 | 30 | replicas := int32(3) 31 | expected := &appsv1Beta1.StatefulSet{ 32 | ObjectMeta: metav1.ObjectMeta{ 33 | Name: "test-cluster", 34 | Labels: map[string]string{ 35 | "component": "kafka", 36 | "creator": "kafka-operator", 37 | "role": "data", 38 | "name": "test-cluster", 39 | }, 40 | }, 41 | Spec: appsv1Beta1.StatefulSetSpec{ 42 | Replicas: &replicas, 43 | ServiceName: "test-cluster", 44 | Template: v1.PodTemplateSpec{ 45 | ObjectMeta: metav1.ObjectMeta{ 46 | Labels: map[string]string{ 47 | "component": "kafka", 48 | "creator": "kafka-operator", 49 | "role": "data", 50 | "name": "test-cluster", 51 | }, 52 | }, 53 | Spec: v1.PodSpec{ 54 | Affinity: &v1.Affinity{ 55 | PodAntiAffinity: &v1.PodAntiAffinity{ 56 | PreferredDuringSchedulingIgnoredDuringExecution: []v1.WeightedPodAffinityTerm{ 57 | v1.WeightedPodAffinityTerm{ 58 | Weight: 50, 59 | PodAffinityTerm: v1.PodAffinityTerm{ 60 | Namespaces: []string{"test"}, 61 | LabelSelector: &metav1.LabelSelector{ 62 | MatchLabels: map[string]string{ 63 | "component": "kafka", 64 | "creator": "kafka-operator", 65 | "role": "data", 66 | "name": "test-cluster", 67 | }, 68 | }, 69 | TopologyKey: "kubernetes.io/hostname", 70 | }, 71 | }, 72 | }, 73 | }, 74 | }, 75 | }, 76 | }, 77 | }, 78 | } 79 | 80 | created := generateKafkaStatefulset(spec) 81 | 82 | if created == nil { 83 | t.Fatalf("return value should not be nil", created) 84 | } 85 | if !reflect.DeepEqual(created.ObjectMeta, expected.ObjectMeta) || !reflect.DeepEqual(created.Spec.Template.ObjectMeta, expected.Spec.Template.ObjectMeta) { 86 | t.Fatalf("Different Metadata") 87 | } 88 | if *created.Spec.Replicas != *expected.Spec.Replicas { 89 | t.Fatalf("DifferentAmount of replicas ", *created.Spec.Replicas, *expected.Spec.Replicas) 90 | } 91 | if !reflect.DeepEqual(*created.Spec.Template.Spec.Affinity, *expected.Spec.Template.Spec.Affinity) { 92 | t.Fatalf("Different AntiAffintiy", *expected.Spec.Template.Spec.Affinity, *created.Spec.Template.Spec.Affinity) 93 | } 94 | //if !reflect.DeepEqual(created.Spec.VolumeClaimTemplates, expected.Spec.VolumeClaimTemplates) { 95 | // t.Fatalf("Different Volume claim Temlplate", *created.Spec.VolumeClaimTemplates, *expected.Spec.VolumeClaimTemplates) 96 | //} 97 | 98 | } 99 | 100 | func 
TestGenerateHeadlessService(t *testing.T) { 101 | 102 | spec := spec.Kafkacluster{ 103 | ObjectMeta: metav1.ObjectMeta{ 104 | Name: "test-cluster", 105 | Namespace: "test", 106 | }, 107 | Spec: spec.KafkaclusterSpec{ 108 | Image: "testImage", 109 | BrokerCount: 3, 110 | JmxSidecar: false, 111 | ZookeeperConnect: "testZookeeperConnect", 112 | }, 113 | } 114 | 115 | objectMeta := metav1.ObjectMeta{ 116 | Name: "test-cluster", 117 | Annotations: map[string]string{ 118 | "component": "kafka", 119 | "creator": "kafka-operator", 120 | "role": "data", 121 | "name": "test-cluster", 122 | }, 123 | } 124 | 125 | objectMeta.Labels = map[string]string{ 126 | "service.alpha.kubernetes.io/tolerate-unready-endpoints": "true", 127 | } 128 | 129 | expectedResult := &v1.Service{ 130 | ObjectMeta: objectMeta, 131 | 132 | Spec: v1.ServiceSpec{ 133 | Selector: map[string]string{ 134 | "component": "kafka", 135 | "creator": "kafka-operator", 136 | "role": "data", 137 | "name": "test-cluster", 138 | }, 139 | Ports: []v1.ServicePort{ 140 | v1.ServicePort{ 141 | Name: "broker", 142 | Port: 9092, 143 | }, 144 | }, 145 | ClusterIP: "None", 146 | }, 147 | } 148 | 149 | result := generateHeadlessService(spec) 150 | if result == nil { 151 | t.Fatalf("return value should not be nil", result) 152 | } 153 | if !reflect.DeepEqual(result, expectedResult) { 154 | t.Fatalf("results were not equal", result, expectedResult) 155 | } 156 | } 157 | 158 | func TestGenerateDirectBrokerService(t *testing.T) { 159 | 160 | spec := spec.Kafkacluster{ 161 | ObjectMeta: metav1.ObjectMeta{ 162 | Name: "test-cluster", 163 | Namespace: "test", 164 | }, 165 | Spec: spec.KafkaclusterSpec{ 166 | Image: "testImage", 167 | BrokerCount: 3, 168 | JmxSidecar: false, 169 | ZookeeperConnect: "testZookeeperConnect", 170 | }, 171 | } 172 | expectedResult := []*v1.Service{ 173 | &v1.Service{ 174 | ObjectMeta: metav1.ObjectMeta{ 175 | Name: "test-cluster-broker-0", 176 | Annotations: map[string]string{ 177 | "component": "kafka", 178 | "creator": "kafka-operator", 179 | "role": "data", 180 | "name": "test-cluster", 181 | "kafka_broker_id": "0", 182 | }, 183 | Namespace: "test", 184 | }, 185 | Spec: v1.ServiceSpec{ 186 | Selector: map[string]string{ 187 | "component": "kafka", 188 | "creator": "kafka-operator", 189 | "role": "data", 190 | "name": "test-cluster", 191 | "kafka_broker_id": "0", 192 | }, 193 | Ports: []v1.ServicePort{ 194 | v1.ServicePort{ 195 | Name: "broker", 196 | Port: 9092, 197 | }, 198 | }, 199 | Type: "ClusterIP", 200 | }, 201 | }, 202 | &v1.Service{ 203 | ObjectMeta: metav1.ObjectMeta{ 204 | Name: "test-cluster-broker-1", 205 | Annotations: map[string]string{ 206 | "component": "kafka", 207 | "creator": "kafka-operator", 208 | "role": "data", 209 | "name": "test-cluster", 210 | "kafka_broker_id": "1", 211 | }, 212 | Namespace: "test", 213 | }, 214 | 215 | Spec: v1.ServiceSpec{ 216 | Selector: map[string]string{ 217 | "component": "kafka", 218 | "creator": "kafka-operator", 219 | "role": "data", 220 | "name": "test-cluster", 221 | "kafka_broker_id": "1", 222 | }, 223 | Ports: []v1.ServicePort{ 224 | v1.ServicePort{ 225 | Name: "broker", 226 | Port: 9092, 227 | }, 228 | }, 229 | Type: "ClusterIP", 230 | }, 231 | }, 232 | &v1.Service{ 233 | ObjectMeta: metav1.ObjectMeta{ 234 | Name: "test-cluster-broker-2", 235 | Annotations: map[string]string{ 236 | "component": "kafka", 237 | "creator": "kafka-operator", 238 | "role": "data", 239 | "name": "test-cluster", 240 | "kafka_broker_id": "2", 241 | }, 242 | Namespace: "test", 243 | }, 244 | 245 
| Spec: v1.ServiceSpec{ 246 | Selector: map[string]string{ 247 | "component": "kafka", 248 | "creator": "kafka-operator", 249 | "role": "data", 250 | "name": "test-cluster", 251 | "kafka_broker_id": "2", 252 | }, 253 | Ports: []v1.ServicePort{ 254 | v1.ServicePort{ 255 | Name: "broker", 256 | Port: 9092, 257 | }, 258 | }, 259 | Type: "ClusterIP", 260 | }, 261 | }, 262 | } 263 | 264 | result := generateDirectBrokerServices(spec) 265 | if result == nil { 266 | t.Fatalf("return value should not be nil", result) 267 | } 268 | 269 | if diff := pretty.Compare(result, expectedResult); diff != "" { 270 | t.Errorf("%s: diff: (-got +want)\n%s", diff) 271 | } 272 | if !reflect.DeepEqual(result, expectedResult) { 273 | t.Fatalf("results were not equal", result, expectedResult) 274 | } 275 | 276 | } 277 | -------------------------------------------------------------------------------- /kube/kafka/options.go: -------------------------------------------------------------------------------- 1 | package kafka 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | 7 | "strings" 8 | 9 | log "github.com/Sirupsen/logrus" 10 | "github.com/azer/snakecase" 11 | "github.com/krallistic/kafka-operator/spec" 12 | "k8s.io/apimachinery/pkg/api/resource" 13 | "k8s.io/api/core/v1" 14 | ) 15 | 16 | var () 17 | 18 | func ReflectOptionsStruct(v interface{}) []v1.EnvVar { 19 | val := reflect.ValueOf(v) 20 | options := make([]v1.EnvVar, 0) 21 | 22 | for i := 0; i < val.NumField(); i++ { 23 | valueField := val.Field(i) 24 | typeField := val.Type().Field(i) 25 | 26 | if valueField.Kind() == reflect.Interface && !valueField.IsNil() { 27 | elm := valueField.Elem() 28 | if elm.Kind() == reflect.Ptr && !elm.IsNil() && elm.Elem().Kind() == reflect.Ptr { 29 | valueField = elm 30 | } 31 | } 32 | 33 | if valueField.IsNil() { 34 | continue 35 | } else { 36 | if valueField.Kind() == reflect.Ptr { 37 | valueField = valueField.Elem() 38 | } 39 | value := fmt.Sprintf("%v", valueField.Interface()) 40 | name := strings.ToUpper(snakecase.SnakeCase(typeField.Name)) 41 | 42 | env := v1.EnvVar{ 43 | Name: name, 44 | Value: value, 45 | } 46 | options = append(options, env) 47 | } 48 | } 49 | return options 50 | } 51 | 52 | func GetMaxHeap(cluster spec.Kafkacluster) *resource.Quantity { 53 | memory, err := resource.ParseQuantity(cluster.Spec.Resources.Memory) 54 | if err != nil { 55 | memory, _ = resource.ParseQuantity(defaultMemory) 56 | } 57 | 58 | heapsize := int64(float64(memory.ScaledValue(resource.Mega)) * 0.6) 59 | if heapsize > 4096 { 60 | heapsize = 4096 61 | } 62 | maxHeap := resource.NewScaledQuantity(heapsize, resource.Mega) 63 | 64 | return maxHeap 65 | } 66 | 67 | func GetMaxHeapJavaString(cluster spec.Kafkacluster) string { 68 | return "-Xmx" + GetMaxHeap(cluster).String() 69 | } 70 | 71 | func GenerateKafkaOptions(cluster spec.Kafkacluster) []v1.EnvVar { 72 | kafkaOptions := cluster.Spec.KafkaOptions 73 | 74 | structOptions := ReflectOptionsStruct(kafkaOptions) 75 | log.WithFields(log.Fields{ 76 | "method": "GenerateKafkaOptions", 77 | "options": structOptions, 78 | }).Debug("Generated KafkaOptions from Struct to Env Vars") 79 | 80 | staticOptions := []v1.EnvVar{ 81 | v1.EnvVar{ 82 | Name: "NAMESPACE", 83 | ValueFrom: &v1.EnvVarSource{ 84 | FieldRef: &v1.ObjectFieldSelector{ 85 | FieldPath: "metadata.namespace", 86 | }, 87 | }, 88 | }, 89 | v1.EnvVar{ 90 | Name: "KAFKA_ZOOKEEPER_CONNECT", 91 | Value: cluster.Spec.ZookeeperConnect, 92 | }, 93 | v1.EnvVar{ 94 | Name: "KAFKA_HEAP_OPTS", 95 | Value: GetMaxHeapJavaString(cluster), 96 | }, 97 | 
v1.EnvVar{ 98 | Name: "KAFKA_METRIC_REPORTERS", 99 | Value: "com.linkedin.kafka.cruisecontrol.metricsreporter.CruiseControlMetricsReporter", 100 | }, 101 | v1.EnvVar{ 102 | Name: "KAFKA_CRUISE_CONTROL_METRICS_REPORTER_BOOTSTRAP_SERVER", 103 | Value: fmt.Sprintf("%s-0.%s.%s.svc.cluster.local:9092", cluster.GetObjectMeta().GetName(), cluster.GetObjectMeta().GetName(), cluster.GetObjectMeta().GetNamespace()), 104 | }, 105 | } 106 | 107 | if cluster.Spec.BrokerCount < 3 { 108 | offset_topic := v1.EnvVar{ 109 | Name: "KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR", 110 | Value: "1", 111 | } 112 | staticOptions = append(staticOptions, offset_topic) 113 | } 114 | 115 | options := append(structOptions, staticOptions...) 116 | fmt.Println(options) 117 | return options 118 | } 119 | -------------------------------------------------------------------------------- /kube/kafka/options_test.go: -------------------------------------------------------------------------------- 1 | package kafka 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | "testing" 7 | 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/api/core/v1" 10 | 11 | "github.com/krallistic/kafka-operator/spec" 12 | ) 13 | 14 | func TestGenerateKafkaOptions(t *testing.T) { 15 | topicsCreate := true 16 | logRetentionBytes := "testLogRetentionTime" 17 | testClusterSpec := spec.Kafkacluster{ 18 | ObjectMeta: metav1.ObjectMeta{ 19 | Name: "test-cluster", 20 | Namespace: "test", 21 | }, 22 | Spec: spec.KafkaclusterSpec{ 23 | 24 | KafkaOptions: spec.KafkaOptions{ 25 | AutoCreateTopicsEnable: &topicsCreate, 26 | LogRetentionBytes: &logRetentionBytes, 27 | }, 28 | }, 29 | } 30 | 31 | expectedResult := []v1.EnvVar{ 32 | v1.EnvVar{ 33 | Name: "AUTO_CREATE_TOPICS_ENABLE", 34 | Value: "true", 35 | }, 36 | v1.EnvVar{ 37 | Name: "LOG_RETENTION_BYTES", 38 | Value: "testLogRetentionTime", 39 | }, 40 | v1.EnvVar{ 41 | Name: "NAMESPACE", 42 | ValueFrom: &v1.EnvVarSource{ 43 | FieldRef: &v1.ObjectFieldSelector{ 44 | FieldPath: "metadata.namespace", 45 | }, 46 | }, 47 | }, 48 | v1.EnvVar{ 49 | Name: "KAFKA_ZOOKEEPER_CONNECT", 50 | }, 51 | v1.EnvVar{ 52 | Name: "KAFKA_HEAP_OPTS", 53 | Value: "-Xmx2577M", 54 | }, 55 | v1.EnvVar{ 56 | Name: "KAFKA_METRIC_REPORTERS", 57 | Value: "com.linkedin.kafka.cruisecontrol.metricsreporter.CruiseControlMetricsReporter", 58 | }, 59 | v1.EnvVar{ 60 | Name: "KAFKA_CRUISE_CONTROL_METRICS_REPORTER_BOOTSTRAP_SERVER", 61 | Value: "test-cluster-0.test-cluster.test.svc.cluster.local:9092", 62 | }, 63 | v1.EnvVar{ 64 | Name: "KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR", 65 | Value: "1", 66 | }, 67 | } 68 | 69 | result := GenerateKafkaOptions(testClusterSpec) 70 | if result == nil { 71 | t.Fatalf("return value should not be nil", result) 72 | } 73 | if !reflect.DeepEqual(result, expectedResult) { 74 | t.Fatalf("results were not equal") 75 | } 76 | 77 | fmt.Println(result) 78 | 79 | } 80 | -------------------------------------------------------------------------------- /kube/kube.go: -------------------------------------------------------------------------------- 1 | package kube 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | k8sclient "k8s.io/client-go/kubernetes" 6 | "k8s.io/client-go/rest" 7 | "k8s.io/client-go/tools/clientcmd" 8 | 9 | log "github.com/Sirupsen/logrus" 10 | ) 11 | 12 | var ( 13 | logger = log.WithFields(log.Fields{ 14 | "package": "kube", 15 | }) 16 | ) 17 | 18 | type Kubernetes struct { 19 | Client *k8sclient.Clientset 20 | MasterHost string 21 | DefaultOption metav1.GetOptions 22 | DeleteOption 
metav1.DeleteOptions 23 | } 24 | 25 | func New(kubeConfigFile, masterHost string) (*Kubernetes, error) { 26 | methodLogger := logger.WithFields(log.Fields{"method": "New"}) 27 | 28 | // Create the client config. Use kubeconfig if given, otherwise assume in-cluster. 29 | client, err := NewKubeClient(kubeConfigFile) 30 | if err != nil { 31 | methodLogger.WithFields(log.Fields{ 32 | "error": err, 33 | "config": kubeConfigFile, 34 | "client": client, 35 | }).Error("could not init Kubernetes client") 36 | return nil, err 37 | } 38 | 39 | k := &Kubernetes{ 40 | Client: client, 41 | MasterHost: masterHost, 42 | } 43 | methodLogger.WithFields(log.Fields{ 44 | "config": kubeConfigFile, 45 | "client": client, 46 | }).Debug("Initialized Kubernetes client") 47 | 48 | return k, nil 49 | } 50 | 51 | func BuildConfig(kubeconfig string) (*rest.Config, error) { 52 | if kubeconfig != "" { 53 | return clientcmd.BuildConfigFromFlags("", kubeconfig) 54 | } 55 | return rest.InClusterConfig() 56 | } 57 | 58 | //TODO refactor for config *rest.Config :) 59 | func NewKubeClient(kubeCfgFile string) (*k8sclient.Clientset, error) { 60 | 61 | config, err := BuildConfig(kubeCfgFile) 62 | if err != nil { 63 | return nil, err 64 | } 65 | 66 | //TODO refactor & log errors 67 | return k8sclient.NewForConfig(config) 68 | } 69 | -------------------------------------------------------------------------------- /kube/service-helper.go: -------------------------------------------------------------------------------- 1 | package kube 2 | 3 | import ( 4 | "k8s.io/api/core/v1" 5 | "k8s.io/apimachinery/pkg/api/errors" 6 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 7 | 8 | log "github.com/Sirupsen/logrus" 9 | ) 10 | 11 | func (k *Kubernetes) updateService(service *v1.Service) error { 12 | 13 | _, err := k.Client.Core().Services(service.ObjectMeta.Namespace).Update(service) 14 | return err 15 | } 16 | 17 | func (k *Kubernetes) createService(service *v1.Service) error { 18 | _, err := k.Client.Core().Services(service.ObjectMeta.Namespace).Create(service) 19 | 20 | return err 21 | } 22 | 23 | func (k *Kubernetes) deleteService(service *v1.Service) error { 24 | var gracePeriod int64 25 | gracePeriod = 10 26 | 27 | deleteOption := metav1.DeleteOptions{ 28 | GracePeriodSeconds: &gracePeriod, 29 | } 30 | err := k.Client.Core().Services(service.ObjectMeta.Namespace).Delete(service.ObjectMeta.Name, &deleteOption) 31 | return err 32 | } 33 | 34 | func (k *Kubernetes) serviceExists(service *v1.Service) (bool, error) { 35 | methodLogger := logger.WithFields(log.Fields{ 36 | "method": "serviceExists", 37 | "name": service.ObjectMeta.Name, 38 | "namespace": service.ObjectMeta.Namespace, 39 | }) 40 | namespace := service.ObjectMeta.Namespace 41 | svc, err := k.Client.Core().Services(namespace).Get(service.ObjectMeta.Name, k.DefaultOption) 42 | 43 | if err != nil { 44 | if errors.IsNotFound(err) { 45 | methodLogger.Debug("Service does not exist") 46 | return false, nil 47 | } else { 48 | methodLogger.WithFields(log.Fields{ 49 | "error": err, 50 | }).Error("Cannot get Service info from API") 51 | return false, err 52 | } 53 | 54 | } 55 | if len(svc.Name) == 0 { 56 | methodLogger.Debug("Service.Name is empty, therefore it does not exist") 57 | return false, nil 58 | } 59 | return true, nil 60 | } 61 | 62 | // Deploys the given service into Kubernetes; an error is returned if a non-recoverable error happens 63 | func (k *Kubernetes) CreateOrUpdateService(service *v1.Service) error { 64 | methodLogger := logger.WithFields(log.Fields{ 65 | "method": "CreateOrUpdateService", 66 | "name": service.ObjectMeta.Name, 67 | "namespace": service.ObjectMeta.Namespace, 68 | }) 69 | 70 | exists, err := k.serviceExists(service) 71 | if err != nil { 72 | methodLogger.WithField("error", err).Error("Error while checking if service exists") 73 | return err 74 | } 75 | if exists { 76 | err = k.updateService(service) 77 | } else { 78 | err = k.createService(service) 79 | } 80 | if err != nil { 81 | methodLogger.WithField("error", err).Error("Error while creating or updating service") 82 | } 83 | return err 84 | } 85 | 86 | func (k *Kubernetes) DeleteService(service *v1.Service) error { 87 | methodLogger := logger.WithFields(log.Fields{ 88 | "method": "DeleteService", 89 | "name": service.ObjectMeta.Name, 90 | "namespace": service.ObjectMeta.Namespace, 91 | }) 92 | exists, err := k.serviceExists(service) 93 | if err != nil { 94 | methodLogger.WithField("error", err).Error("Error while checking if service exists") 95 | return err 96 | } 97 | if exists { 98 | err = k.deleteService(service) 99 | if err != nil { 100 | methodLogger.WithField("error", err).Error("Cannot delete service") 101 | return err 102 | } 103 | } else { 104 | methodLogger.Debug("Trying to delete but Service does not exist.") 105 | 106 | } 107 | return nil 108 | } 109 | -------------------------------------------------------------------------------- /kube/statefulset-helper.go: -------------------------------------------------------------------------------- 1 | package kube 2 | 3 | import ( 4 | "time" 5 | 6 | appsv1Beta1 "k8s.io/api/apps/v1beta1" 7 | "k8s.io/apimachinery/pkg/api/errors" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | 10 | log "github.com/Sirupsen/logrus" 11 | ) 12 | 13 | func (k *Kubernetes) updateStatefulSet(statefulset *appsv1Beta1.StatefulSet) error { 14 | _, err := k.Client.AppsV1beta1().StatefulSets(statefulset.ObjectMeta.Namespace).Update(statefulset) 15 | 16 | return err 17 | } 18 | 19 | func (k *Kubernetes) deleteStatefulSet(statefulset *appsv1Beta1.StatefulSet) error { 20 | var gracePeriod int64 21 | gracePeriod = 1 22 | 23 | deleteOption := metav1.DeleteOptions{ 24 | GracePeriodSeconds: &gracePeriod, 25 | } 26 | err := k.Client.AppsV1beta1().StatefulSets(statefulset.ObjectMeta.Namespace).Delete(statefulset.ObjectMeta.Name, &deleteOption) 27 | 28 | return err 29 | } 30 | func (k *Kubernetes) createStatefulSet(statefulset *appsv1Beta1.StatefulSet) error { 31 | _, err := k.Client.AppsV1beta1().StatefulSets(statefulset.ObjectMeta.Namespace).Create(statefulset) 32 | return err 33 | } 34 | 35 | func (k *Kubernetes) statefulsetExists(statefulset *appsv1Beta1.StatefulSet) (bool, error) { 36 | methodLogger := logger.WithFields(log.Fields{ 37 | "method": "statefulsetExists", 38 | "name": statefulset.ObjectMeta.Name, 39 | "namespace": statefulset.ObjectMeta.Namespace, 40 | }) 41 | namespace := statefulset.ObjectMeta.Namespace 42 | sts, err := k.Client.AppsV1beta1().StatefulSets(namespace).Get(statefulset.ObjectMeta.Name, k.DefaultOption) 43 | 44 | if err != nil { 45 | if errors.IsNotFound(err) { 46 | methodLogger.Debug("StatefulSet does not exist") 47 | return false, nil 48 | } else { 49 | methodLogger.WithFields(log.Fields{ 50 | "error": err, 51 | }).Error("Cannot get StatefulSet info from API") 52 | return false, err 53 | } 54 | 55 | } 56 | if len(sts.Name) == 0 { 57 | methodLogger.Debug("StatefulSet.Name is empty, therefore it does not exist") 58 | return false, nil 59 | } 60 | return true, nil 61 | } 62 | 63 | // Deploys the given StatefulSet into Kubernetes; an error is returned if a non-recoverable error happens
64 | func (k *Kubernetes) CreateOrUpdateStatefulSet(statefulset *appsv1Beta1.StatefulSet) error { 65 | methodLogger := logger.WithFields(log.Fields{ 66 | "method": "CreateOrUpdateStatefulSet", 67 | "name": statefulset.ObjectMeta.Name, 68 | "namespace": statefulset.ObjectMeta.Namespace, 69 | }) 70 | 71 | exists, err := k.statefulsetExists(statefulset) 72 | if err != nil { 73 | methodLogger.WithField("error", err).Error("Error while checking if statefulset exists") 74 | return err 75 | } 76 | if exists { 77 | err = k.updateStatefulSet(statefulset) 78 | } else { 79 | err = k.createStatefulSet(statefulset) 80 | } 81 | if err != nil { 82 | methodLogger.WithField("error", err).Error("Error while creating or updating statefulset") 83 | } 84 | return err 85 | } 86 | 87 | func (k *Kubernetes) DeleteStatefulset(statefulset *appsv1Beta1.StatefulSet) error { 88 | methodLogger := logger.WithFields(log.Fields{ 89 | "method": "DeleteStatefulset", 90 | "name": statefulset.ObjectMeta.Name, 91 | "namespace": statefulset.ObjectMeta.Namespace, 92 | }) 93 | exists, err := k.statefulsetExists(statefulset) 94 | if err != nil { 95 | methodLogger.WithField("error", err).Error("Error while checking if statefulset exists") 96 | return err 97 | } 98 | if exists { 99 | brokerCount := int(*statefulset.Spec.Replicas) 100 | replicas := int32(0) 101 | statefulset.Spec.Replicas = &replicas 102 | err = k.updateStatefulSet(statefulset) 103 | if err != nil { 104 | methodLogger.WithField("error", err).Warn("Error while scaling statefulset down to 0, ignoring since it is deleted afterwards") 105 | } 106 | methodLogger.Info("Sleeping 15s per broker to let the StatefulSet scale down") 107 | time.Sleep(time.Duration(brokerCount) * time.Second * 15) 108 | err = k.deleteStatefulSet(statefulset) 109 | if err != nil { 110 | methodLogger.WithField("error", err).Error("Cannot delete statefulset") 111 | return err 112 | } 113 | } else { 114 | methodLogger.Debug("Trying to delete but StatefulSet does not exist.") 115 | } 116 | return nil 117 | } 118 | -------------------------------------------------------------------------------- /processor/metrics.go: -------------------------------------------------------------------------------- 1 | package processor 2 | 3 | import "github.com/prometheus/client_golang/prometheus" 4 | 5 | var ( 6 | clustersTotal = prometheus.NewGauge(prometheus.GaugeOpts{ 7 | Namespace: "kafka_operator", 8 | Subsystem: "processor", 9 | Name: "clusters", 10 | Help: "Total number of clusters managed by the processor", 11 | }) 12 | 13 | clustersCreated = prometheus.NewCounter(prometheus.CounterOpts{ 14 | Namespace: "kafka_operator", 15 | Subsystem: "processor", 16 | Name: "clusters_created", 17 | Help: "Total number of clusters created", 18 | }) 19 | 20 | clustersDeleted = prometheus.NewCounter(prometheus.CounterOpts{ 21 | Namespace: "kafka_operator", 22 | Subsystem: "processor", 23 | Name: "clusters_deleted", 24 | Help: "Total number of clusters deleted", 25 | }) 26 | 27 | clustersModified = prometheus.NewCounter(prometheus.CounterOpts{ 28 | Namespace: "kafka_operator", 29 | Subsystem: "processor", 30 | Name: "clusters_modified", 31 | Help: "Total number of clusters modified", 32 | }) 33 | 34 | internalErrors = prometheus.NewCounter(prometheus.CounterOpts{ 35 | Namespace: "kafka_operator", 36 | Subsystem: "processor", 37 | Name: "internal_errors", 38 | Help: "Total number of errors that occurred somewhere inside the operator", 39 | }) 40 | ) 41 | 42 | func init() { 43 | prometheus.MustRegister(clustersTotal) 44 | 
prometheus.MustRegister(clustersCreated) 45 | prometheus.MustRegister(clustersDeleted) 46 | prometheus.MustRegister(clustersModified) 47 | prometheus.MustRegister(internalErrors) 48 | } 49 | -------------------------------------------------------------------------------- /processor/processor.go: -------------------------------------------------------------------------------- 1 | package processor 2 | 3 | import ( 4 | "reflect" 5 | "time" 6 | 7 | log "github.com/Sirupsen/logrus" 8 | 9 | "github.com/krallistic/kafka-operator/controller" 10 | "github.com/krallistic/kafka-operator/kafka" 11 | "github.com/krallistic/kafka-operator/kube" 12 | spec "github.com/krallistic/kafka-operator/spec" 13 | 14 | cruisecontrol_kube "github.com/krallistic/kafka-operator/kube/cruisecontrol" 15 | exporter_kube "github.com/krallistic/kafka-operator/kube/exporter" 16 | kafka_kube "github.com/krallistic/kafka-operator/kube/kafka" 17 | ) 18 | 19 | type Processor struct { 20 | baseBrokerImage string 21 | crdController controller.CustomResourceController 22 | kafkaClusters map[string]*spec.Kafkacluster 23 | watchEventsChannel chan spec.KafkaclusterWatchEvent 24 | clusterEvents chan spec.KafkaclusterEvent 25 | kafkaClient map[string]*kafka.KafkaUtil 26 | control chan int 27 | errors chan error 28 | kube kube.Kubernetes 29 | } 30 | 31 | func New(image string, 32 | crdClient controller.CustomResourceController, 33 | control chan int, 34 | kube kube.Kubernetes, 35 | ) (*Processor, error) { 36 | p := &Processor{ 37 | baseBrokerImage: image, 38 | kafkaClusters: make(map[string]*spec.Kafkacluster), 39 | watchEventsChannel: make(chan spec.KafkaclusterWatchEvent, 100), 40 | clusterEvents: make(chan spec.KafkaclusterEvent, 100), 41 | crdController: crdClient, 42 | kafkaClient: make(map[string]*kafka.KafkaUtil), 43 | control: control, 44 | errors: make(chan error), 45 | kube: kube, 46 | } 47 | log.Info("Created Processor") 48 | return p, nil 49 | } 50 | 51 | func (p *Processor) initKafkaClient(cluster spec.Kafkacluster) error { 52 | methodLogger := log.WithFields(log.Fields{ 53 | "method": "initKafkaClient", 54 | "clusterName": cluster.ObjectMeta.Name, 55 | "zookeeperConnectL": cluster.Spec.ZookeeperConnect, 56 | }) 57 | methodLogger.Info("Creating KafkaCLient for cluster") 58 | 59 | client, err := kafka.New(cluster) 60 | if err != nil { 61 | internalErrors.Inc() 62 | return err 63 | } 64 | 65 | //TODO can metadata.uuid used? check how that changed 66 | name := p.getClusterUUID(cluster) 67 | p.kafkaClient[name] = client 68 | 69 | methodLogger.Info("Create KakfaClient for cluser") 70 | return nil 71 | } 72 | 73 | func (p *Processor) Run() error { 74 | //TODO getListOfAlredyRunningCluster/Refresh 75 | log.Info("Running Processor") 76 | p.watchEvents() 77 | return nil 78 | } 79 | 80 | //We detect basic change through the event type, beyond that we use the API server to find differences. 81 | //Functions compares the KafkaClusterSpec with the real Pods/Services which are there. 82 | //We do that because otherwise we would have to use a local state to track changes. 83 | func (p *Processor) DetectChangeType(event spec.KafkaclusterWatchEvent) spec.KafkaclusterEvent { 84 | methodLogger := log.WithFields(log.Fields{ 85 | "method": "DetectChangeType", 86 | "clusterName": event.Object.ObjectMeta.Name, 87 | "eventType": event.Type, 88 | }) 89 | methodLogger.Debug("Detecting type of change in Kafka CRD") 90 | 91 | //TODO multiple changes in one Update? 
right now we only detect one change 92 | clusterEvent := spec.KafkaclusterEvent{ 93 | Cluster: event.Object, 94 | } 95 | if event.Type == "ADDED" { 96 | clusterEvent.Type = spec.NEW_CLUSTER 97 | return clusterEvent 98 | } 99 | if event.Type == "DELETED" { 100 | clusterEvent.Type = spec.DELETE_CLUSTER 101 | return clusterEvent 102 | 103 | } 104 | //EVENT type must be modfied now. 105 | oldCluster := event.OldObject 106 | newCluster := event.Object 107 | 108 | if reflect.DeepEqual(oldCluster, spec.Kafkacluster{}) { 109 | methodLogger.Error("Got changed type, but either new or old object is nil") 110 | clusterEvent.Type = spec.ERROR_STATE 111 | return clusterEvent 112 | } 113 | 114 | methodLogger = methodLogger.WithFields(log.Fields{ 115 | "oldCluster": oldCluster, 116 | "newCluster": newCluster, 117 | }) 118 | 119 | clusterEvent.OldCluster = event.OldObject 120 | 121 | if !reflect.DeepEqual(oldCluster.State, newCluster.State) { 122 | methodLogger.Debug("Cluster State different, doing nothing") 123 | clusterEvent.Type = spec.STATE_CHANGE 124 | return clusterEvent 125 | } else if !reflect.DeepEqual(oldCluster.Scale, newCluster.Scale) { 126 | methodLogger.Debug("Cluster Scale different, doing nothing") 127 | clusterEvent.Type = spec.SCALE_CHANGE 128 | return clusterEvent 129 | } else if oldCluster.Spec.Image != newCluster.Spec.Image { 130 | clusterEvent.Type = spec.CHANGE_IMAGE 131 | return clusterEvent 132 | } else if oldCluster.Spec.BrokerCount < newCluster.Spec.BrokerCount { 133 | clusterEvent.Type = spec.UPSIZE_CLUSTER 134 | return clusterEvent 135 | } else if oldCluster.Spec.BrokerCount > newCluster.Spec.BrokerCount { 136 | clusterEvent.Type = spec.DOWNSIZE_CLUSTER 137 | return clusterEvent 138 | } else { 139 | clusterEvent.Type = spec.UNKNOWN_CHANGE 140 | //TODO change to reconsilation event? 141 | methodLogger.Error("Unknown Event found") 142 | return clusterEvent 143 | } 144 | 145 | clusterEvent.Type = spec.UNKNOWN_CHANGE 146 | return clusterEvent 147 | } 148 | 149 | func (p *Processor) getClusterUUID(cluster spec.Kafkacluster) string { 150 | return cluster.ObjectMeta.Namespace + "-" + cluster.ObjectMeta.Name 151 | } 152 | 153 | //Takes in raw Kafka events, lets then detected and the proced to initiate action accoriding to the detected event. 154 | func (p *Processor) processEvent(currentEvent spec.KafkaclusterEvent) { 155 | methodLogger := log.WithFields(log.Fields{ 156 | "method": "processEvent", 157 | "clusterName": currentEvent.Cluster.ObjectMeta.Name, 158 | "KafkaClusterEventType": currentEvent.Type, 159 | }) 160 | methodLogger.Debug("Recieved Event, processing") 161 | switch currentEvent.Type { 162 | case spec.NEW_CLUSTER: 163 | methodLogger.WithField("event-type", spec.NEW_CLUSTER).Info("New CRD added, creating cluster") 164 | p.createKafkaCluster(currentEvent.Cluster) 165 | 166 | clustersTotal.Inc() 167 | clustersCreated.Inc() 168 | 169 | methodLogger.Info("Init heartbeat type checking...") 170 | //TODO rename 171 | clusterEvent := spec.KafkaclusterEvent{ 172 | Cluster: currentEvent.Cluster, 173 | Type: spec.KAKFA_EVENT, 174 | } 175 | p.sleep30AndSendEvent(clusterEvent) 176 | break 177 | 178 | case spec.DELETE_CLUSTER: 179 | methodLogger.WithField("event-type", spec.DELETE_CLUSTER).Info("Delete Cluster, deleting all Objects ") 180 | 181 | p.deleteKafkaCluster(currentEvent.Cluster) 182 | 183 | go func() { 184 | time.Sleep(time.Duration(currentEvent.Cluster.Spec.BrokerCount) * time.Minute) 185 | //TODO dynamic sleep, depending till sts is completely scaled down. 
186 | clusterEvent := spec.KafkaclusterEvent{ 187 | Cluster: currentEvent.Cluster, 188 | Type: spec.CLEANUP_EVENT, 189 | } 190 | p.clusterEvents <- clusterEvent 191 | }() 192 | clustersTotal.Dec() 193 | clustersDeleted.Inc() 194 | case spec.CHANGE_IMAGE: 195 | methodLogger.Info("Change Image Event detected, updating StatefulSet to trigger a new rollout") 196 | methodLogger.Error("Not Implemented Currently") 197 | clustersModified.Inc() 198 | case spec.UPSIZE_CLUSTER: 199 | methodLogger.Warn("Upsize Cluster, changing StatefulSet with higher Replicas, no Rebalacing") 200 | if kafka_kube.UpsizeCluster(currentEvent.Cluster, p.kube) != nil { 201 | internalErrors.Inc() 202 | p.sleep30AndSendEvent(currentEvent) 203 | break 204 | } 205 | clustersModified.Inc() 206 | case spec.UNKNOWN_CHANGE: 207 | methodLogger.Warn("Unknown (or unsupported) change occured, doing nothing. Maybe manually check the cluster") 208 | clustersModified.Inc() 209 | case spec.DOWNSIZE_CLUSTER: 210 | //TODO remove poor mans casting :P 211 | //TODO support Downsizing Multiple Brokers 212 | brokerToDelete := currentEvent.Cluster.Spec.BrokerCount - 0 213 | methodLogger.Info("Downsizing Broker, deleting Data on Broker: ", brokerToDelete) 214 | 215 | //TODO INIT CC Rebalance 216 | 217 | //TODO wait till rebalcing complete 218 | err := kafka_kube.DownsizeCluster(currentEvent.Cluster, p.kube) 219 | if err != nil { 220 | //just re-try delete event 221 | internalErrors.Inc() 222 | p.sleep30AndSendEvent(currentEvent) 223 | break 224 | } 225 | clustersModified.Inc() 226 | case spec.CHANGE_ZOOKEEPER_CONNECT: 227 | methodLogger.Error("Trying to change zookeeper connect, not supported currently") 228 | clustersModified.Inc() 229 | case spec.CLEANUP_EVENT: 230 | methodLogger.Info("Recieved CleanupEvent, force delete of StatefuleSet.") 231 | //p.util.CleanupKafkaCluster(currentEvent.Cluster) 232 | clustersModified.Inc() 233 | case spec.KAKFA_EVENT: 234 | methodLogger.Debug("Kafka Event, heartbeat etc..") 235 | p.sleep30AndSendEvent(currentEvent) 236 | 237 | } 238 | } 239 | 240 | func (p *Processor) sleep30AndSendEvent(currentEvent spec.KafkaclusterEvent) { 241 | p.sleepAndSendEvent(currentEvent, 30) 242 | } 243 | 244 | func (p *Processor) sleepAndSendEvent(currentEvent spec.KafkaclusterEvent, seconds int) { 245 | go func() { 246 | time.Sleep(time.Second * time.Duration(seconds)) 247 | p.clusterEvents <- currentEvent 248 | }() 249 | } 250 | 251 | //Creates inside a goroutine a watch channel on the KafkaCLuster Endpoint and distibutes the events. 252 | //control chan used for showdown events from outside 253 | func (p *Processor) watchEvents() { 254 | 255 | p.crdController.MonitorKafkaEvents(p.watchEventsChannel, p.control) 256 | log.Debug("Watching Events") 257 | go func() { 258 | for { 259 | select { 260 | case currentEvent := <-p.watchEventsChannel: 261 | classifiedEvent := p.DetectChangeType(currentEvent) 262 | p.clusterEvents <- classifiedEvent 263 | case clusterEvent := <-p.clusterEvents: 264 | p.processEvent(clusterEvent) 265 | case err := <-p.errors: 266 | log.WithField("error", err).Error("Recieved Error through error channel") 267 | case ctl := <-p.control: 268 | log.WithField("control-event", ctl).Warn("Recieved Something on Control Channel, shutting down") 269 | return 270 | } 271 | } 272 | }() 273 | } 274 | 275 | // CreateKafkaCluster with the following components: Service, Volumes, StatefulSet. 
276 | //Maybe move this also into util 277 | func (p *Processor) createKafkaCluster(clusterSpec spec.Kafkacluster) { 278 | methodLogger := log.WithFields(log.Fields{ 279 | "method": "CreateKafkaCluster", 280 | "clusterName": clusterSpec.ObjectMeta.Name, 281 | }) 282 | 283 | err := kafka_kube.CreateCluster(clusterSpec, p.kube) 284 | if err != nil { 285 | methodLogger.WithField("error", err).Fatal("Cant create statefulset") 286 | } 287 | 288 | p.initKafkaClient(clusterSpec) 289 | 290 | err = exporter_kube.DeployOffsetMonitor(clusterSpec, p.kube) 291 | if err != nil { 292 | methodLogger.WithField("error", err).Fatal("Cant deploy stats exporter") 293 | } 294 | 295 | err = cruisecontrol_kube.DeployCruiseControl(clusterSpec, p.kube) 296 | if err != nil { 297 | methodLogger.WithField("error", err).Fatal("Cant deploy cruise-control exporter") 298 | } 299 | 300 | } 301 | 302 | func (p *Processor) deleteKafkaCluster(clusterSpec spec.Kafkacluster) error { 303 | client := p.kube 304 | err := exporter_kube.DeleteOffsetMonitor(clusterSpec, client) 305 | if err != nil { 306 | return err 307 | } 308 | err = cruisecontrol_kube.DeleteCruiseControl(clusterSpec, client) 309 | if err != nil { 310 | //Error while deleting, just resubmit event after wait time. 311 | return err 312 | 313 | } 314 | err = kafka_kube.DeleteCluster(clusterSpec, client) 315 | if err != nil { 316 | //Error while deleting, just resubmit event after wait time. 317 | return err 318 | } 319 | return nil 320 | } 321 | -------------------------------------------------------------------------------- /processor/processor_test.go: -------------------------------------------------------------------------------- 1 | package processor 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | 7 | "github.com/krallistic/kafka-operator/controller" 8 | "github.com/krallistic/kafka-operator/kafka" 9 | spec "github.com/krallistic/kafka-operator/spec" 10 | ) 11 | 12 | func TestProcessor_DetectChangeType(t *testing.T) { 13 | type fields struct { 14 | baseBrokerImage string 15 | crdController controller.CustomResourceController 16 | kafkaClusters map[string]*spec.Kafkacluster 17 | watchEventsChannel chan spec.KafkaclusterWatchEvent 18 | clusterEvents chan spec.KafkaclusterEvent 19 | kafkaClient map[string]*kafka.KafkaUtil 20 | control chan int 21 | errors chan error 22 | } 23 | type args struct { 24 | event spec.KafkaclusterWatchEvent 25 | } 26 | tests := []struct { 27 | name string 28 | fields fields 29 | args args 30 | want spec.KafkaclusterEvent 31 | }{ 32 | { 33 | "detect error state change if everything is nil", 34 | fields{}, 35 | args{ 36 | event: spec.KafkaclusterWatchEvent{}, 37 | }, 38 | spec.KafkaclusterEvent{Type: spec.ERROR_STATE, Cluster: spec.Kafkacluster{}}, 39 | }, 40 | { 41 | "detect if state is changed", 42 | fields{}, 43 | args{ 44 | event: spec.KafkaclusterWatchEvent{ 45 | Type: "TEST", 46 | Object: spec.Kafkacluster{ 47 | State: spec.KafkaclusterState{ 48 | Status: "stateAfter", 49 | }, 50 | }, 51 | OldObject: spec.Kafkacluster{ 52 | State: spec.KafkaclusterState{ 53 | Status: "stateBefore", 54 | }, 55 | }, 56 | }, 57 | }, 58 | spec.KafkaclusterEvent{ 59 | Type: spec.STATE_CHANGE, 60 | Cluster: spec.Kafkacluster{ 61 | State: spec.KafkaclusterState{ 62 | Status: "stateAfter", 63 | }, 64 | }, 65 | OldCluster: spec.Kafkacluster{ 66 | State: spec.KafkaclusterState{ 67 | Status: "stateBefore", 68 | }, 69 | }, 70 | }, 71 | }, 72 | { 73 | "replica count changed, upscale", 74 | fields{}, 75 | args{ 76 | event: spec.KafkaclusterWatchEvent{ 77 | Type: 
"TEST", 78 | Object: spec.Kafkacluster{ 79 | Spec: spec.KafkaclusterSpec{ 80 | BrokerCount: 3, 81 | }, 82 | }, 83 | OldObject: spec.Kafkacluster{ 84 | Spec: spec.KafkaclusterSpec{ 85 | BrokerCount: 2, 86 | }, 87 | }, 88 | }, 89 | }, 90 | spec.KafkaclusterEvent{ 91 | Type: spec.UPSIZE_CLUSTER, 92 | Cluster: spec.Kafkacluster{ 93 | Spec: spec.KafkaclusterSpec{ 94 | BrokerCount: 3, 95 | }, 96 | }, 97 | OldCluster: spec.Kafkacluster{ 98 | Spec: spec.KafkaclusterSpec{ 99 | BrokerCount: 2, 100 | }, 101 | }, 102 | }, 103 | }, 104 | { 105 | "replica count changed, downsclale", 106 | fields{}, 107 | args{ 108 | event: spec.KafkaclusterWatchEvent{ 109 | Type: "TEST", 110 | Object: spec.Kafkacluster{ 111 | Spec: spec.KafkaclusterSpec{ 112 | BrokerCount: 2, 113 | }, 114 | }, 115 | OldObject: spec.Kafkacluster{ 116 | Spec: spec.KafkaclusterSpec{ 117 | BrokerCount: 3, 118 | }, 119 | }, 120 | }, 121 | }, 122 | spec.KafkaclusterEvent{ 123 | Type: spec.DOWNSIZE_CLUSTER, 124 | Cluster: spec.Kafkacluster{ 125 | Spec: spec.KafkaclusterSpec{ 126 | BrokerCount: 2, 127 | }, 128 | }, 129 | OldCluster: spec.Kafkacluster{ 130 | Spec: spec.KafkaclusterSpec{ 131 | BrokerCount: 3, 132 | }, 133 | }, 134 | }, 135 | }, 136 | { 137 | "add cluster event", 138 | fields{}, 139 | args{ 140 | event: spec.KafkaclusterWatchEvent{ 141 | Type: "ADDED", 142 | Object: spec.Kafkacluster{ 143 | Spec: spec.KafkaclusterSpec{ 144 | BrokerCount: 2, 145 | }, 146 | }, 147 | }, 148 | }, 149 | spec.KafkaclusterEvent{ 150 | Type: spec.NEW_CLUSTER, 151 | Cluster: spec.Kafkacluster{ 152 | Spec: spec.KafkaclusterSpec{ 153 | BrokerCount: 2, 154 | }, 155 | }, 156 | }, 157 | }, 158 | { 159 | "delete cluster event", 160 | fields{}, 161 | args{ 162 | event: spec.KafkaclusterWatchEvent{ 163 | Type: "DELETED", 164 | Object: spec.Kafkacluster{ 165 | Spec: spec.KafkaclusterSpec{ 166 | BrokerCount: 2, 167 | }, 168 | }, 169 | }, 170 | }, 171 | spec.KafkaclusterEvent{ 172 | Type: spec.DELETE_CLUSTER, 173 | Cluster: spec.Kafkacluster{ 174 | Spec: spec.KafkaclusterSpec{ 175 | BrokerCount: 2, 176 | }, 177 | }, 178 | }, 179 | }, 180 | // TODO: Add test cases. 181 | } 182 | 183 | for _, tt := range tests { 184 | t.Run(tt.name, func(t *testing.T) { 185 | p := &Processor{ 186 | baseBrokerImage: tt.fields.baseBrokerImage, 187 | crdController: tt.fields.crdController, 188 | kafkaClusters: tt.fields.kafkaClusters, 189 | watchEventsChannel: tt.fields.watchEventsChannel, 190 | clusterEvents: tt.fields.clusterEvents, 191 | kafkaClient: tt.fields.kafkaClient, 192 | control: tt.fields.control, 193 | errors: tt.fields.errors, 194 | } 195 | if got := p.DetectChangeType(tt.args.event); !reflect.DeepEqual(got, tt.want) { 196 | t.Errorf("Processor.DetectChangeType() = %v, want %v", got, tt.want) 197 | } 198 | }) 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /spec/register.go: -------------------------------------------------------------------------------- 1 | package spec 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | "k8s.io/apimachinery/pkg/runtime" 6 | "k8s.io/apimachinery/pkg/runtime/schema" 7 | ) 8 | 9 | var ( 10 | SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) 11 | AddToScheme = SchemeBuilder.AddToScheme 12 | ) 13 | 14 | const ( 15 | CRDGroupName = "krallistic.github.com" 16 | CRDRessourcePlural = "kafkaclusters" 17 | CRDName = "kafkacluster" 18 | CRDVersion = "v1" 19 | ) 20 | 21 | var ( 22 | CRDFullName = CRDRessourcePlural + "." 
+ CRDGroupName 23 | ) 24 | 25 | // GroupName is the group name used in this package. 26 | 27 | // SchemeGroupVersion is the group version used to register these objects. 28 | var SchemeGroupVersion = schema.GroupVersion{Group: CRDGroupName, Version: CRDVersion} 29 | 30 | // Resource takes an unqualified resource and returns a Group-qualified GroupResource. 31 | func Resource(resource string) schema.GroupResource { 32 | return SchemeGroupVersion.WithResource(resource).GroupResource() 33 | } 34 | 35 | // addKnownTypes adds the set of types defined in this package to the supplied scheme. 36 | func addKnownTypes(scheme *runtime.Scheme) error { 37 | scheme.AddKnownTypes(SchemeGroupVersion, 38 | &Kafkacluster{}, 39 | &KafkaclusterList{}, 40 | ) 41 | metav1.AddToGroupVersion(scheme, SchemeGroupVersion) 42 | return nil 43 | } 44 | -------------------------------------------------------------------------------- /spec/spec.go: -------------------------------------------------------------------------------- 1 | package spec 2 | 3 | import ( 4 | "fmt" 5 | "reflect" 6 | 7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8 | "k8s.io/apimachinery/pkg/conversion" 9 | "k8s.io/apimachinery/pkg/runtime" 10 | "k8s.io/apimachinery/pkg/runtime/schema" 11 | ) 12 | 13 | //Main API Object 14 | type Kafkacluster struct { 15 | metav1.TypeMeta `json:",inline"` 16 | metav1.ObjectMeta `json:"metadata"` 17 | //Metadata metav1.ObjectMeta `json:"metadata"` 18 | 19 | Spec KafkaclusterSpec `json:"spec"` 20 | State KafkaclusterState `json:"state,omitempty"` 21 | Scale KafkaclusterScale `json:"scale,omitempty"` 22 | } 23 | 24 | // k8s API List Type 25 | type KafkaclusterList struct { 26 | metav1.TypeMeta `json:",inline"` 27 | metav1.ListMeta `json:"metadata"` 28 | //Metadata metav1.ListMeta `json:"metadata"` 29 | 30 | Items []Kafkacluster `json:"items"` 31 | } 32 | 33 | type KafkaclusterSpec struct { 34 | //Amount of Broker Nodes 35 | Image string `json:"image"` 36 | BrokerCount int32 `json:"brokerCount"` 37 | Resources ResourceSpec `json:"resources"` 38 | KafkaOptions KafkaOptions `json:"kafkaOptions"` 39 | JmxSidecar bool `json:"jmxSidecar"` 40 | Topics []KafkaTopicSpec `json:"topics"` 41 | ZookeeperConnect string `json:"zookeeperConnect"` 42 | NodeSelector map[string]string `json:"nodeSelector,omitempty"` 43 | StorageClass string `json:"storageClass"` //TODO use k8s type? 44 | 45 | // Toleration time if node is down/unreachable/not ready before moving to a new net 46 | // Set to 0 to disable moving to all together. 47 | MinimumGracePeriod int64 `json:"minimumGracePeriod"` 48 | 49 | LeaderImbalanceRatio float32 `json:"leaderImbalanceRatio"` 50 | LeaderImbalanceInterval int32 `json:"leaderImbalanceInterval"` 51 | } 52 | 53 | //KafkaclusterState Represent State field inside cluster, is used to do insert current state information. 
54 | type KafkaclusterState struct { 55 | Status string `json:"status,omitempty"` 56 | Topics []string `json:"topics,omitempty"` 57 | Brokers []BrokerState `json:"brokers,omitempty"` 58 | } 59 | 60 | //BrokerState contains state about brokers 61 | type BrokerState struct { 62 | ID string `json:"id,omitempty"` 63 | State string `json:"state,omitempty"` 64 | } 65 | 66 | //KafkaclusterScale represent the `scale` field inside the crd 67 | type KafkaclusterScale struct { 68 | CurrentScale int32 `json:"currentScale,omitempty"` 69 | DesiredScale int32 `json:"desiredScale,omitempty"` 70 | } 71 | 72 | //TODO refactor to just use native k8s types 73 | type ResourceSpec struct { 74 | Memory string `json:"memory"` 75 | DiskSpace string `json:"diskSpace"` 76 | CPU string `json:"cpu"` 77 | } 78 | 79 | type KafkaBrokerSpec struct { 80 | BrokerID int32 `json:"brokerID"` 81 | 82 | ClientPort int32 `json:"clientPort"` 83 | Topics map[string]string `json:"topics"` 84 | } 85 | 86 | type KafkaTopicSpec struct { 87 | Name string `json:"name"` 88 | Partitions int32 `json:"partitions"` 89 | ReplicationFactor int32 `json:"replicationFactor"` 90 | } 91 | 92 | type KafkaclusterWatchEvent struct { 93 | Type string `json:"type"` 94 | Object Kafkacluster `json:"object"` 95 | OldObject Kafkacluster `json:"oldObject"` 96 | } 97 | 98 | type KafkaOption struct { 99 | Name string `json:"name"` 100 | Value string `json:"value"` 101 | Type interface{} 102 | } 103 | 104 | //Unsused 105 | type KafkaOptions struct { 106 | // Default: true 107 | AutoCreateTopicsEnable *bool `json:"autoCreateTopicsEnable,omitempty"` 108 | 109 | //Enables auto balancing of topic leaders. Done by a background thread. 110 | // Default: true 111 | AutoLeaderRebalanceEnable *bool `json:"autoLeaderRebalanceEnable,omitempty"` 112 | 113 | //Amount of threads for various background tasks 114 | // Default: 10 115 | BackgroundThreads *int32 `json:"backgroudThreads,omitempty"` 116 | 117 | //Default compression type for a topic. Can be "gzip", "snappy", "lz4" 118 | // Default: "gzip" 119 | CompressionType *string `json:"compressionType,omitempty"` 120 | 121 | // Enables delete topic. Delete topic through the admin tool will have no effect if this config is turned off 122 | // Default: false 123 | DeleteTopicEnable *bool `json:"deleteTopicEnable,omitempty"` 124 | 125 | //The frequency with which the partition rebalance check is triggered by the controller 126 | // Default:300 127 | LeaderImbalanceCheckIntervalSeconds *int32 `json:"leaderImbalanceCheckIntervalSeconds,omitempty"` 128 | 129 | // The ratio of leader imbalance allowed per broker. 130 | // The controller would trigger a leader balance if it goes above this value per broker. The value is specified in percentage. 131 | // Default: 10 132 | LeaderImbalanceBrokerPercentage *int32 `json:"leaderImbalanceBrokerPercentage,omitempty"` 133 | 134 | //The number of messages accumulated on a log partition before messages are flushed to disk 135 | // Default: 9223372036854775807 136 | LogFlushIntervalMessages *int64 `json:"logFlushIntervalMessages,omitempty"` 137 | 138 | // The maximum time in ms that a message in any topic is kept in memory before flushed to disk. 
139 | // If not set, the value in log.flush.scheduler.interval.ms is used 140 | // Default: null 141 | LogFlushIntervalMs *int64 `json:"logFlushIntervalMs,omitempty"` 142 | 143 | //The frequency with which we update the persistent record of the last flush which acts as the log recovery point 144 | // Default: 60000 145 | LogFlushOffsetCheckpointIntervalMs *int32 `json:"logFlushOffsetCheckpointIntervalMs,omitempty"` 146 | 147 | //The frequency in ms that the log flusher checks whether any log needs to be flushed to disk 148 | // Default: 9223372036854775807 149 | LogFlushSchedulerIntervalMs *int64 `json:"LogFlushSchedulerIntervalMs,omitempty"` 150 | 151 | // The maximum size of the log before deleting it 152 | // Default: -1 153 | LogRetentionBytes *string `json:"logRetentionBytes,omitempty"` 154 | 155 | // The number of hours to keep a log file before deleting it (in hours), tertiary to log.retention.ms property 156 | // Default: 168 157 | LogRetentionHours *int32 `json:"logRetentionHours,omitempty"` 158 | 159 | //The maximum time before a new log segment is rolled out (in hours), secondary to log.roll.ms property 160 | // Default: 168 161 | LogRollHours *int32 `json:"logRollHours,omitempty"` 162 | 163 | // The maximum jitter to subtract from logRollTimeMillis (in hours), secondary to log.roll.jitter.ms property 164 | // Default: 0 165 | LogRollJitterHours *int32 `json:"logRollJitterHours,omitempty"` 166 | 167 | //The maximum size of a single log file 168 | // Default: 1073741824 169 | LogSegmentBytes *int32 `json:"logSegmentBytes,omitempty"` 170 | 171 | // The amount of time to wait before deleting a file from the filesystem 172 | // Default: 60000 173 | LogSegmentDeleteDelayMS *int64 `json:"logSegmentDeleteDelayMS,omitempty"` 174 | 175 | // The maximum size of message that the server can receive 176 | // Default: 1000012 177 | MessagesMaxBytes *int32 `json:"messagesMaxBytes,omitempty"` 178 | 179 | // When a producer sets acks to "all" (or "-1"), min.insync.replicas specifies the minimum number of replicas that must acknowledge 180 | // a write for the write to be considered successful. 181 | // Can be overwritten at topic level 182 | // Default: 1 183 | MinInsyncReplicas *int32 `json:"minInsyncReplicas,omitempty"` 184 | 185 | // The number of io threads that the server uses for carrying out network requests 186 | // Default: 8 187 | NumIOThreads *int32 `json:"numIOThreads,omitempty"` 188 | 189 | // The number of network threads that the server uses for handling network requests 190 | // Default: 3 191 | NumNetworkThreads *int32 `json:"numNetworkThreads,omitempty"` 192 | 193 | //The number of threads per data directory to be used for log recovery at startup and flushing at shutdown 194 | // Default: 1 195 | NumRecoveryThreadsPerDataDir *int32 `json:"numRecoveryThreadsPerDataDir,omitempty"` 196 | 197 | // Number of fetcher threads used to replicate messages from a source broker. 198 | // Increasing this value can increase the degree of I/O parallelism in the follower broker. 199 | // Default: 1 200 | NumReplicaFetchers *int32 `json:"numReplicaFetchers,omitempty"` 201 | 202 | // The maximum size for a metadata entry associated with an offset commit. 203 | // Default: 4096 204 | OffsetMetadataMaxBytes *int32 `json:"offsetMetadataMaxBytes,omitempty"` 205 | 206 | // The required acks before the commit can be accepted. 
In general, the default (-1) should not be overridden 207 | // Default: -1 208 | // Commented out because of dangerous option 209 | //OffsetCommitReadRequiredAcks int32 `json:"offsetCommitReadRequiredAcks"` 210 | 211 | //Offset commit will be delayed until all replicas for the offsets topic receive the commit or this timeout is reached. 212 | // This is similar to the producer request timeout. 213 | // Default: 5000 214 | OffsetCommitTimeoutMs *int32 `json:"offsetCommitTimeoutMs,omitempty"` 215 | 216 | // Batch size for reading from the offsets segments when loading offsets into the cache. 217 | // Default: 5242880 218 | OffsetLoadBufferSize *int32 `json:"offsetLoadBufferSize,omitempty"` 219 | 220 | // Frequency at which to check for stale offsets 221 | // Default: 600000 222 | OffsetRetentionCheckIntervalMs *int64 `json:"offsetRetentionCheckIntervalMs,omitempty"` 223 | 224 | // Log retention window in minutes for offsets topic 225 | // Default: 1440 226 | OffsetRetentionMinutes *int32 `json:"offsetRetentionMinutes,omitempty"` 227 | 228 | // Compression codec for the offsets topic - compression may be used to achieve "atomic" commits 229 | // Default: 0 230 | //Commented out, wrong doku? int fro compression??? 231 | //OffsetTopicCompressionCodec int32 `json:"offset_topic_compression_coded"` 232 | 233 | // The number of partitions for the offset commit topic (should not change after deployment) 234 | // Default: 50 235 | OffsetTopicNumPartitions *int32 `json:"offsetTopicNumPartitions,omitempty"` 236 | 237 | // The replication factor for the offsets topic (set higher to ensure availability). 238 | // To ensure that the effective replication factor of the offsets topic is the configured value, the number of alive brokers has to be at least the replication factor at the time of the first request for the offsets topic. 239 | // If not, either the offsets topic creation will fail or it will get a replication factor of min(alive brokers, configured replication factor) 240 | // Default: 3 241 | OffsetTopicReplicationFactor *int32 `json:"offsetTopicReplicationFactor,omitempty"` 242 | 243 | // The offsets topic segment bytes should be kept relatively small in order to facilitate faster log compaction and cache loads 244 | // Default: 104857600 245 | OffsetTopicSegmentsBytes *int32 `json:"offsetTopicSegmentsBytes,omitempty"` 246 | 247 | // The number of queued requests allowed before blocking the network threads 248 | // Default: 100 249 | QueuedMaxRequest *int32 `json:"queuedMaxRequest,omitempty"` 250 | 251 | // Minimum bytes expected for each fetch response. If not enough bytes, wait up to replicaMaxWaitTimeMs 252 | // Default: 1 253 | ReplicaFetchMinBytes *int32 `json:"replicaFetchMinBytes,omitempty"` 254 | 255 | //max wait time for each fetcher request issued by follower replicas. 
256 | // This value should always be less than the replica.lag.time.max.ms at all times to prevent frequent shrinking of ISR for low throughput topics 257 | //Default: 500 258 | ReplicaFetchWaitMaxMs *int32 `json:"replicaFetchWaitMaxMs,omitempty"` 259 | 260 | //The frequency with which the high watermark is saved out to disk 261 | // Default: 5000 262 | ReplicaHighWatermarkCheckpointIntervalMs *int64 `json:"replicaHighWatermarkCheckpointIntervalMs,omitempty"` 263 | 264 | // If a follower hasn't sent any fetch requests or hasn't consumed up to the leaders log end offset for at least this time, 265 | // the leader will remove the follower from isr 266 | // Defaut: 10000 267 | ReplicaLagTimeMaxMs *int64 `json:"replicaLagTimeMaxMs,omitempty"` 268 | 269 | // The socket receive buffer for network requests 270 | // Default: 65536 271 | ReplicaSocketReceiveBufferBytes *int32 `json:"replicaSocketReceiveBufferBytes,omitempty"` 272 | 273 | // The socket timeout for network requests. Its value should be at least replica.fetch.wait.max.ms 274 | // Default: 30000 275 | ReplicaSocketTimeoutMs *int32 `json:"replicaSocketTimeoutMs,omitempty"` 276 | 277 | // The configuration controls the maximum amount of time the client will wait for the response of a request. 278 | // If the response is not received before the timeout elapses the client will resend the request if necessary 279 | // or fail the request if retries are exhausted. 280 | // Default: 30000 281 | RequestTimeoutMs *int32 `json:"requestTimeoutMs,omitempty"` 282 | 283 | //Socket Settings? TODO? needed 284 | 285 | // Indicates whether to enable replicas not in the ISR set to be elected as leader as a last resort, even though doing so may result in data loss 286 | // Default: true 287 | UncleanLeaderElectionEnable *bool `json:"uncleanLeaderElectionEnable,omitempty"` 288 | 289 | // The max time that the client waits to establish a connection to zookeeper. 290 | // If not set, the value in zookeeper.session.timeout.ms is used 291 | // Default: null 292 | ZookeeperConnectionTimeoutMs *int32 `json:"zookeeperConnectionTimeoutMs,omitempty"` 293 | 294 | // Zookeeper session timeout 295 | // Default: 6000 296 | ZookeeperSessionTimeoutMs *int32 `json:"zookeeperSessionTimeoutMs,omitempty"` 297 | } 298 | 299 | //ReassigmentConfig 300 | type KafkaReassignmentConfig struct { 301 | Partition []KafkaPartition `json:"partition"` 302 | Version string `json:"version"` 303 | } 304 | 305 | type KafkaPartition struct { 306 | Partition int32 `json:"partition"` 307 | Replicas []int32 `json:"replicas"` 308 | } 309 | 310 | type KafkaTopic struct { 311 | Topic string `json:"topic"` 312 | PartitionFactor int32 `json:"partition_factor"` 313 | ReplicationFactor int32 `json:"replication_factor"` 314 | Partitions []KafkaPartition `json:"partitions"` 315 | } 316 | 317 | //No json needed since internal Event type. 318 | type KafkaclusterEvent struct { 319 | Type KafkaEventType 320 | Cluster Kafkacluster 321 | OldCluster Kafkacluster 322 | } 323 | 324 | type KafkaEventType int32 325 | 326 | const ( 327 | //Event when a new CR Object is detected and a cluster needs to be created. 328 | NEW_CLUSTER KafkaEventType = iota + 1 329 | //Event when a deletion of a CR Object is detected. Deletion of that Cluster is initiatated 330 | DELETE_CLUSTER 331 | //Event when a CR Object is changed and currentReplicas < DesisredReplicas. Creating new node and trigger an rebalance. 332 | UPSIZE_CLUSTER 333 | //CR Object changed, current > desired. Initiating Rebalancing. 
334 | DOWNSIZE_CLUSTER 335 | //Different Image of the Broker, Update Chain 336 | CHANGE_IMAGE 337 | //Different Broker Ressource, Rolling Update 338 | CHANGE_BROKER_RESOURCES 339 | 340 | CHANGE_NAME 341 | CHANGE_ZOOKEEPER_CONNECT 342 | //Different Broker Config, Rolling Update. 343 | BROKER_CONFIG_CHANGE 344 | UNKNOWN_CHANGE 345 | 346 | DOWNSIZE__EVENT 347 | //Cleanup event which get emmised after a Cluster Delete. 348 | //Its ensure the deletion of the Statefulset after it has been scaled down. 349 | CLEANUP_EVENT 350 | KAKFA_EVENT 351 | STATE_CHANGE 352 | SCALE_CHANGE 353 | ERROR_STATE 354 | ) 355 | 356 | type KafkaBrokerState string 357 | 358 | const ( 359 | EMPTY_BROKER KafkaBrokerState = "EMPTYING" 360 | REBALANCE_BROKER KafkaBrokerState = "REBALANCING" 361 | NORMAL_STATE KafkaBrokerState = "NORMAL" 362 | ) 363 | 364 | //convenience functions 365 | func PrintCluster(cluster *Kafkacluster) string { 366 | return fmt.Sprintf("%s/%s, APIVersion: %s, Kind: %s, Value: %#v", cluster.ObjectMeta.Namespace, cluster.ObjectMeta.Name, cluster.APIVersion, cluster.Kind, cluster) 367 | } 368 | 369 | // Required to satisfy Object interface 370 | func (e *Kafkacluster) GetObjectKind() schema.ObjectKind { 371 | return &e.TypeMeta 372 | } 373 | 374 | // Required to satisfy Object interface 375 | func (el *KafkaclusterList) GetObjectKind() schema.ObjectKind { 376 | 377 | return &el.TypeMeta 378 | } 379 | 380 | //Shamefull copied over from: https://github.com/kubernetes/client-go/blob/master/examples/third-party-resources/types.go 381 | // The code below is used only to work around a known problem with third-party 382 | // resources and ugorji. If/when these issues are resolved, the code below 383 | // should no longer be required. 384 | 385 | // type KafkaclusterListCopy KafkaclusterList 386 | // type KafkaclusterCopy Kafkacluster 387 | 388 | // func (e *Kafkacluster) UnmarshalJSON(data []byte) error { 389 | // tmp := KafkaclusterCopy{} 390 | // err := json.Unmarshal(data, &tmp) 391 | // if err != nil { 392 | // return err 393 | // } 394 | // tmp2 := Kafkacluster(tmp) 395 | // *e = tmp2 396 | // return nil 397 | // } 398 | 399 | // func (el *KafkaclusterList) UnmarshalJSON(data []byte) error { 400 | // tmp := KafkaclusterListCopy{} 401 | // err := json.Unmarshal(data, &tmp) 402 | // if err != nil { 403 | // return err 404 | // } 405 | // tmp2 := KafkaclusterList(tmp) 406 | // *el = tmp2 407 | // return nil 408 | // } 409 | 410 | // Deprecated: GetGeneratedDeepCopyFuncs returns the generated funcs, since we aren't registering them. 411 | func GetGeneratedDeepCopyFuncs() []conversion.GeneratedDeepCopyFunc { 412 | return []conversion.GeneratedDeepCopyFunc{ 413 | {Fn: func(in interface{}, out interface{}, c *conversion.Cloner) error { 414 | in.(*Kafkacluster).DeepCopyInto(out.(*Kafkacluster)) 415 | return nil 416 | }, InType: reflect.TypeOf(&Kafkacluster{})}, 417 | {Fn: func(in interface{}, out interface{}, c *conversion.Cloner) error { 418 | in.(*KafkaclusterList).DeepCopyInto(out.(*KafkaclusterList)) 419 | return nil 420 | }, InType: reflect.TypeOf(&KafkaclusterList{})}, 421 | {Fn: func(in interface{}, out interface{}, c *conversion.Cloner) error { 422 | in.(*KafkaclusterSpec).DeepCopyInto(out.(*KafkaclusterSpec)) 423 | return nil 424 | }, InType: reflect.TypeOf(&KafkaclusterSpec{})}, 425 | } 426 | } 427 | 428 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
428 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
429 | func (in *Kafkacluster) DeepCopyInto(out *Kafkacluster) {
430 |     *out = *in
431 |     out.TypeMeta = in.TypeMeta
432 |     //in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
433 |     out.ObjectMeta = in.ObjectMeta
434 | 
435 |     out.Spec = in.Spec
436 |     return
437 | }
438 | 
439 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Kafkacluster.
440 | func (x *Kafkacluster) DeepCopy() *Kafkacluster {
441 |     if x == nil {
442 |         return nil
443 |     }
444 |     out := new(Kafkacluster)
445 |     x.DeepCopyInto(out)
446 |     return out
447 | }
448 | 
449 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
450 | func (x *Kafkacluster) DeepCopyObject() runtime.Object {
451 |     if c := x.DeepCopy(); c != nil {
452 |         return c
453 |     } else {
454 |         return nil
455 |     }
456 | }
457 | 
458 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
459 | func (in *KafkaclusterList) DeepCopyInto(out *KafkaclusterList) {
460 |     *out = *in
461 |     out.TypeMeta = in.TypeMeta
462 |     out.ListMeta = in.ListMeta
463 |     if in.Items != nil {
464 |         in, out := &in.Items, &out.Items
465 |         *out = make([]Kafkacluster, len(*in))
466 |         for i := range *in {
467 |             (*in)[i].DeepCopyInto(&(*out)[i])
468 |         }
469 |     }
470 |     return
471 | }
472 | 
473 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KafkaclusterList.
474 | func (x *KafkaclusterList) DeepCopy() *KafkaclusterList {
475 |     if x == nil {
476 |         return nil
477 |     }
478 |     out := new(KafkaclusterList)
479 |     x.DeepCopyInto(out)
480 |     return out
481 | }
482 | 
483 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
484 | func (x *KafkaclusterList) DeepCopyObject() runtime.Object {
485 |     if c := x.DeepCopy(); c != nil {
486 |         return c
487 |     } else {
488 |         return nil
489 |     }
490 | }
491 | 
492 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
493 | func (in *KafkaclusterSpec) DeepCopyInto(out *KafkaclusterSpec) {
494 |     *out = *in
495 |     return
496 | }
497 | 
498 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KafkaclusterSpec.
499 | func (x *KafkaclusterSpec) DeepCopy() *KafkaclusterSpec {
500 |     if x == nil {
501 |         return nil
502 |     }
503 |     out := new(KafkaclusterSpec)
504 |     x.DeepCopyInto(out)
505 |     return out
506 | }
507 | 
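The generated deep-copy helpers above matter mostly when a controller mutates objects obtained from a shared informer cache. A brief, hypothetical usage sketch; the BrokerCount field name and its int32 type are assumptions based on its use elsewhere in this repository:

// withBrokerCount is a hypothetical helper: mutate a copy, never the object
// handed out by a shared cache, so other consumers see unchanged state.
func withBrokerCount(observed *Kafkacluster, count int32) *Kafkacluster {
    desired := observed.DeepCopy()
    desired.Spec.BrokerCount = count
    return desired
}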
--------------------------------------------------------------------------------
/spec/spec_test.go:
--------------------------------------------------------------------------------
1 | package spec
2 | 
3 | import (
4 |     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
5 |     "k8s.io/apimachinery/pkg/runtime"
6 | )
7 | 
8 | var _ runtime.Object = &Kafkacluster{}
9 | var _ metav1.ObjectMetaAccessor = &Kafkacluster{}
10 | 
11 | var _ runtime.Object = &KafkaclusterList{}
12 | var _ metav1.ListMetaAccessor = &KafkaclusterList{}
13 | 
14 | // func exampleFuzzerFuncs(t apitesting.TestingCommon) []interface{} {
15 | //     return []interface{}{
16 | //         func(obj *KafkaclusterList, c fuzz.Continue) {
17 | //             c.FuzzNoCustom(obj)
18 | //             obj.Items = make([]Kafkacluster, c.Intn(10))
19 | //             for i := range obj.Items {
20 | //                 c.Fuzz(&obj.Items[i])
21 | //             }
22 | //         },
23 | //     }
24 | // }
25 | 
26 | // // TestRoundTrip tests that the third-party kinds can be marshaled and unmarshaled correctly to/from JSON
27 | // // without the loss of information. Moreover, deep copy is tested.
28 | // func TestRoundTrip(t *testing.T) {
29 | //     scheme := runtime.NewScheme()
30 | //     codecs := serializer.NewCodecFactory(scheme)
31 | 
32 | //     AddToScheme(scheme)
33 | 
34 | //     seed := rand.Int63()
35 | //     fuzzerFuncs := fuzzer.MergeFuzzerFuncs(t, fuzzer.GenericFuzzerFuncs(t, codecs), exampleFuzzerFuncs(t))
36 | //     fuzzer := fuzzer.FuzzerFor(fuzzerFuncs, rand.NewSource(seed))
37 | 
38 | //     roundtrip.RoundTripSpecificKindWithoutProtobuf(t, SchemeGroupVersion.WithKind("Kafkacluster"), scheme, codecs, fuzzer, nil)
39 | //     roundtrip.RoundTripSpecificKindWithoutProtobuf(t, SchemeGroupVersion.WithKind("KafkaclusterList"), scheme, codecs, fuzzer, nil)
40 | // }
41 | 
--------------------------------------------------------------------------------
/util/util.go:
--------------------------------------------------------------------------------
1 | package util
2 | 
3 | import (
4 |     "github.com/krallistic/kafka-operator/spec"
5 | 
6 |     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
7 | 
8 |     "k8s.io/api/core/v1"
9 |     k8sclient "k8s.io/client-go/kubernetes"
10 | 
11 |     "strconv"
12 | 
13 |     log "github.com/Sirupsen/logrus"
14 |     "k8s.io/apimachinery/pkg/api/errors"
15 | )
16 | 
17 | const (
18 | 
19 |     // TODO: move default options to spec
20 |     defaultCPU      = "1"
21 |     defaultDisk     = "100G"
22 |     defaultMemory   = "4Gi"
23 |     stateAnnotation = "kafka-cluster.incubator/state"
24 | )
25 | 
26 | var (
27 |     logger = log.WithFields(log.Fields{
28 |         "package": "util",
29 |     })
30 | )
31 | 
32 | type ClientUtil struct {
33 |     KubernetesClient *k8sclient.Clientset
34 |     MasterHost       string
35 |     DefaultOption    metav1.GetOptions
36 | }
37 | 
38 | func EnrichSpecWithLogger(logger *log.Entry, cluster spec.Kafkacluster) *log.Entry {
39 |     return logger.WithFields(log.Fields{"clusterName": cluster.ObjectMeta.Name, "namespace": cluster.ObjectMeta.Namespace})
40 | }
41 | 
42 | func (c *ClientUtil) createLabels(cluster spec.Kafkacluster) map[string]string {
43 |     labels := map[string]string{
44 |         "component": "kafka",
45 |         "creator":   "kafka-operator",
46 |         "role":      "data",
47 |         "name":      cluster.ObjectMeta.Name,
48 |     }
49 |     return labels
50 | }
51 | 
52 | func (c *ClientUtil) CreateDirectBrokerService(cluster spec.Kafkacluster) error {
53 |     methodLogger := logger.WithFields(log.Fields{
54 |         "method":      "CreateDirectBrokerService",
55 |         "name":        cluster.ObjectMeta.Name,
56 |         "namespace":   cluster.ObjectMeta.Namespace,
57 |         "brokerCount": cluster.Spec.BrokerCount,
58 |     })
59 | 
60 |     brokerCount := cluster.Spec.BrokerCount
61 |     methodLogger.Info("Creating direct broker SVCs")
62 | 
63 |     for i := 0; i < int(brokerCount); i++ {
64 | 
65 |         serviceName := cluster.ObjectMeta.Name + "-broker-" + strconv.Itoa(i)
66 |         methodLogger.WithFields(log.Fields{
67 |             "id":           i,
68 |             "service_name": serviceName,
69 |         }).Info("Creating direct broker SVC")
70 | 
71 |         svc, err := c.KubernetesClient.Core().Services(cluster.ObjectMeta.Namespace).Get(serviceName, c.DefaultOption)
72 |         if err != nil {
73 |             if !errors.IsNotFound(err) {
74 |                 methodLogger.WithFields(log.Fields{
75 |                     "error": err,
76 |                 }).Error("Can't get service info from API")
77 |                 return err
78 |             }
79 |         }
80 |         if len(svc.Name) == 0 {
81 |             // Service doesn't exist, creating it
82 | 
83 |             labelSelectors := c.createLabels(cluster)
84 |             labelSelectors["kafka_broker_id"] = strconv.Itoa(i)
85 |             objectMeta := metav1.ObjectMeta{
86 |                 Name:        serviceName,
87 |                 Namespace:   cluster.ObjectMeta.Namespace,
88 |                 Annotations: labelSelectors,
89 |             }
90 | 
91 |             service := &v1.Service{
92 |                 ObjectMeta: objectMeta,
93 |                 Spec: v1.ServiceSpec{
94 |                     Type:     v1.ServiceTypeNodePort,
95 |                     Selector: labelSelectors,
96 |                     Ports: []v1.ServicePort{
97 |                         v1.ServicePort{
98 |                             Name: "broker",
99 |                             Port: 9092,
100 |                             //NodePort: 30920,
101 |                         },
102 |                     },
103 |                 },
104 |             }
105 |             _, err := c.KubernetesClient.Core().Services(cluster.ObjectMeta.Namespace).Create(service)
106 |             if err != nil {
107 |                 methodLogger.WithFields(log.Fields{
108 |                     "error":        err,
109 |                     "service_name": serviceName,
110 |                 }).Error("Error while creating direct broker service")
111 |                 return err
112 |             }
113 |             methodLogger.WithFields(log.Fields{
114 |                 "service":      service,
115 |                 "service_name": serviceName,
116 |             }).Debug("Created direct access service")
117 |         }
118 |     }
119 |     return nil
120 | }
121 | 
122 | func GetBrokerAdressess(cluster spec.Kafkacluster) []string {
123 |     brokers := make([]string, cluster.Spec.BrokerCount)
124 | 
125 |     // TODO: make the governing domain configurable
126 |     dnsSuffix := cluster.ObjectMeta.Name + "." + cluster.ObjectMeta.Namespace + ".svc.cluster.local"
127 |     port := "9092"
128 | 
129 |     for i := 0; i < int(cluster.Spec.BrokerCount); i++ {
130 |         hostName := cluster.ObjectMeta.Name + "-" + strconv.Itoa(i)
131 |         brokers[i] = hostName + "." + dnsSuffix + ":" + port
132 |     }
133 | 
134 |     log.WithFields(log.Fields{
135 |         "method":  "GetBrokerAdressess",
136 |         "cluster": cluster.ObjectMeta.Name,
137 |         "broker":  brokers,
138 |     }).Info("Created broker addresses")
139 |     return brokers
140 | }
141 | 
--------------------------------------------------------------------------------
/util/util_test.go:
--------------------------------------------------------------------------------
1 | package util
--------------------------------------------------------------------------------
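As a closing usage note for util.go: a hypothetical example of the bootstrap addresses GetBrokerAdressess yields. The cluster name and namespace below are invented, and the DNS shape assumes the StatefulSet's governing headless service shares the cluster's name:

// exampleBootstrapBrokers is a hypothetical illustration only: the slice that
// GetBrokerAdressess would return for a Kafkacluster named "kafka-cluster" in
// namespace "default" with Spec.BrokerCount = 3. A Kafka client can use this
// list directly as its bootstrap brokers.
func exampleBootstrapBrokers() []string {
    return []string{
        "kafka-cluster-0.kafka-cluster.default.svc.cluster.local:9092",
        "kafka-cluster-1.kafka-cluster.default.svc.cluster.local:9092",
        "kafka-cluster-2.kafka-cluster.default.svc.cluster.local:9092",
    }
}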