├── .codeclimate.yml ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── cmd └── server.go ├── diagram.png ├── doc ├── images │ └── http_probes.png └── metrics.md ├── examples ├── deploy.sh ├── deployment.yml ├── deployment_etcd.yml ├── pod.yml └── svc.yml ├── glide.lock ├── glide.yaml ├── helm-chart └── netchecker-server │ ├── .helmignore │ ├── Chart.yaml │ ├── templates │ ├── NOTES.txt │ ├── _helpers.tpl │ ├── pod.yaml │ ├── rbac-config.yaml │ └── service.yaml │ └── values.yaml ├── pkg ├── extensions │ ├── apis │ │ └── v1 │ │ │ ├── register.go │ │ │ └── types.go │ └── client │ │ ├── agent.go │ │ └── client.go └── utils │ ├── config.go │ ├── data.go │ ├── handler.go │ ├── handler_test.go │ ├── k8s.go │ ├── metrics.go │ ├── storer_etcd.go │ ├── storer_k8s.go │ ├── storer_types.go │ └── utils.go ├── scripts ├── build_image_server_or_agent.sh ├── docker_publish.sh ├── helm_install_and_deploy.sh ├── import_images.sh └── kubeadm_dind_cluster.sh └── test └── e2e ├── basic_suite_test.go ├── basic_test.go └── utils └── utils.go /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | engines: 2 | gofmt: 3 | enabled: true 4 | golint: 5 | enabled: true 6 | #checks: 7 | # GoLint/Comments/DocComments: 8 | # enabled: false 9 | # GoLint/Imports/ImportDot: 10 | # enabled: false 11 | govet: 12 | enabled: true 13 | shellcheck: 14 | enabled: true 15 | markdownlint: 16 | enabled: true 17 | fixme: 18 | enabled: true 19 | config: 20 | strings: 21 | - FIXME 22 | - BUG 23 | - TODO 24 | ratings: 25 | paths: 26 | - "**.go" 27 | - "**.sh" 28 | - "**.md" 29 | #exclude_paths: 30 | # - "dir/dir/file.txt" 31 | # - "dir/*" 32 | # - spec/**/* 33 | # - "**/vendor/**/*" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _output 2 | vendor 3 | 4 | # CI related 5 | .build-image.complete 6 | 
.env-prepare.complete 7 | scripts/dind-cluster-v* 8 | scripts/get_helm.sh 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | services: 3 | - docker 4 | language: go 5 | go: 6 | - 1.8.x 7 | install: 8 | - make get-deps 9 | script: 10 | - make test 11 | - make docker-publish -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Mirantis 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | FROM mkoppanen/etcdtool 16 | 17 | MAINTAINER Artem Roma 18 | 19 | COPY _output/server /usr/bin/netchecker-server 20 | 21 | ENTRYPOINT ["netchecker-server", "-logtostderr"] 22 | CMD ["-v=5", "-kubeproxyinit", "-endpoint=0.0.0.0:8081"] 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Mirantis 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | IMAGE_REPO_SERVER ?= mirantis/k8s-netchecker-server 17 | IMAGE_REPO_AGENT ?= mirantis/k8s-netchecker-agent 18 | HELM_SERVER_PATH ?= helm-chart/netchecker-server 19 | HELM_AGENT_PATH ?= helm-chart/netchecker-agent 20 | HELM_SCRIPT_NAME ?= get_helm.sh 21 | # repo for biuld agent docker image 22 | NETCHECKER_REPO ?= k8s-netchecker-agent 23 | DOCKER_BUILD ?= no 24 | 25 | BUILD_DIR = _output 26 | VENDOR_DIR = vendor 27 | ROOT_DIR = $(abspath $(dir $(lastword $(MAKEFILE_LIST)))) 28 | 29 | # kubeadm-dind-cluster supports k8s versions: 30 | # "v1.6", "v1.7" and "v1.8". 31 | DIND_CLUSTER_VERSION ?= v1.8 32 | 33 | VERSION=$(shell date +'%Y%m%d-%H:%M:%S-%Z') 34 | 35 | ENV_PREPARE_MARKER = .env-prepare.complete 36 | BUILD_IMAGE_MARKER = .build-image.complete 37 | 38 | 39 | ifeq ($(DOCKER_BUILD), yes) 40 | _DOCKER_GOPATH = /go 41 | _DOCKER_WORKDIR = $(_DOCKER_GOPATH)/src/github.com/Mirantis/k8s-netchecker-server/ 42 | _DOCKER_IMAGE = golang:1.8 43 | DOCKER_EXEC = docker run --rm -it -v "$(ROOT_DIR):$(_DOCKER_WORKDIR)" \ 44 | -w "$(_DOCKER_WORKDIR)" $(_DOCKER_IMAGE) 45 | else 46 | DOCKER_EXEC = 47 | endif 48 | 49 | 50 | .PHONY: help 51 | help: 52 | @echo "For containerized "make get-deps"" 53 | @echo "and "make test" export DOCKER_BUILD=yes" 54 | @echo "" 55 | @echo "Usage: 'make '" 56 | @echo "" 57 | @echo "Targets:" 58 | @echo "help - Print this message and exit" 59 | @echo "get-deps - Install project dependencies" 60 | @echo "build - Build k8s-netchecker-server binary" 61 | @echo "containerized-build - Build k8s-netchecker-server binary in container" 62 | @echo "build-image - Build docker image" 63 | @echo "test - Run all tests" 64 | @echo "unit - Run unit tests" 65 | @echo "e2e - Run e2e tests" 66 | @echo "docker-publish - Push images to Docker Hub registry" 67 | @echo "clean - Delete binaries" 68 | @echo "clean-k8s - Delete kubeadm-dind-cluster" 69 | @echo "clean-all - Delete binaries and vendor files" 70 | 71 | 72 | .PHONY: get-deps 73 | get-deps: 
$(VENDOR_DIR) 74 | 75 | 76 | .PHONY: build 77 | build: $(BUILD_DIR)/server 78 | 79 | 80 | .PHONY: containerized-build 81 | containerized-build: 82 | make build DOCKER_BUILD=yes 83 | 84 | 85 | .PHONY: build-image 86 | build-image: $(BUILD_IMAGE_MARKER) 87 | 88 | 89 | .PHONY: unit 90 | unit: 91 | $(DOCKER_EXEC) go test -v ./pkg/... 92 | 93 | 94 | .PHONY: e2e 95 | e2e: $(BUILD_DIR)/e2e.test $(ENV_PREPARE_MARKER) 96 | sudo $(BUILD_DIR)/e2e.test --master=http://localhost:8080 -ginkgo.v 97 | 98 | 99 | .PHONY: test 100 | test: unit e2e 101 | 102 | 103 | .PHONY: docker-publish 104 | docker-publish: 105 | IMAGE_REPO=$(IMAGE_REPO_SERVER) bash ./scripts/docker_publish.sh 106 | 107 | 108 | .PHONY: clean 109 | clean: 110 | rm -rf $(BUILD_DIR) 111 | 112 | 113 | .PHONY: clean-k8s 114 | clean-k8s: 115 | rm -f ./scripts/$(HELM_SCRIPT_NAME) 116 | rm -rf $(HOME)/.helm 117 | bash ./scripts/dind-cluster-$(DIND_CLUSTER_VERSION).sh clean 118 | rm -f ./scripts/dind-cluster-$(DIND_CLUSTER_VERSION).sh 119 | rm -rf $(HOME)/.kubeadm-dind-cluster 120 | rm -rf $(HOME)/.kube 121 | rm -f $(ENV_PREPARE_MARKER) 122 | 123 | 124 | .PHONY: clean-all 125 | clean-all: clean clean-k8s 126 | rm -rf $(VENDOR_DIR) 127 | docker rmi -f $(IMAGE_REPO_SERVER) 128 | docker rmi -f $(IMAGE_REPO_AGENT) 129 | rm -f $(BUILD_IMAGE_MARKER) 130 | 131 | 132 | $(BUILD_DIR): 133 | mkdir -p $(BUILD_DIR) 134 | 135 | 136 | $(VENDOR_DIR): 137 | $(DOCKER_EXEC) sh -xc 'go get github.com/Masterminds/glide && \ 138 | glide install --strip-vendor; \ 139 | chown $(shell id -u):$(shell id -g) -R $(VENDOR_DIR)' 140 | 141 | 142 | $(BUILD_DIR)/server: $(BUILD_DIR) $(VENDOR_DIR) 143 | $(DOCKER_EXEC) sh -xc '\ 144 | CGO_ENABLED=0 go build --ldflags "-s -w -X main.version=${VERSION}" \ 145 | -x -o $@ ./cmd/server.go; \ 146 | chown $(shell id -u):$(shell id -g) -R $(BUILD_DIR)' 147 | 148 | 149 | 150 | $(BUILD_DIR)/e2e.test: $(BUILD_DIR) $(VENDOR_DIR) 151 | $(DOCKER_EXEC) bash -xc '\ 152 | go test -c -o $@ ./test/e2e/' 153 | 154 | 155 | 
$(BUILD_IMAGE_MARKER): $(BUILD_DIR)/server 156 | docker build -t $(IMAGE_REPO_SERVER) . 157 | touch $(BUILD_IMAGE_MARKER) 158 | 159 | 160 | $(ENV_PREPARE_MARKER): build-image 161 | NETCHECKER_REPO=$(NETCHECKER_REPO) bash ./scripts/build_image_server_or_agent.sh 162 | bash ./scripts/kubeadm_dind_cluster.sh 163 | IMAGE_REPO_SERVER=$(IMAGE_REPO_SERVER) IMAGE_REPO_AGENT=$(IMAGE_REPO_AGENT) bash ./scripts/import_images.sh 164 | # NETCHECKER_REPO=$(NETCHECKER_REPO) bash ./scripts/helm_install_and_deploy.sh 165 | touch $(ENV_PREPARE_MARKER) 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Status 2 | 3 | [![Build Status](https://goo.gl/XzSwDu)](https://goo.gl/bx20uy) 4 | [![Stories in Progress](https://goo.gl/Y3SfPH)](https://goo.gl/eY1d9l) 5 | [![Go Report Card](https://goo.gl/EN7y2i)](https://goo.gl/ultF3D) 6 | [![Code Climate](https://goo.gl/F5iNWP)](https://goo.gl/mGsQj1) 7 | [![License Apache 2.0](https://goo.gl/joRzTI)](https://goo.gl/pbOuG0) 8 | [![Docker Pulls](https://goo.gl/ZYz1nt)](https://goo.gl/nAfD9C) 9 | 10 | ## What it is and how it works 11 | 12 | ![Diagram](diagram.png) 13 | 14 | Network checker is a Kubernetes application. Its main purpose is checking 15 | of connectivity between the cluster's nodes. Network checker consists of two 16 | parts: server (this repository) and agent 17 | ([developed here](https://github.com/Mirantis/k8s-netchecker-agent)). Agents 18 | are deployed on every Kubernetes node using 19 | [Daemonset](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/). 20 | Agents come in two flavors - and default setup includes two corresponding 21 | daemonsets. The difference between them is that "Agent-hostnet" is tapped into 22 | host network namespace via supplying `hostNetwork: True` key-value for the 23 | corresponding Pod's specification. 
As shown on the diagram, both daemonsets 24 | are enabled for each node meaning exactly one pod of each kind will be deployed 25 | on each node. 26 | 27 | The agents periodically gather network related information 28 | (e.g. interfaces' info, results of nslookup, results of latencies measurement, 29 | etc.) and send it to the server as periodic agent reports. 30 | Report includes agent pod name and its node name so that the report is uniquely 31 | identified using them. 32 | 33 | The server is deployed in a dedicated pod using 34 | [Deployment](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/) 35 | and exposed inside of the cluster via Kubernetes service resource. Thus, every 36 | agent can access the server by the service's DNS name. 37 | 38 | Server processes the incoming agent data (agents' reports) and stores it in a 39 | persistent data storage. Server is capable of using either Kubernetes third party 40 | resources (TPR) or etcd as a persistent data storage: 41 | 42 | - TPR. New data type called `agent` was added into TPR, Kubernetes API was extended 43 | with this new type, and all agent data is stored using it. When using TPR, 44 | the server is vulnerable to [date change issue](https://github.com/Mirantis/k8s-netchecker-server/issues/80). 45 | The issue was solved by using etcd and its TTL feature. Please also note 46 | that TPR is deprecated starting from Kubernetes v.1.7 and can be removed in 47 | future Kubernetes versions. It will not be supported in Netchecker then. No 48 | migration to Kubernetes CRD (substitution for TPR) is planned either. 49 | - etcd. The recommended storage provider. When using etcd, the server is resistant 50 | to issues described in TPR section. Agent data is stored in etcd in this case, 51 | under `/netchecker` path. 52 | 53 | Server also calculates metrics based on agent data. 
Metrics data is stored in 54 | server's memory for now - this implies loss of metrics data when server 55 | application is shut down or restarted; it is going to be reworked by moving to 56 | a persistent storage (to etcd only) in future. 57 | 58 | Server provides HTTP RESTful interface which currently includes the following 59 | requests (verb - URI designator - meaning of the operation): 60 | 61 | - GET/POST - /api/v1/agents/{agent_name} - get, create/update agent's data record 62 | in a persistent storage. 63 | - GET - /api/v1/agents/ - get the whole agent data dump. 64 | - GET - /api/v1/connectivity_check - get result of connectivity check between 65 | the server and the agents. 66 | - GET - /metrics - get the network checker metrics. 67 | 68 | The main logic of network checking is implemented behind `connectivity_check` 69 | endpoint. It is the only user-facing URI. 70 | In order to determine whether connectivity is present between the server and 71 | agents, the former retrieves the list of pods using Kubernetes API 72 | (filtering by labels `netchecker-agent` and `netchecker-agent-hostnet`), then 73 | analyses stored agent data. 74 | Success of the checking is determined based on two criteria. 75 | First - there is an entry in the stored data for each retrieved agent's pod; 76 | it means an agent request has got through the network to the server. Consequently, 77 | link is established and active within the agent-server pair. 78 | Second - difference between the time of the check and the time when the data 79 | was received from particular agent must not exceed two periods of agent's 80 | reporting (there is a field in the payload holding the report interval). In 81 | opposite case, it will indicate that connection is lost and requests are not 82 | coming through. In case of using etcd, period of agent's data obsolescence is 83 | set explicitly in parameters to the server (`-report-ttl` parameter, in seconds). 
84 | Let us remember that each agent corresponds to one particular pod, unique for 85 | particular node, so connection between agents and server means connection 86 | between the corresponding nodes. 87 | 88 | Results of the connectivity check which are represented in response from the 89 | endpoint particularly indicate possible connectivity issue (e.g. there is an 90 | `Absent` field listing agents which haven't reported at all and `Outdated` one 91 | listing those whose reports are out of data obsolescence period). 92 | 93 | One aspect of functioning of network checker is worth mentioning. Payloads sent 94 | by the agents are of relatively small byte size which in some cases can be less 95 | than MTU value set for the cluster's network links. When this happens, the 96 | network checker will not catch problems with network packet's fragmentation. 97 | For that reason, special option can be used with the agent application - 98 | `-zeroextenderlength`. By default, it has value of 1500. The parameter tells 99 | the agent to extend each payload by given length to exceed packet fragmentation 100 | trigger threshold. This dummy data has no effect on the server's processing 101 | of the agent's requests (reports). 
102 | 103 | ## Usage 104 | 105 | To start the server inside Kubernetes pod using Kubernetes TPR as a persistent storage 106 | and listen on port 8081, use the following command: 107 | 108 | ```bash 109 | server -v 5 -logtostderr -kubeproxyinit -endpoint 0.0.0.0:8081 110 | ``` 111 | 112 | To start the server using etcd as a persistent storage, use the following setting: 113 | 114 | ``` 115 | -kubeproxyinit=false 116 | ``` 117 | 118 | Also, a few parameters are required to establish the connection with etcd: 119 | 120 | ``` 121 | -etcd-endpoints=https://192.0.10.11:4001,https://192.0.10.12:4001 122 | -etcd-key=/var/lib/etcd/client.key (optional, omitted when using http) 123 | -etcd-cert=/var/lib/etcd/client.pem (optional, omitted when using http) 124 | -etcd-ca=/var/lib/etcd/ca.pem (optional, can be omitted even when using https) 125 | ``` 126 | 127 | For other possibilities regarding testing, code and Docker images building etc. 128 | please refer to the Makefile. 129 | 130 | ## Deployment in Kubernetes cluster 131 | 132 | In order to deploy the application, two options can be used. 133 | 134 | First - using `./examples/deploy.sh` script. Users must provide all the needed 135 | environment variables (e.g. name and tag for Docker images) before running the 136 | script. 137 | 138 | Second - deploy as a helm chart. If users have 139 | [Helm](https://github.com/kubernetes/helm) installed on their Kubernetes cluster 140 | they can build the chart from its description (`./helm-chart/`) and then deploy 141 | it (please, use Helm's documentation for details). 142 | 143 | ## Additional documentation 144 | 145 | - [Metrics](doc/metrics.md) - metrics and Prometheus configuration how to. 
146 | 147 | -------------------------------------------------------------------------------- /cmd/server.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package main 16 | 17 | import ( 18 | "flag" 19 | "net/http" 20 | "time" 21 | 22 | "github.com/Mirantis/k8s-netchecker-server/pkg/utils" 23 | "github.com/golang/glog" 24 | ) 25 | // version is printed in the startup log line as the build's compile time; it stays empty unless set at link time (the Makefile passes -X main.version). 26 | var version string 27 | // main binds the command-line flags to the config returned by utils.GetOrCreateConfig, converts the second-valued flags to time.Duration, creates the handler (logging and panicking on failure), starts CollectAgentsMetrics in a goroutine and serves HTTP on config.HttpListen; glog.Fatal logs whatever ListenAndServe returns. 28 | func main() { 29 | var ( 30 | repTTL int 31 | pingTimeout int 32 | checkInterval int 33 | ) 34 | 35 | config := utils.GetOrCreateConfig() 36 | // String and bool flags write straight into config fields; the three interval flags are read as integer seconds into locals and converted after flag.Parse. 37 | flag.StringVar(&config.HttpListen, "endpoint", "0.0.0.0:8081", "Endpoint (IP address, port) for server to listen on") 38 | flag.BoolVar(&config.UseKubeClient, "kubeproxyinit", false, "use k8s TPR (true) or Etcd (false) as a data storage") 39 | flag.IntVar(&repTTL, "report-ttl", 300, "TTL for agents reports data stored in Etcd (sec)") 40 | flag.IntVar(&pingTimeout, "ping-timeout", 5, "Etcd server ping timeout (sec)") 41 | flag.StringVar(&config.EtcdEndpoints, "etcd-endpoints", "", "Etcd server endpoints list") 42 | flag.StringVar(&config.EtcdTree, "etcd-tree", "netchecker", "Root of Etcd tree") 43 | flag.StringVar(&config.EtcdKeyFile, "etcd-key", "", "SSL key file when using HTTPS to connect to etcd") 44 | flag.StringVar(&config.EtcdCertFile, "etcd-cert", "", "SSL certificate file when using 
HTTPS to connect to etcd") 45 | flag.StringVar(&config.EtcdCAFile, "etcd-ca", "", "SSL CA file when using HTTPS to connect to etcd") 46 | flag.IntVar(&checkInterval, "check-interval", 10, "Interval of checking that agents data is up-to-date (sec)") 47 | flag.Parse() 48 | glog.Infof("K8s netchecker. Compiled at: %s", version) 49 | 50 | config.ReportTTL = time.Duration(repTTL) * time.Second 51 | config.PingTimeout = time.Duration(pingTimeout) * time.Second 52 | config.CheckInterval = time.Duration(checkInterval) * time.Second 53 | 54 | glog.V(5).Infof("Start listening on %v", config.HttpListen) 55 | 56 | handler, err := utils.NewHandler(config.UseKubeClient) 57 | if err != nil { 58 | glog.Errorf("Error while setting up the handler. Details: %v", err) 59 | panic(err.Error()) 60 | } 61 | 62 | go handler.CollectAgentsMetrics(config.CheckInterval, config.UseKubeClient) 63 | glog.Fatal(http.ListenAndServe(config.HttpListen, handler.HTTPHandler)) 64 | } 65 | -------------------------------------------------------------------------------- /diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mirantis/k8s-netchecker-server/653293a6c7a0176b99803ee2edb876009f248e74/diagram.png -------------------------------------------------------------------------------- /doc/images/http_probes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mirantis/k8s-netchecker-server/653293a6c7a0176b99803ee2edb876009f248e74/doc/images/http_probes.png -------------------------------------------------------------------------------- /doc/metrics.md: -------------------------------------------------------------------------------- 1 | # Using metrics 2 | 3 | ## Available metrics 4 | 5 | ### Basic metrics 6 | 7 | * `go_*` - a set of default Go metrics provided by Prometheus library 8 | * `process_*` - a set of default Process metrics provided by Prometheus library 
9 | * `ncagent_report_count_total` (label `agent`) - Counter. Number of total 10 | reports from every agent (agents separated by label). 11 | * `ncagent_error_count_total` (label `agent`) - Counter. Number of total errors 12 | from every agent (agents separated by label). This counter is incremented 13 | when agent does not report within `reporting_interval * 2` timeframe. 14 | 15 | ### HTTP probes metrics 16 | 17 | ![HTTP probe times](images/http_probes.png) 18 | 19 | * `http_probe_connection_result` - Gauge. Connection result: 0 - error, 20 | 1 - success. 21 | * `http_probe_code` - Gauge. HTTP status code, 0 if no HTTP response. 22 | * `http_probe_total_time_ms` - Gauge. Total duration of http transaction. 23 | * `http_probe_content_transfer_time_ms` - Gauge. The duration of content 24 | transfer from the first response byte till the end (in ms). 25 | * `http_probe_tcp_connection_time_ms` - Gauge. TCP establishing time 26 | (in ms). 27 | * `http_probe_dns_lookup_time_ms` - Gauge. DNS lookup time (in ms). 28 | * `http_probe_connect_time_ms` - Gauge. Connection time in ms. 29 | * `http_probe_server_processing_time_ms` - Gauge. Server processing time 30 | (in ms). 31 | 32 | ## Prometheus configuration example 33 | 34 | ### Scrape config 35 | 36 | No additional configuration is needed if Prometheus has the following 37 | configuration for PODs metrics autodiscovery: 38 | 39 | ``` 40 | # Scrape config for service endpoints. 41 | # 42 | # The relabeling allows the actual service scrape endpoint to be configured 43 | # via the following annotations: 44 | # 45 | # * `prometheus.io/scrape`: Only scrape services that have a value of `true` 46 | # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need 47 | # to set this to `https` & most likely set the `tls_config` of the scrape config. 48 | # * `prometheus.io/path`: If the metrics path is not `/metrics` override this. 
49 | # * `prometheus.io/port`: If the metrics are exposed on a different port to the 50 | # service then set this appropriately. 51 | - job_name: 'kubernetes-service-endpoints' 52 | 53 | kubernetes_sd_configs: 54 | - role: endpoints 55 | 56 | relabel_configs: 57 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape] 58 | action: keep 59 | regex: true 60 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme] 61 | action: replace 62 | target_label: __scheme__ 63 | regex: (https?) 64 | - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path] 65 | action: replace 66 | target_label: __metrics_path__ 67 | regex: (.+) 68 | - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port] 69 | action: replace 70 | target_label: __address__ 71 | regex: ([^:]+)(?::\d+)?;(\d+) 72 | replacement: $1:$2 73 | - action: labelmap 74 | regex: __meta_kubernetes_service_label_(.+) 75 | - source_labels: [__meta_kubernetes_namespace] 76 | action: replace 77 | target_label: kubernetes_namespace 78 | - source_labels: [__meta_kubernetes_service_name] 79 | action: replace 80 | target_label: kubernetes_name 81 | ``` 82 | 83 | The only thing needed in order to enable metrics gathering from 84 | Netchecker Server is the proper annotation: 85 | 86 | ``` 87 | kubectl annotate pods --selector='app==netchecker-server' \ 88 | prometheus.io/scrape=true prometheus.io/port=8081 --overwrite 89 | ``` 90 | 91 | ### Alert rules configuration 92 | 93 | * Monitoring **ncagent_error_count_total** - in this example we're firing an alert 94 | when the number of errors for the last hour becomes greater than 10: 95 | 96 | ``` 97 | ALERT NetCheckerAgentErrors 98 | IF absent(ncagent_error_count_total) OR 99 | increase(ncagent_error_count_total[1h]) > 10 100 | LABELS { 101 | service = "netchecker", 102 | severity = "warning" 103 | } 104 | ANNOTATIONS { 105 | summary = "A high number of errors in Netchecker is happening", 106 |
description = "{{ $value }} errors have been registered within the last hour 107 | for Netchecker Agent {{ $labels.instance }}" 108 | } 109 | ``` 110 | 111 | * Monitoring **ncagent_report_count_total** - in this example we're checking that 112 | Netchecker Server is actually alive (not hanging or glitched). In order to do 113 | so we just need to check that the report counter is increasing as expected. 114 | Report interval is 15s, so we should see at least 15 reports per 5m (ideally 115 | 20, but due to network delays we may get fewer reports than ideal). 116 | 117 | ``` 118 | ALERT NetCheckerReportsMissing 119 | IF absent(ncagent_report_count_total) OR 120 | increase(ncagent_report_count_total[5m]) < 15 121 | LABELS { 122 | service = "netchecker", 123 | severity = "warning" 124 | } 125 | ANNOTATIONS { 126 | summary = "The number of agent reports is lower than expected", 127 | description = "Netchecker Agent {{ $labels.instance }} has reported only 128 | {{ $value }} times for the last 5 minutes", 129 | } 130 | ``` 131 | 132 | * Example of monitoring alert based on **ncagent_http_probe_tcp_connection_time_ms**. 133 | Let's monitor TCP connection time from Netchecker agents to Netchecker 134 | server in this example and raise an alert if it increases by more than 100 ms within 5 minutes.
135 | 136 | ``` 137 | ALERT NetCheckerTCPServerDelay 138 | IF absent(ncagent_http_probe_tcp_connection_time_ms) OR 139 | delta(ncagent_http_probe_tcp_connection_time_ms{ 140 | url="http://netchecker-service:8081/api/v1/ping"}[5m]) > 100 141 | LABELS { 142 | service = "netchecker", 143 | severity = "warning" 144 | } 145 | ANNOTATIONS { 146 | summary = "TCP connection to Netchecker server takes too much time", 147 | description = "Netchecker Agent {{ $labels.instance }} TCP connection time 148 | to Netchecker server has increased by {{ $value }} within the last 5 149 | minutes", 150 | } 151 | ``` 152 | 153 | * Example of alert rule to monitor DNS lookup time based on 154 | **ncagent_http_probe_dns_lookup_time_ms** metric. 155 | 156 | ``` 157 | ALERT NetCheckerDNSSlow 158 | IF absent(ncagent_http_probe_dns_lookup_time_ms) OR 159 | delta(ncagent_http_probe_dns_lookup_time_ms[5m]) > 300 160 | LABELS { 161 | service = "netchecker", 162 | severity = "warning" 163 | } 164 | ANNOTATIONS { 165 | summary = "DNS lookup time is too high", 166 | description = "DNS lookup time on Netchecker Agent {{ $labels.instance }} 167 | has increased by {{ $value }} within the last 5 minutes", 168 | } 169 | ``` 170 | -------------------------------------------------------------------------------- /examples/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 Mirantis 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | # env: NS - a namespace name (also as $1) 17 | # env: KUBE_DIR - manifests directory, e.g. /etc/kubernetes 18 | # env: KUBE_USER - a user to own the manifests directory 19 | # env: NODE_PORT - a node port for the server app to listen on 20 | # env: PURGE - if true, will only erase applications 21 | # env: AGENT_REPORT_INTERVAL - an interval for agents to report 22 | # env: USE_ETCD_ENDPOINT - if non-empty, store agent reports in etcd (see ETCD_ENDPOINT) instead of TPR 23 | set -o xtrace 24 | set -o pipefail 25 | set -o errexit 26 | set -o nounset 27 | 28 | 29 | NS=${NS:-default} 30 | REAL_NS="--namespace=${1:-$NS}" 31 | KUBE_DIR=${KUBE_DIR:-.} 32 | KUBE_USER=${KUBE_USER:-} 33 | NODE_PORT=${NODE_PORT:-31081} 34 | PURGE=${PURGE:-false} 35 | SERVER_IMAGE_NAME=${SERVER_IMAGE_NAME:-mirantis/k8s-netchecker-server} 36 | AGENT_IMAGE_NAME=${AGENT_IMAGE_NAME:-mirantis/k8s-netchecker-agent} 37 | IMAGE_TAG=${IMAGE_TAG:-stable} 38 | SERVER_IMAGE_TAG=${SERVER_IMAGE_TAG:-$IMAGE_TAG} 39 | AGENT_IMAGE_TAG=${AGENT_IMAGE_TAG:-$IMAGE_TAG} 40 | SERVER_PORT=${SERVER_PORT:-8081} 41 | 42 | if [ -z "${USE_ETCD_ENDPOINT:-}" ] ; then 43 | # USE_ETCD_ENDPOINT unset or empty (":-" keeps nounset from aborting): use 3rd party resources (TPR) API to store agent reports 44 | SERVER_ENV_TAIL="-kubeproxyinit" 45 | else 46 | # use ETCD to store agent reports; the endpoint list is built from the etcd member list 47 | ETCD_ENDPOINT=${ETCD_ENDPOINT:-"https://localhost:2379"} 48 | EEPS=$(etcdctl --endpoints="${ETCD_ENDPOINT}" member list | awk '{print $4}' | awk -F'=' '{print $2}' | paste -sd "," -) 49 | SERVER_ENV_TAIL="-etcd-endpoints=${EEPS}" 50 | fi 51 | 52 | 53 | if [ "${KUBE_DIR}" != "."
] && [ -n "${KUBE_USER}" ]; then 54 | mkdir -p "${KUBE_DIR}" 55 | fi 56 | 57 | # check there are nodes in the cluster 58 | kubectl get nodes 59 | 60 | echo "Deploying netchecker server and agents" 61 | cat << EOF > "${KUBE_DIR}"/netchecker-server-dep.yml 62 | apiVersion: apps/v1beta1 63 | kind: Deployment 64 | metadata: 65 | name: netchecker-server 66 | spec: 67 | replicas: 1 68 | template: 69 | metadata: 70 | annotations: 71 | prometheus.io/scrape: "true" 72 | prometheus.io/port: "${SERVER_PORT}" 73 | name: netchecker-server 74 | labels: 75 | app: netchecker-server 76 | spec: 77 | containers: 78 | - name: netchecker-server 79 | image: ${SERVER_IMAGE_NAME}:${SERVER_IMAGE_TAG} 80 | imagePullPolicy: IfNotPresent 81 | ports: 82 | - containerPort: ${SERVER_PORT} 83 | args: 84 | - "-v=5" 85 | - "-logtostderr" 86 | - "-endpoint=0.0.0.0:${SERVER_PORT}" 87 | - "${SERVER_ENV_TAIL}" 88 | EOF 89 | 90 | cat << EOF > "${KUBE_DIR}"/netchecker-server-svc.yml 91 | apiVersion: v1 92 | kind: "Service" 93 | metadata: 94 | name: netchecker-service 95 | spec: 96 | selector: 97 | app: netchecker-server 98 | ports: 99 | - 100 | protocol: TCP 101 | port: ${SERVER_PORT} 102 | targetPort: ${SERVER_PORT} 103 | nodePort: ${NODE_PORT} 104 | type: NodePort 105 | EOF 106 | 107 | cat << EOF > "${KUBE_DIR}"/netchecker-agent-ds.yml 108 | apiVersion: extensions/v1beta1 109 | kind: DaemonSet 110 | metadata: 111 | labels: 112 | app: netchecker-agent 113 | name: netchecker-agent 114 | spec: 115 | updateStrategy: 116 | type: RollingUpdate 117 | template: 118 | metadata: 119 | name: netchecker-agent 120 | labels: 121 | app: netchecker-agent 122 | spec: 123 | containers: 124 | - name: netchecker-agent 125 | image: ${AGENT_IMAGE_NAME}:${AGENT_IMAGE_TAG} 126 | env: 127 | - name: MY_NODE_NAME 128 | valueFrom: 129 | fieldRef: 130 | fieldPath: spec.nodeName 131 | - name: MY_POD_NAME 132 | valueFrom: 133 | fieldRef: 134 | fieldPath: metadata.name 135 | args: 136 | - "-v=5" 137 | - "-logtostderr" 138 | - 
"-serverendpoint=netchecker-service:${SERVER_PORT}" 139 | - "-reportinterval=${AGENT_REPORT_INTERVAL:-60}" 140 | imagePullPolicy: IfNotPresent 141 | EOF 142 | # host-network variant of the agent DaemonSet; report interval comes from AGENT_REPORT_INTERVAL (default 60) 143 | cat << EOF > "${KUBE_DIR}"/netchecker-agent-hostnet-ds.yml 144 | apiVersion: extensions/v1beta1 145 | kind: DaemonSet 146 | metadata: 147 | labels: 148 | app: netchecker-agent-hostnet 149 | name: netchecker-agent-hostnet 150 | spec: 151 | updateStrategy: 152 | type: RollingUpdate 153 | template: 154 | metadata: 155 | name: netchecker-agent-hostnet 156 | labels: 157 | app: netchecker-agent-hostnet 158 | spec: 159 | hostNetwork: True 160 | containers: 161 | - name: netchecker-agent 162 | image: ${AGENT_IMAGE_NAME}:${AGENT_IMAGE_TAG} 163 | env: 164 | - name: MY_NODE_NAME 165 | valueFrom: 166 | fieldRef: 167 | fieldPath: spec.nodeName 168 | - name: MY_POD_NAME 169 | valueFrom: 170 | fieldRef: 171 | fieldPath: metadata.name 172 | args: 173 | - "-v=5" 174 | - "-logtostderr" 175 | - "-serverendpoint=netchecker-service:${SERVER_PORT}" 176 | - "-reportinterval=${AGENT_REPORT_INTERVAL:-60}" 177 | imagePullPolicy: IfNotPresent 178 | EOF 179 | 180 | if [ "${KUBE_DIR}" != "."
] && [ -n "${KUBE_USER}" ]; then 181 | chown -R "${KUBE_USER}" "${KUBE_DIR}" 182 | fi 183 | # remove any previous deployment first; failures are ignored (|| true) so a missing object does not trip errexit 184 | kubectl delete --grace-period=1 -f "${KUBE_DIR}"/netchecker-agent-ds.yml "${REAL_NS}" || true 185 | kubectl delete --grace-period=1 -f "${KUBE_DIR}"/netchecker-agent-hostnet-ds.yml "${REAL_NS}" || true 186 | kubectl delete --grace-period=1 -f "${KUBE_DIR}"/netchecker-server-svc.yml "${REAL_NS}" || true 187 | (kubectl delete --grace-period=1 -f "${KUBE_DIR}"/netchecker-server-dep.yml "${REAL_NS}" && sleep 10) || true 188 | 189 | if [ "${PURGE}" != "true" ]; then 190 | kubectl create -f "${KUBE_DIR}"/netchecker-server-dep.yml "${REAL_NS}" 191 | kubectl create -f "${KUBE_DIR}"/netchecker-server-svc.yml "${REAL_NS}" 192 | kubectl create -f "${KUBE_DIR}"/netchecker-agent-ds.yml "${REAL_NS}" 193 | kubectl create -f "${KUBE_DIR}"/netchecker-agent-hostnet-ds.yml "${REAL_NS}" 194 | fi 195 | 196 | set +o xtrace 197 | echo "DONE" 198 | 199 | if [ "${PURGE}" != "true" ]; then 200 | echo "Use the following commands to " 201 | echo "- get latest agents reports:" 202 | echo " curl -s -X GET 'http://localhost:${NODE_PORT}/api/v1/agents/' | python -mjson.tool" 203 | echo "- check connectivity with agents:" 204 | echo " curl -X GET 'http://localhost:${NODE_PORT}/api/v1/connectivity_check'" 205 | echo "- get agents metrics:" 206 | echo " curl -X GET 'http://localhost:${NODE_PORT}/metrics'" 207 | fi 208 | -------------------------------------------------------------------------------- /examples/deployment.yml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: netchecker-server 5 | spec: 6 | replicas: 1 7 | template: 8 | metadata: 9 | annotations: 10 | prometheus.io/scrape: "true" 11 | prometheus.io/port: "8081" 12 | name: netchecker-server 13 | labels: 14 | app: netchecker-server 15 | namespace: default 16 | spec: 17 | containers: 18 | - name: netchecker-server 19 | image:
mirantis/k8s-netchecker-server:stable 20 | imagePullPolicy: IfNotPresent 21 | ports: 22 | - containerPort: 8081 23 | args: 24 | - "-v=5" 25 | - "-logtostderr" 26 | - "-kubeproxyinit" 27 | - "-endpoint=0.0.0.0:8081" 28 | -------------------------------------------------------------------------------- /examples/deployment_etcd.yml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: netchecker-server 5 | spec: 6 | replicas: 1 7 | template: 8 | metadata: 9 | annotations: 10 | prometheus.io/scrape: "true" 11 | prometheus.io/port: "8081" 12 | name: netchecker-server 13 | labels: 14 | app: netchecker-server 15 | namespace: default 16 | spec: 17 | containers: 18 | - name: netchecker-server 19 | image: mirantis/k8s-netchecker-server:stable 20 | imagePullPolicy: IfNotPresent 21 | ports: 22 | - containerPort: 8081 23 | args: 24 | - "-v=5" 25 | - "-logtostderr" 26 | - "-kubeproxyinit=false" 27 | - "-endpoint=0.0.0.0:8081" 28 | - "-report-ttl=180" 29 | - "-etcd-endpoints=https://172.0.10.2:4009,https://172.0.10.3:4009" 30 | - "-etcd-key=/var/lib/etcd/etcd-client.key" 31 | - "-etcd-cert=/var/lib/etcd/etcd-client.pem" 32 | - "-etcd-ca=/var/lib/etcd/ca.pem" 33 | # optional. to provide etcd certs data if it is in the host file system 34 | volumeMounts: 35 | - mountPath: /var/lib/etcd/ 36 | name: etcd-certs 37 | readOnly: true 38 | # optional. 
to provide etcd certs data if it is in the host file system 39 | volumes: 40 | - hostPath: 41 | path: /var/lib/etcd 42 | name: etcd-certs 43 | 44 | -------------------------------------------------------------------------------- /examples/pod.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: netchecker-server 5 | labels: 6 | app: netchecker-server 7 | spec: 8 | containers: 9 | - name: netchecker-server 10 | image: mirantis/k8s-netchecker-server:stable 11 | imagePullPolicy: Always 12 | ports: 13 | - containerPort: 8081 14 | hostPort: 8081 15 | args: 16 | - "-v=5" 17 | - "-logtostderr" 18 | - "-kubeproxyinit" 19 | - "-endpoint=0.0.0.0:8081" 20 | -------------------------------------------------------------------------------- /examples/svc.yml: -------------------------------------------------------------------------------- 1 | kind: "Service" 2 | apiVersion: "v1" 3 | metadata: 4 | name: "netchecker-service" 5 | spec: 6 | selector: 7 | app: "netchecker-server" 8 | ports: 9 | - 10 | protocol: "TCP" 11 | port: 8081 12 | targetPort: 8081 13 | nodePort: 31081 14 | type: "NodePort" 15 | -------------------------------------------------------------------------------- /glide.lock: -------------------------------------------------------------------------------- 1 | hash: 1fb28bc91515dacacff2c77325ffc6e50accc1bb32720207cf3585e128875c86 2 | updated: 2017-11-30T14:54:19.537081249+01:00 3 | imports: 4 | - name: github.com/beorn7/perks 5 | version: 3ac7bf7a47d159a033b107610db8a1b6575507a4 6 | subpackages: 7 | - quantile 8 | - name: github.com/coreos/etcd 9 | version: 20490caaf0dcd96bb4a95e40625559def8ef5b04 10 | subpackages: 11 | - client 12 | - pkg/pathutil 13 | - pkg/types 14 | - name: github.com/davecgh/go-spew 15 | version: 5215b55f46b2b919f50a1df0eaa5886afe4e3b3d 16 | subpackages: 17 | - spew 18 | - name: github.com/docker/distribution 19 | version: 
cd27f179f2c10c5d300e6d09025b538c475b0d51 20 | subpackages: 21 | - digest 22 | - reference 23 | - name: github.com/docker/spdystream 24 | version: 449fdfce4d962303d702fec724ef0ad181c92528 25 | subpackages: 26 | - spdy 27 | - name: github.com/emicklei/go-restful 28 | version: ff4f55a206334ef123e4f79bbf348980da81ca46 29 | subpackages: 30 | - log 31 | - name: github.com/emicklei/go-restful-swagger12 32 | version: dcef7f55730566d41eae5db10e7d6981829720f6 33 | - name: github.com/ghodss/yaml 34 | version: 73d445a93680fa1a78ae23a5839bad48f32ba1ee 35 | - name: github.com/go-openapi/analysis 36 | version: b44dc874b601d9e4e2f6e19140e794ba24bead3b 37 | - name: github.com/go-openapi/jsonpointer 38 | version: 46af16f9f7b149af66e5d1bd010e3574dc06de98 39 | - name: github.com/go-openapi/jsonreference 40 | version: 13c6e3589ad90f49bd3e3bbe2c2cb3d7a4142272 41 | - name: github.com/go-openapi/loads 42 | version: 18441dfa706d924a39a030ee2c3b1d8d81917b38 43 | - name: github.com/go-openapi/spec 44 | version: 6aced65f8501fe1217321abf0749d354824ba2ff 45 | - name: github.com/go-openapi/swag 46 | version: 1d0bd113de87027671077d3c71eb3ac5d7dbba72 47 | - name: github.com/gogo/protobuf 48 | version: c0656edd0d9eab7c66d1eb0c568f9039345796f7 49 | subpackages: 50 | - proto 51 | - sortkeys 52 | - name: github.com/golang/glog 53 | version: 44145f04b68cf362d9c4df2182967c2275eaefed 54 | - name: github.com/golang/protobuf 55 | version: 4bd1920723d7b7c925de087aa32e2187708897f7 56 | subpackages: 57 | - proto 58 | - name: github.com/google/gofuzz 59 | version: 44d81051d367757e1c7c6a5a86423ece9afcf63c 60 | - name: github.com/howeyc/gopass 61 | version: bf9dde6d0d2c004a008c27aaee91170c786f6db8 62 | - name: github.com/imdario/mergo 63 | version: 6633656539c1639d9d78127b7d47c622b5d7b6dc 64 | - name: github.com/juju/ratelimit 65 | version: 5b9ff866471762aa2ab2dced63c9fb6f53921342 66 | - name: github.com/julienschmidt/httprouter 67 | version: 975b5c4c7c21c0e3d2764200bf2aa8e34657ae6e 68 | - name: 
github.com/mailru/easyjson 69 | version: d5b7844b561a7bc640052f1b935f7b800330d7e0 70 | subpackages: 71 | - buffer 72 | - jlexer 73 | - jwriter 74 | - name: github.com/matttproud/golang_protobuf_extensions 75 | version: fc2b8d3a73c4867e51861bbdd5ae3c1f0869dd6a 76 | subpackages: 77 | - pbutil 78 | - name: github.com/onsi/ginkgo 79 | version: 67b9df7f55fe1165fd9ad49aca7754cce01a42b8 80 | subpackages: 81 | - config 82 | - internal/codelocation 83 | - internal/containernode 84 | - internal/failer 85 | - internal/leafnodes 86 | - internal/remote 87 | - internal/spec 88 | - internal/spec_iterator 89 | - internal/specrunner 90 | - internal/suite 91 | - internal/testingtproxy 92 | - internal/writer 93 | - reporters 94 | - reporters/stenographer 95 | - reporters/stenographer/support/go-colorable 96 | - reporters/stenographer/support/go-isatty 97 | - types 98 | - name: github.com/onsi/gomega 99 | version: d59fa0ac68bb5dd932ee8d24eed631cdd519efc3 100 | subpackages: 101 | - format 102 | - internal/assertion 103 | - internal/asyncassertion 104 | - internal/oraclematcher 105 | - internal/testingtsupport 106 | - matchers 107 | - matchers/support/goraph/bipartitegraph 108 | - matchers/support/goraph/edge 109 | - matchers/support/goraph/node 110 | - matchers/support/goraph/util 111 | - types 112 | - name: github.com/prometheus/client_golang 113 | version: e7e903064f5e9eb5da98208bae10b475d4db0f8c 114 | subpackages: 115 | - prometheus 116 | - prometheus/promhttp 117 | - name: github.com/prometheus/client_model 118 | version: fa8ad6fec33561be4280a8f0514318c79d7f6cb6 119 | subpackages: 120 | - go 121 | - name: github.com/prometheus/common 122 | version: 13ba4ddd0caa9c28ca7b7bffe1dfa9ed8d5ef207 123 | subpackages: 124 | - expfmt 125 | - internal/bitbucket.org/ww/goautoneg 126 | - model 127 | - name: github.com/prometheus/procfs 128 | version: 65c1f6f8f0fc1e2185eb9863a3bc751496404259 129 | subpackages: 130 | - xfs 131 | - name: github.com/PuerkitoBio/purell 132 | version: 
8a290539e2e8629dbc4e6bad948158f790ec31f4 133 | - name: github.com/PuerkitoBio/urlesc 134 | version: 5bd2802263f21d8788851d5305584c82a5c75d7e 135 | - name: github.com/spf13/pflag 136 | version: 9ff6c6923cfffbcd502984b8e0c80539a94968b7 137 | - name: github.com/ugorji/go 138 | version: ded73eae5db7e7a0ef6f55aace87a2873c5d2b74 139 | subpackages: 140 | - codec 141 | - name: github.com/urfave/negroni 142 | version: 3019daf414cfd2c51de68c3a535707c0de6e3d83 143 | - name: golang.org/x/crypto 144 | version: d172538b2cfce0c13cee31e647d0367aa8cd2486 145 | subpackages: 146 | - ssh/terminal 147 | - name: golang.org/x/net 148 | version: f2499483f923065a842d38eb4c7f1927e6fc6e6d 149 | subpackages: 150 | - context 151 | - http2 152 | - http2/hpack 153 | - idna 154 | - lex/httplex 155 | - websocket 156 | - name: golang.org/x/sys 157 | version: 8f0908ab3b2457e2e15403d3697c9ef5cb4b57a9 158 | subpackages: 159 | - unix 160 | - name: golang.org/x/text 161 | version: 2910a502d2bf9e43193af9d68ca516529614eed3 162 | subpackages: 163 | - cases 164 | - internal/tag 165 | - language 166 | - runes 167 | - secure/bidirule 168 | - secure/precis 169 | - transform 170 | - unicode/bidi 171 | - unicode/norm 172 | - width 173 | - name: gopkg.in/inf.v0 174 | version: 3887ee99ecf07df5b447e9b00d9c0b2adaa9f3e4 175 | - name: gopkg.in/yaml.v2 176 | version: 53feefa2559fb8dfa8d81baad31be332c97d6c77 177 | - name: k8s.io/api 178 | version: 4fe9229aaa9d704f8a2a21cdcd50de2bbb6e1b57 179 | subpackages: 180 | - core/v1 181 | - name: k8s.io/apiextensions-apiserver 182 | version: fcd622fe88a4a6efcb5aea9e94ee87324ac1b036 183 | subpackages: 184 | - pkg/apis/apiextensions 185 | - pkg/apis/apiextensions/v1beta1 186 | - pkg/client/clientset/clientset 187 | - pkg/client/clientset/clientset/scheme 188 | - pkg/client/clientset/clientset/typed/apiextensions/v1beta1 189 | - name: k8s.io/apimachinery 190 | version: 8a1a257c3a3503c77f25e5802e96e89a2a11ad61 191 | subpackages: 192 | - pkg/api/equality 193 | - pkg/api/errors 194 | - 
pkg/api/meta 195 | - pkg/api/resource 196 | - pkg/apimachinery 197 | - pkg/apimachinery/announced 198 | - pkg/apimachinery/registered 199 | - pkg/apis/meta/v1 200 | - pkg/apis/meta/v1/unstructured 201 | - pkg/apis/meta/v1alpha1 202 | - pkg/conversion 203 | - pkg/conversion/queryparams 204 | - pkg/conversion/unstructured 205 | - pkg/fields 206 | - pkg/labels 207 | - pkg/openapi 208 | - pkg/runtime 209 | - pkg/runtime/schema 210 | - pkg/runtime/serializer 211 | - pkg/runtime/serializer/json 212 | - pkg/runtime/serializer/protobuf 213 | - pkg/runtime/serializer/recognizer 214 | - pkg/runtime/serializer/streaming 215 | - pkg/runtime/serializer/versioning 216 | - pkg/selection 217 | - pkg/types 218 | - pkg/util/clock 219 | - pkg/util/diff 220 | - pkg/util/errors 221 | - pkg/util/framer 222 | - pkg/util/httpstream 223 | - pkg/util/httpstream/spdy 224 | - pkg/util/intstr 225 | - pkg/util/json 226 | - pkg/util/net 227 | - pkg/util/rand 228 | - pkg/util/remotecommand 229 | - pkg/util/runtime 230 | - pkg/util/sets 231 | - pkg/util/validation 232 | - pkg/util/validation/field 233 | - pkg/util/wait 234 | - pkg/util/yaml 235 | - pkg/version 236 | - pkg/watch 237 | - third_party/forked/golang/netutil 238 | - third_party/forked/golang/reflect 239 | - name: k8s.io/apiserver 240 | version: b6348e73bd115a31b70a4e9ff12d35869d057d8c 241 | subpackages: 242 | - pkg/admission 243 | - pkg/admission/initializer 244 | - pkg/admission/plugin/namespace/lifecycle 245 | - pkg/apis/apiserver 246 | - pkg/apis/apiserver/install 247 | - pkg/apis/apiserver/v1alpha1 248 | - pkg/apis/audit 249 | - pkg/apis/audit/install 250 | - pkg/apis/audit/v1alpha1 251 | - pkg/apis/audit/validation 252 | - pkg/audit 253 | - pkg/audit/policy 254 | - pkg/authentication/authenticator 255 | - pkg/authentication/authenticatorfactory 256 | - pkg/authentication/group 257 | - pkg/authentication/request/anonymous 258 | - pkg/authentication/request/bearertoken 259 | - pkg/authentication/request/headerrequest 260 | - 
pkg/authentication/request/union 261 | - pkg/authentication/request/x509 262 | - pkg/authentication/serviceaccount 263 | - pkg/authentication/token/tokenfile 264 | - pkg/authentication/user 265 | - pkg/authorization/authorizer 266 | - pkg/authorization/authorizerfactory 267 | - pkg/authorization/union 268 | - pkg/endpoints 269 | - pkg/endpoints/discovery 270 | - pkg/endpoints/filters 271 | - pkg/endpoints/handlers 272 | - pkg/endpoints/handlers/negotiation 273 | - pkg/endpoints/handlers/responsewriters 274 | - pkg/endpoints/metrics 275 | - pkg/endpoints/openapi 276 | - pkg/endpoints/request 277 | - pkg/features 278 | - pkg/registry/generic 279 | - pkg/registry/generic/registry 280 | - pkg/registry/rest 281 | - pkg/server 282 | - pkg/server/filters 283 | - pkg/server/healthz 284 | - pkg/server/httplog 285 | - pkg/server/mux 286 | - pkg/server/openapi 287 | - pkg/server/options 288 | - pkg/server/routes 289 | - pkg/server/routes/data/swagger 290 | - pkg/server/storage 291 | - pkg/storage 292 | - pkg/storage/errors 293 | - pkg/storage/etcd 294 | - pkg/storage/etcd/metrics 295 | - pkg/storage/etcd/util 296 | - pkg/storage/etcd3 297 | - pkg/storage/names 298 | - pkg/storage/storagebackend 299 | - pkg/storage/storagebackend/factory 300 | - pkg/storage/value 301 | - pkg/util/feature 302 | - pkg/util/flag 303 | - pkg/util/flushwriter 304 | - pkg/util/logs 305 | - pkg/util/proxy 306 | - pkg/util/trace 307 | - pkg/util/trie 308 | - pkg/util/webhook 309 | - pkg/util/wsstream 310 | - plugin/pkg/audit/log 311 | - plugin/pkg/audit/webhook 312 | - plugin/pkg/authenticator/token/webhook 313 | - plugin/pkg/authorizer/webhook 314 | - name: k8s.io/client-go 315 | version: 4cbb4d746a6a36cf99d6cd4f1b69a6907f49318a 316 | subpackages: 317 | - discovery 318 | - discovery/fake 319 | - kubernetes 320 | - kubernetes/fake 321 | - kubernetes/scheme 322 | - kubernetes/typed/admissionregistration/v1alpha1 323 | - kubernetes/typed/admissionregistration/v1alpha1/fake 324 | - 
kubernetes/typed/apps/v1beta1 325 | - kubernetes/typed/apps/v1beta1/fake 326 | - kubernetes/typed/authentication/v1 327 | - kubernetes/typed/authentication/v1/fake 328 | - kubernetes/typed/authentication/v1beta1 329 | - kubernetes/typed/authentication/v1beta1/fake 330 | - kubernetes/typed/authorization/v1 331 | - kubernetes/typed/authorization/v1/fake 332 | - kubernetes/typed/authorization/v1beta1 333 | - kubernetes/typed/authorization/v1beta1/fake 334 | - kubernetes/typed/autoscaling/v1 335 | - kubernetes/typed/autoscaling/v1/fake 336 | - kubernetes/typed/autoscaling/v2alpha1 337 | - kubernetes/typed/autoscaling/v2alpha1/fake 338 | - kubernetes/typed/batch/v1 339 | - kubernetes/typed/batch/v1/fake 340 | - kubernetes/typed/batch/v2alpha1 341 | - kubernetes/typed/batch/v2alpha1/fake 342 | - kubernetes/typed/certificates/v1beta1 343 | - kubernetes/typed/certificates/v1beta1/fake 344 | - kubernetes/typed/core/v1 345 | - kubernetes/typed/core/v1/fake 346 | - kubernetes/typed/extensions/v1beta1 347 | - kubernetes/typed/extensions/v1beta1/fake 348 | - kubernetes/typed/networking/v1 349 | - kubernetes/typed/networking/v1/fake 350 | - kubernetes/typed/policy/v1beta1 351 | - kubernetes/typed/policy/v1beta1/fake 352 | - kubernetes/typed/rbac/v1alpha1 353 | - kubernetes/typed/rbac/v1alpha1/fake 354 | - kubernetes/typed/rbac/v1beta1 355 | - kubernetes/typed/rbac/v1beta1/fake 356 | - kubernetes/typed/settings/v1alpha1 357 | - kubernetes/typed/settings/v1alpha1/fake 358 | - kubernetes/typed/storage/v1 359 | - kubernetes/typed/storage/v1/fake 360 | - kubernetes/typed/storage/v1beta1 361 | - kubernetes/typed/storage/v1beta1/fake 362 | - pkg/api 363 | - pkg/api/v1 364 | - pkg/api/v1/ref 365 | - pkg/apis/admissionregistration 366 | - pkg/apis/admissionregistration/v1alpha1 367 | - pkg/apis/apps 368 | - pkg/apis/apps/v1beta1 369 | - pkg/apis/authentication 370 | - pkg/apis/authentication/v1 371 | - pkg/apis/authentication/v1beta1 372 | - pkg/apis/authorization 373 | - 
pkg/apis/authorization/v1 374 | - pkg/apis/authorization/v1beta1 375 | - pkg/apis/autoscaling 376 | - pkg/apis/autoscaling/v1 377 | - pkg/apis/autoscaling/v2alpha1 378 | - pkg/apis/batch 379 | - pkg/apis/batch/v1 380 | - pkg/apis/batch/v2alpha1 381 | - pkg/apis/certificates 382 | - pkg/apis/certificates/v1beta1 383 | - pkg/apis/extensions 384 | - pkg/apis/extensions/v1beta1 385 | - pkg/apis/networking 386 | - pkg/apis/networking/v1 387 | - pkg/apis/policy 388 | - pkg/apis/policy/v1beta1 389 | - pkg/apis/rbac 390 | - pkg/apis/rbac/v1alpha1 391 | - pkg/apis/rbac/v1beta1 392 | - pkg/apis/settings 393 | - pkg/apis/settings/v1alpha1 394 | - pkg/apis/storage 395 | - pkg/apis/storage/v1 396 | - pkg/apis/storage/v1beta1 397 | - pkg/util 398 | - pkg/util/parsers 399 | - pkg/version 400 | - rest 401 | - rest/watch 402 | - testing 403 | - tools/auth 404 | - tools/clientcmd 405 | - tools/clientcmd/api 406 | - tools/clientcmd/api/latest 407 | - tools/clientcmd/api/v1 408 | - tools/metrics 409 | - tools/remotecommand 410 | - transport 411 | - util/cert 412 | - util/exec 413 | - util/flowcontrol 414 | - util/homedir 415 | - util/integer 416 | - name: k8s.io/kubernetes 417 | version: d3ada0119e776222f11ec7945e6d860061339aad 418 | subpackages: 419 | - pkg/api 420 | - pkg/kubelet/server/remotecommand 421 | - pkg/util/exec 422 | testImports: [] -------------------------------------------------------------------------------- /glide.yaml: -------------------------------------------------------------------------------- 1 | package: github.com/Mirantis/k8s-netchecker-server 2 | import: 3 | - package: github.com/golang/glog 4 | - package: github.com/julienschmidt/httprouter 5 | version: 975b5c4c7c21c0e3d2764200bf2aa8e34657ae6e 6 | - package: k8s.io/kubernetes 7 | version: v1.7.0 8 | - package: k8s.io/client-go 9 | version: 4cbb4d746a6a36cf99d6cd4f1b69a6907f49318a 10 | - package: github.com/onsi/ginkgo 11 | - package: k8s.io/apiextensions-apiserver 12 | version: 
fcd622fe88a4a6efcb5aea9e94ee87324ac1b036 13 | - package: github.com/onsi/gomega 14 | - package: k8s.io/api 15 | version: 4fe9229aaa9d704f8a2a21cdcd50de2bbb6e1b57 16 | - package: github.com/urfave/negroni 17 | version: 3019daf414cfd2c51de68c3a535707c0de6e3d83 18 | - package: github.com/prometheus/client_golang 19 | version: e7e903064f5e9eb5da98208bae10b475d4db0f8c 20 | subpackages: 21 | - prometheus/promhttp 22 | - package: github.com/coreos/etcd 23 | version: 20490caaf0dcd96bb4a95e40625559def8ef5b04 24 | subpackages: 25 | - client 26 | -------------------------------------------------------------------------------- /helm-chart/netchecker-server/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /helm-chart/netchecker-server/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | description: A simple network checker for kubernetes (server-part) 3 | name: netchecker-server 4 | version: v1.0 5 | maintainers: 6 | - name: Artem Roma 7 | email: aroma@mirantis.com 8 | -------------------------------------------------------------------------------- /helm-chart/netchecker-server/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1. 
Get the application URL by running these commands: 2 | {{- if contains "NodePort" .Values.service.type }} 3 | export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "fullname" . }}) 4 | export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") 5 | echo http://$NODE_IP:$NODE_PORT/login 6 | {{- else if contains "LoadBalancer" .Values.service.type }} 7 | NOTE: It may take a few minutes for the LoadBalancer IP to be available. 8 | You can watch the status of it by running 'kubectl get svc -w {{ template "fullname" . }}' 9 | export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') 10 | echo http://$SERVICE_IP:{{ .Values.service.externalPort }} 11 | {{- else if contains "ClusterIP" .Values.service.type }} 12 | export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "fullname" . }}" -o jsonpath="{.items[0].metadata.name}") 13 | echo "Visit http://127.0.0.1:8080 to use your application" 14 | kubectl port-forward $POD_NAME 8080:{{ .Values.service.externalPort }} 15 | {{- end }} 16 | -------------------------------------------------------------------------------- /helm-chart/netchecker-server/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 24 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 24 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | */}} 13 | {{- define "fullname" -}} 14 | {{- $name := default .Chart.Name .Values.nameOverride -}} 15 | {{- printf "%s-%s" .Release.Name $name | trunc 24 | trimSuffix "-" -}} 16 | {{- end -}} 17 | -------------------------------------------------------------------------------- /helm-chart/netchecker-server/templates/pod.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: {{ .Values.app.name }} 5 | labels: 6 | app: {{ .Values.app.name }} 7 | chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 8 | spec: 9 | serviceAccountName: {{ .Values.rbac.serviceaccount }} 10 | containers: 11 | - name: {{ .Values.container.name }} 12 | image: {{ .Values.image.repository }}:{{ .Values.image.tag }} 13 | imagePullPolicy: {{.Values.image.pullPolicy}} 14 | ports: 15 | - containerPort: {{ .Values.container.port }} 16 | hostPort: {{ .Values.container.hostPort }} 17 | args: 18 | {{- range .Values.container.args }} 19 | - {{ . 
| quote }} 20 | {{- end }} 21 | -------------------------------------------------------------------------------- /helm-chart/netchecker-server/templates/rbac-config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: {{ .Values.rbac.serviceaccount }} 5 | namespace: {{ .Release.Namespace }} 6 | --- 7 | apiVersion: rbac.authorization.k8s.io/v1beta1 8 | kind: ClusterRole 9 | metadata: 10 | name: {{ .Values.rbac.clusterrole }} 11 | rules: 12 | - apiGroups: 13 | - apiextensions.k8s.io 14 | resources: 15 | - customresourcedefinitions 16 | verbs: 17 | - "*" 18 | - apiGroups: [""] 19 | resources: 20 | - pods 21 | verbs: ["list", "get"] 22 | - apiGroups: 23 | - network-checker.ext 24 | resources: 25 | - agents 26 | verbs: 27 | - "*" 28 | --- 29 | apiVersion: rbac.authorization.k8s.io/v1beta1 30 | kind: ClusterRoleBinding 31 | metadata: 32 | name: {{ .Values.rbac.clusterrolebinding }} 33 | roleRef: 34 | apiGroup: rbac.authorization.k8s.io 35 | kind: ClusterRole 36 | name: {{ .Values.rbac.clusterrole }} 37 | subjects: 38 | - kind: ServiceAccount 39 | name: {{ .Values.rbac.serviceaccount }} 40 | namespace: {{ .Release.Namespace }} 41 | -------------------------------------------------------------------------------- /helm-chart/netchecker-server/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ .Values.service.name }} 5 | labels: 6 | chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 7 | spec: 8 | type: {{ .Values.service.type }} 9 | ports: 10 | - port: {{ .Values.service.externalPort }} 11 | targetPort: {{ .Values.service.internalPort }} 12 | protocol: TCP 13 | {{ if .Values.service.nodePort }}nodePort: {{ .Values.service.nodePort }}{{ end }} 14 | selector: 15 | app: {{ .Values.app.name }} 16 | 
-------------------------------------------------------------------------------- /helm-chart/netchecker-server/values.yaml: -------------------------------------------------------------------------------- 1 | app: 2 | name: netchecker-server 3 | 4 | image: 5 | repository: mirantis/k8s-netchecker-server 6 | tag: latest 7 | pullPolicy: Always 8 | 9 | container: 10 | name: netchecker-server 11 | port: 8081 12 | hostPort: 8081 13 | args: 14 | - -v=5 15 | - -logtostderr 16 | - -kubeproxyinit 17 | - -endpoint=0.0.0.0:8081 18 | 19 | service: 20 | name: netchecker-service 21 | type: NodePort 22 | externalPort: 8081 23 | internalPort: 8081 24 | nodePort: 31081 25 | 26 | rbac: 27 | serviceaccount: nechecker-operator 28 | clusterrole: nechecker-operator 29 | clusterrolebinding: nechecker-operator 30 | -------------------------------------------------------------------------------- /pkg/extensions/apis/v1/register.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package v1 16 | 17 | import ( 18 | meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 19 | "k8s.io/apimachinery/pkg/runtime" 20 | "k8s.io/apimachinery/pkg/runtime/schema" 21 | ) 22 | 23 | var ( 24 | // SchemeBuilder is an instance of schema constructor 25 | SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) 26 | // AddToScheme is a method for runtime 27 | AddToScheme = SchemeBuilder.AddToScheme 28 | ) 29 | 30 | // GroupName is the group name use in this package 31 | const GroupName = "network-checker.ext" 32 | 33 | // SchemeGroupVersion is group version used to register these objects 34 | var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1"} 35 | 36 | // Resource takes an unqualified resource and returns a Group qualified GroupResource 37 | func Resource(resource string) schema.GroupResource { 38 | return SchemeGroupVersion.WithResource(resource).GroupResource() 39 | } 40 | 41 | func init() { 42 | // We only register manually written functions here. The registration of the 43 | // generated functions takes place in the generated files. The separation 44 | // makes the code compile even when the generated files are missing. 45 | SchemeBuilder.Register(addKnownTypes) 46 | } 47 | 48 | // Adds the list of known types to api.Scheme. 49 | func addKnownTypes(scheme *runtime.Scheme) error { 50 | scheme.AddKnownTypes(SchemeGroupVersion, 51 | &Agent{}, 52 | &AgentList{}, 53 | 54 | &meta_v1.ListOptions{}, 55 | &meta_v1.DeleteOptions{}, 56 | ) 57 | 58 | meta_v1.AddToGroupVersion(scheme, SchemeGroupVersion) 59 | 60 | return nil 61 | } 62 | -------------------------------------------------------------------------------- /pkg/extensions/apis/v1/types.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package v1 16 | 17 | import ( 18 | "time" 19 | 20 | meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21 | ) 22 | 23 | // AgentResourcePlural is a constant for plural form naming 24 | const AgentResourcePlural = "agents" 25 | 26 | // AgentSpec is a payload to keep Agent info 27 | type AgentSpec struct { 28 | ReportInterval int `json:"report_interval"` 29 | NodeName string `json:"nodename"` 30 | PodName string `json:"podname"` 31 | HostDate time.Time `json:"hostdate"` 32 | Uptime uint64 `json:"uptime"` 33 | LastUpdated time.Time `json:"last_updated"` 34 | LookupHost map[string][]string `json:"nslookup"` 35 | NetworkProbes []ProbeResult `json:"network_probes"` 36 | IPs map[string][]string `json:"ips"` 37 | } 38 | 39 | // ProbeResult structure for network probing results 40 | type ProbeResult struct { 41 | URL string 42 | ConnectionResult int 43 | HTTPCode int 44 | Total int 45 | ContentTransfer int 46 | TCPConnection int 47 | DNSLookup int 48 | Connect int 49 | ServerProcessing int 50 | } 51 | 52 | // Agent struct to store AgentSpec info as json 53 | type Agent struct { 54 | meta_v1.TypeMeta `json:",inline"` 55 | meta_v1.ObjectMeta `json:"metadata"` 56 | Spec AgentSpec `json:"spec"` 57 | } 58 | 59 | // AgentList struct to store a list of agents 60 | type AgentList struct { 61 | meta_v1.TypeMeta `json:",inline"` 62 | meta_v1.ListMeta `json:"metadata"` 63 | Items []Agent `json:"items"` 64 | } 65 | -------------------------------------------------------------------------------- /pkg/extensions/client/agent.go:
-------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package client 16 | 17 | import ( 18 | "reflect" 19 | ext_v1 "github.com/Mirantis/k8s-netchecker-server/pkg/extensions/apis/v1" 20 | apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" 21 | apiextensionsv1beta1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1" 22 | meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 | ) 24 | 25 | // CreateAgentCustomResourceDefinition is a function to initialize schema for custom resource 26 | func CreateAgentCustomResourceDefinition(clientset apiextensionsclient.Interface) error { 27 | agent := &apiextensionsv1beta1.CustomResourceDefinition{ 28 | ObjectMeta: meta_v1.ObjectMeta{ 29 | Name: ext_v1.AgentResourcePlural + "." + ext_v1.GroupName, 30 | }, 31 | Spec: apiextensionsv1beta1.CustomResourceDefinitionSpec{ 32 | Group: ext_v1.GroupName, 33 | Version: ext_v1.SchemeGroupVersion.Version, 34 | Scope: apiextensionsv1beta1.NamespaceScoped, 35 | Names: apiextensionsv1beta1.CustomResourceDefinitionNames{ 36 | Plural: ext_v1.AgentResourcePlural, 37 | Kind: reflect.TypeOf(ext_v1.Agent{}).Name(), 38 | }, 39 | }, 40 | } 41 | _, err := clientset.ApiextensionsV1beta1(). 42 | CustomResourceDefinitions().
43 | Create(agent) 44 | return err 45 | } 46 | -------------------------------------------------------------------------------- /pkg/extensions/client/client.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package client 16 | 17 | import ( 18 | "bytes" 19 | "encoding/json" 20 | 21 | "k8s.io/apimachinery/pkg/runtime" 22 | "k8s.io/apimachinery/pkg/runtime/serializer" 23 | "k8s.io/client-go/kubernetes" 24 | api_v1 "k8s.io/api/core/v1" 25 | "k8s.io/client-go/rest" 26 | 27 | ext_v1 "github.com/Mirantis/k8s-netchecker-server/pkg/extensions/apis/v1" 28 | ) 29 | 30 | // WrapClientsetWithExtensions is a wrapper function for clientset 31 | func WrapClientsetWithExtensions(clientset *kubernetes.Clientset, config *rest.Config) (*WrappedClientset, error) { 32 | restConfig := &rest.Config{} 33 | *restConfig = *config 34 | rest, scheme, err := ExtensionClient(restConfig) 35 | if err != nil { 36 | return nil, err 37 | } 38 | return &WrappedClientset{ 39 | Client: rest, 40 | Scheme: scheme, 41 | }, nil 42 | } 43 | 44 | // ExtensionClient is a client initialization function 45 | func ExtensionClient(cfg *rest.Config) (*rest.RESTClient, *runtime.Scheme, error) { 46 | scheme := runtime.NewScheme() 47 | if err := ext_v1.AddToScheme(scheme); err != nil { 48 | return nil, nil, err 49 | } 50 | 51 | config := *cfg 52 | 
config.GroupVersion = &ext_v1.SchemeGroupVersion 53 | config.APIPath = "/apis" 54 | config.ContentType = runtime.ContentTypeJSON 55 | config.NegotiatedSerializer = serializer.DirectCodecFactory{CodecFactory: serializer.NewCodecFactory(scheme)} 56 | 57 | client, err := rest.RESTClientFor(&config) 58 | if err != nil { 59 | return nil, nil, err 60 | } 61 | 62 | return client, scheme, nil 63 | } 64 | 65 | // Clientset interface 66 | type Clientset interface { 67 | Agents() AgentsInterface 68 | } 69 | 70 | // WrappedClientset structure 71 | type WrappedClientset struct { 72 | Client *rest.RESTClient 73 | Scheme *runtime.Scheme 74 | } 75 | 76 | // AgentsInterface interface 77 | type AgentsInterface interface { 78 | Create(*ext_v1.Agent) (*ext_v1.Agent, error) 79 | Get(name string) (*ext_v1.Agent, error) 80 | List() (*ext_v1.AgentList, error) 81 | Update(*ext_v1.Agent) (*ext_v1.Agent, error) 82 | Delete(string, *api_v1.DeleteOptions) error 83 | } 84 | 85 | // Agents function 86 | func (w *WrappedClientset) Agents() AgentsInterface { 87 | return &AgentsClient{w.Client} 88 | } 89 | 90 | // AgentsClient structure 91 | type AgentsClient struct { 92 | client *rest.RESTClient 93 | } 94 | 95 | func decodeResponseInto(resp []byte, obj interface{}) error { 96 | return json.NewDecoder(bytes.NewReader(resp)).Decode(obj) 97 | } 98 | 99 | // Create agent function 100 | func (c *AgentsClient) Create(agent *ext_v1.Agent) (result *ext_v1.Agent, err error) { 101 | result = &ext_v1.Agent{} 102 | resp, err := c.client.Post(). 103 | Namespace("default"). 104 | Resource("agents"). 105 | Body(agent). 106 | DoRaw() 107 | if err != nil { 108 | return result, err 109 | } 110 | return result, decodeResponseInto(resp, result) 111 | } 112 | 113 | // List agents function 114 | func (c *AgentsClient) List() (result *ext_v1.AgentList, err error) { 115 | result = &ext_v1.AgentList{} 116 | resp, err := c.client.Get(). 117 | Namespace("default"). 118 | Resource("agents"). 
119 | DoRaw() 120 | if err != nil { 121 | return result, err 122 | } 123 | return result, decodeResponseInto(resp, result) 124 | } 125 | 126 | // Update agents function 127 | func (c *AgentsClient) Update(agent *ext_v1.Agent) (result *ext_v1.Agent, err error) { 128 | result = &ext_v1.Agent{} 129 | resp, err := c.client.Put(). 130 | Namespace("default"). 131 | Resource("agents"). 132 | Name(agent.ObjectMeta.Name). 133 | Body(agent). 134 | DoRaw() 135 | if err != nil { 136 | return result, err 137 | } 138 | return result, decodeResponseInto(resp, result) 139 | } 140 | 141 | // Delete agent function 142 | func (c *AgentsClient) Delete(name string, options *api_v1.DeleteOptions) error { 143 | return c.client.Delete(). 144 | Namespace("default"). 145 | Resource("agents"). 146 | Name(name). 147 | Body(options). 148 | Do(). 149 | Error() 150 | } 151 | 152 | // Get agent function 153 | func (c *AgentsClient) Get(name string) (result *ext_v1.Agent, err error) { 154 | result = &ext_v1.Agent{} 155 | resp, err := c.client.Get(). 156 | Namespace("default"). 157 | Resource("agents"). 158 | Name(name). 159 | DoRaw() 160 | if err != nil { 161 | return result, err 162 | } 163 | return result, decodeResponseInto(resp, result) 164 | } 165 | -------------------------------------------------------------------------------- /pkg/utils/config.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package utils 16 | 17 | import ( 18 | "encoding/json" 19 | "github.com/golang/glog" 20 | "gopkg.in/yaml.v2" 21 | "sync" 22 | "time" 23 | ) 24 | 25 | type AppConfig struct { 26 | sync.Mutex // ensures atomic writes; protects the following fields 27 | UseKubeClient bool // use k8s TPR (true) or etcd (false) as a data storage 28 | EtcdEndpoints string // endpoints (IPaddress1:PORT1[,IPaddress2:PORT2]) of etcd server 29 | // when etcd is being used as a data storage 30 | EtcdTree string // Root of NetChecker server etcd tree 31 | EtcdCertFile string // SSL certificate file when using HTTPS to connect to etcd 32 | EtcdKeyFile string // SSL key file when using HTTPS to connect to etcd 33 | EtcdCAFile string // SSL CA file when using HTTPS to connect to etcd 34 | HttpListen string // REST API endpoint (IPaddress:PORT) for netchecker server to listen to 35 | PingTimeout time.Duration // etcd ping timeout (sec) 36 | ReportTTL time.Duration // TTL for Agent report data when etcd is in use (sec) 37 | CheckInterval time.Duration // Interval of checking that agents data is up-to-date 38 | } 39 | 40 | var main_config *AppConfig 41 | 42 | func (c *AppConfig) ToJson() ([]byte, error) { 43 | var ( 44 | rv []byte 45 | err error 46 | ) 47 | if rv, err = json.Marshal(c); err != nil { 48 | glog.Errorln(err.Error()) 49 | } 50 | return rv, err 51 | } 52 | 53 | func (c *AppConfig) ToYaml() ([]byte, error) { 54 | var ( 55 | rv []byte 56 | err error 57 | ) 58 | if rv, err = yaml.Marshal(c); err != nil { 59 | glog.Errorln(err.Error()) 60 | } 61 | return rv, err 62 | } 63 | 64 | func GetOrCreateConfig() *AppConfig { 65 | return main_config 66 | } 67 | 68 | func init() { 69 | main_config = new(AppConfig) 70 | } 71 | -------------------------------------------------------------------------------- /pkg/utils/data.go: 
-------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package utils 16 | 17 | import ( 18 | "time" 19 | 20 | "github.com/prometheus/client_golang/prometheus" 21 | ) 22 | 23 | // AgentInfo is payload structure for keepalive message received from agent. 24 | type AgentInfo struct { 25 | ReportInterval int `json:"report_interval"` 26 | NodeName string `json:"nodename"` 27 | PodName string `json:"podname"` 28 | HostDate time.Time `json:"hostdate"` 29 | LastUpdated time.Time `json:"last_updated"` 30 | LookupHost map[string][]string `json:"nslookup"` 31 | NetworkProbes []ProbeResult `json:"network_probes"` 32 | IPs map[string][]string `json:"ips"` 33 | } 34 | 35 | // ProbeResult structure for network probing results 36 | type ProbeResult struct { 37 | URL string 38 | ConnectionResult int 39 | HTTPCode int 40 | Total int 41 | ContentTransfer int 42 | TCPConnection int 43 | DNSLookup int 44 | Connect int 45 | ServerProcessing int 46 | } 47 | 48 | // CheckConnectivityInfo is payload structure for server answer to connectivity 49 | // check request. 
50 | type CheckConnectivityInfo struct { 51 | Message string `json="message"` 52 | Absent []string `json="outdated,omitempty"` 53 | Outdated []string `json="absent,omitempty"` 54 | } 55 | 56 | // AgentMetrics contains Prometheus entities and agent data required for 57 | // reporting metrics for particular agent. 58 | type AgentMetrics struct { 59 | ErrorCount prometheus.Counter 60 | ReportCount prometheus.Counter 61 | PodName string 62 | ErrorsFromLastReport int 63 | ProbeConnectionResult *prometheus.GaugeVec 64 | ProbeHTTPCode *prometheus.GaugeVec 65 | ProbeTotal *prometheus.GaugeVec 66 | ProbeContentTransfer *prometheus.GaugeVec 67 | ProbeTCPConnection *prometheus.GaugeVec 68 | ProbeDNSLookup *prometheus.GaugeVec 69 | ProbeConnect *prometheus.GaugeVec 70 | ProbeServerProcessing *prometheus.GaugeVec 71 | } 72 | -------------------------------------------------------------------------------- /pkg/utils/handler.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package utils 16 | 17 | import ( 18 | "fmt" 19 | "net/http" 20 | "time" 21 | 22 | "github.com/golang/glog" 23 | "github.com/julienschmidt/httprouter" 24 | "github.com/prometheus/client_golang/prometheus/promhttp" 25 | "github.com/urfave/negroni" 26 | ) 27 | 28 | func NewHandler(useKubeClient bool) (*Handler, error) { 29 | h := &Handler{ 30 | Metrics: NcAgentMetrics{}, 31 | } 32 | 33 | var err error 34 | 35 | if useKubeClient { 36 | // use k8s TPR as a persistent storage for agents data 37 | h.Agents, err = NewK8sStorer() 38 | } else { 39 | // use etcd as a persistent storage for agents data 40 | h.Agents, err = NewEtcdStorer() 41 | } 42 | 43 | if err == nil { 44 | h.SetupRouter() 45 | h.AddMiddleware() 46 | } 47 | 48 | return h, err 49 | } 50 | 51 | func (h *Handler) SetupRouter() { 52 | glog.V(10).Info("Setting up the url multiplexer") 53 | 54 | router := httprouter.New() 55 | router.POST("/api/v1/agents/:name", h.UpdateAgents) 56 | router.GET("/api/v1/agents/:name", h.CleanCache(h.Agents.GetSingleAgent)) 57 | router.GET("/api/v1/agents/", h.CleanCache(h.Agents.GetAgents)) 58 | router.GET("/api/v1/connectivity_check", h.CleanCache(h.ConnectivityCheck)) 59 | router.GET("/api/v1/ping", func(_ http.ResponseWriter, _ *http.Request, _ httprouter.Params) { 60 | }) 61 | router.Handler("GET", "/metrics", promhttp.Handler()) 62 | h.HTTPHandler = router 63 | } 64 | 65 | func (h *Handler) AddMiddleware() { 66 | n := negroni.New() 67 | n.Use(negroni.NewLogger()) 68 | n.Use(negroni.NewRecovery()) 69 | n.UseHandler(h.HTTPHandler) 70 | h.HTTPHandler = n 71 | } 72 | 73 | func (h *Handler) UpdateAgents(rw http.ResponseWriter, r *http.Request, rp httprouter.Params) { 74 | agentName := rp.ByName("name") 75 | 76 | agentData, err := h.Agents.UpdateAgents(rw, r, rp) 77 | if err != nil { 78 | glog.Error(err) 79 | } 80 | 81 | h.Metrics[agentName] = NewAgentMetrics(&agentData) 82 | UpdateAgentBaseMetrics(h.Metrics, agentName, true, false) 83 | UpdateAgentProbeMetrics(agentData, 
h.Metrics[agentName]) 84 | } 85 | 86 | func (h *Handler) ConnectivityCheck(rw http.ResponseWriter, r *http.Request, _ httprouter.Params) { 87 | res := &CheckConnectivityInfo{ 88 | Message: fmt.Sprintf( 89 | "All %v pods successfully reported back to the server", 90 | len(h.Agents.AgentCache())), 91 | } 92 | status := http.StatusOK 93 | errMsg := "Connectivity check fails. Reason: %v" 94 | 95 | absent, outdated, err := h.Agents.CheckAgents() 96 | if err != nil { 97 | message := fmt.Sprintf( 98 | "Error occurred while checking the agents. Details: %v", err) 99 | glog.Error(message) 100 | http.Error(rw, message, http.StatusInternalServerError) 101 | return 102 | } 103 | 104 | if len(absent) != 0 || len(outdated) != 0 { 105 | glog.V(5).Infof( 106 | "Absent|outdated agents detected. Absent -> %v; outdated -> %v", 107 | absent, outdated, 108 | ) 109 | res.Message = fmt.Sprintf(errMsg, 110 | "there are absent or outdated pods; look up the payload") 111 | res.Absent = absent 112 | res.Outdated = outdated 113 | 114 | status = http.StatusBadRequest 115 | } 116 | 117 | glog.V(10).Infof("Connectivity check result: %v", res) 118 | glog.V(10).Infof("Connectivity check HTTP response status code: %v", status) 119 | 120 | rw.WriteHeader(status) 121 | 122 | ProcessResponse(rw, res) 123 | } 124 | 125 | func (h *Handler) CleanCache(handle httprouter.Handle) httprouter.Handle { 126 | return func(rw http.ResponseWriter, r *http.Request, rp httprouter.Params) { 127 | h.Agents.CleanCacheOnDemand(rw) 128 | 129 | handle(rw, r, rp) 130 | } 131 | } 132 | 133 | func (h *Handler) CollectAgentsMetrics(checkInterval time.Duration, useKubeClient bool) { 134 | for { 135 | time.Sleep(checkInterval) 136 | if useKubeClient { 137 | agentsData := h.Agents.AgentCache() 138 | for name := range agentsData { 139 | if _, exists := h.Metrics[name]; exists { 140 | deltaInIntervals := time.Now().Sub(agentsData[name].LastUpdated).Seconds() / 141 | float64(agentsData[name].ReportInterval) 142 | if 
int(deltaInIntervals) > (h.Metrics[name].ErrorsFromLastReport + 1) { 143 | UpdateAgentBaseMetrics(h.Metrics, name, false, true) 144 | } 145 | } 146 | } 147 | } else { 148 | absent, _, err := h.Agents.CheckAgents() 149 | if err != nil { 150 | message := fmt.Sprintf( 151 | "Metrics update: error checking the agents: %v", err) 152 | glog.Error(message) 153 | } 154 | for _, name := range absent { 155 | if _, exists := h.Metrics[name]; exists { 156 | if h.Metrics[name].ErrorsFromLastReport == 0 { 157 | UpdateAgentBaseMetrics(h.Metrics, name, false, true) 158 | } 159 | } 160 | } 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /pkg/utils/handler_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package utils 16 | 17 | import ( 18 | "bytes" 19 | "encoding/json" 20 | "errors" 21 | "fmt" 22 | "net/http" 23 | "net/http/httptest" 24 | "strings" 25 | "testing" 26 | "time" 27 | 28 | "github.com/julienschmidt/httprouter" 29 | "github.com/prometheus/client_golang/prometheus/promhttp" 30 | 31 | meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 | "k8s.io/client-go/kubernetes" 33 | "k8s.io/client-go/kubernetes/fake" 34 | "k8s.io/client-go/pkg/api/v1" 35 | 36 | ext_v1 "github.com/Mirantis/k8s-netchecker-server/pkg/extensions/apis/v1" 37 | ) 38 | 39 | func newHandler() *Handler { 40 | h := Handler{ 41 | Metrics: NcAgentMetrics{}, //map[string]AgentMetrics{}, 42 | } 43 | h.Agents, _ = NewK8sStorer() 44 | 45 | return &h 46 | } 47 | 48 | func agentExample() ext_v1.AgentSpec { 49 | return ext_v1.AgentSpec{ 50 | ReportInterval: 5, 51 | NodeName: "test-node", 52 | PodName: "test", 53 | Uptime: 65536, 54 | HostDate: time.Now(), 55 | NetworkProbes: []ext_v1.ProbeResult{{"http://0.0.0.0:8081", 1, 200, 50, 1, 0, 0, 0, 0}}, 56 | } 57 | } 58 | 59 | func checkRespStatus(expected, actual int, t *testing.T) { 60 | if actual != expected { 61 | t.Errorf("Response status code %v is not as expected %v", actual, expected) 62 | } 63 | } 64 | 65 | func checkCacheKey(h *Handler, key string, expected bool, t *testing.T) { 66 | _, exists := h.Agents.AgentCache()[key] 67 | if exists != expected { 68 | t.Errorf("Presence of the key %v in AgentCache must be %v", key, expected) 69 | } 70 | } 71 | 72 | func readBodyBytesOrFail(resp *http.Response, t *testing.T) []byte { 73 | bData := make([]byte, resp.ContentLength) 74 | n, err := resp.Body.Read(bData) 75 | if n <= 0 && err != nil { 76 | t.Errorf("Error while reading response from UpdateAgents. 
Details: %v", err) 77 | } 78 | 79 | return bData 80 | } 81 | 82 | func marshalExpectedWithActualDate(expected, actual ext_v1.AgentSpec, t *testing.T) []byte { 83 | //time.Now() is always different 84 | expected.LastUpdated = actual.LastUpdated 85 | 86 | bExpected, err := json.Marshal(expected) 87 | if err != nil { 88 | t.Errorf("Failed to marshal expected data with last_updated field. Details: %v", err) 89 | } 90 | 91 | return bExpected 92 | } 93 | 94 | func TestUpdateAgents(t *testing.T) { 95 | t.Skip("Skip agent update") 96 | expectedAgent := agentExample() 97 | marshalled, err := json.Marshal(expectedAgent) 98 | if err != nil { 99 | t.Errorf("Failed to marshal expectedAgent. Details: %v", err) 100 | } 101 | 102 | handler := newHandler() 103 | router := httprouter.New() 104 | router.POST("/api/v1/agents/:name", handler.UpdateAgents) 105 | ts := httptest.NewServer(router) 106 | defer ts.Close() 107 | 108 | body := bytes.NewReader(marshalled) 109 | _, err = http.Post( 110 | ts.URL+"/api/v1/agents/"+expectedAgent.PodName, 111 | "application/json", 112 | body, 113 | ) 114 | if err != nil { 115 | t.Errorf("Failed to post example agent to server. Details: %v", err) 116 | } 117 | 118 | checkCacheKey(handler, "test", true, t) 119 | 120 | aData := handler.Agents.AgentCache()["test"] 121 | 122 | expected := marshalExpectedWithActualDate(expectedAgent, aData, t) 123 | 124 | actual, err := json.Marshal(aData) 125 | if err != nil { 126 | t.Errorf("Failed to marshal agent from the cache. 
Details: %v", err) 127 | } 128 | 129 | if !bytes.Equal(expected, actual) { 130 | t.Errorf( 131 | "Actual data from AgentCache %v is not as expected %v", 132 | handler.Agents.AgentCache()["test"], 133 | expectedAgent) 134 | } 135 | } 136 | 137 | func TestUpdateAgentsFailedUnmarshal(t *testing.T) { 138 | handler := newHandler() 139 | router := httprouter.New() 140 | router.POST("/api/v1/agents/:name", handler.UpdateAgents) 141 | ts := httptest.NewServer(router) 142 | defer ts.Close() 143 | 144 | resp, err := http.Post( 145 | ts.URL+"/api/v1/agents/test", "text", strings.NewReader("some text")) 146 | if err != nil { 147 | t.Errorf("Failed to perform POST request on UpdateAgents. Details: %v", err) 148 | } 149 | 150 | checkRespStatus(http.StatusInternalServerError, resp.StatusCode, t) 151 | 152 | bData := readBodyBytesOrFail(resp, t) 153 | s := string(bData) 154 | expected := "Error while unmarshaling data." 155 | if !strings.Contains(s, expected) { 156 | t.Errorf("Response data should contains following message '%v'. Instead it is '%v'", 157 | expected, s) 158 | } 159 | 160 | checkCacheKey(handler, "test", false, t) 161 | } 162 | 163 | type Body struct { 164 | Message string 165 | } 166 | 167 | func (b *Body) Read(p []byte) (n int, err error) { 168 | return 0, errors.New(b.Message) 169 | } 170 | 171 | func TestUpdateAgentsFailReadBody(t *testing.T) { 172 | body := &Body{Message: "test error message"} 173 | r := httptest.NewRequest( 174 | "POST", "/api/v1/agents/test", body) 175 | r.ContentLength = 0 176 | rw := httptest.NewRecorder() 177 | 178 | handler := newHandler() 179 | handler.UpdateAgents(rw, r, httprouter.Params{httprouter.Param{Key: "name", Value: "test"}}) 180 | 181 | checkRespStatus(http.StatusInternalServerError, rw.Code, t) 182 | 183 | s := string(rw.Body.Bytes()) 184 | expected := "Error while reading bytes from the request's body." 185 | if !strings.Contains(s, expected) { 186 | t.Errorf("Response data should contains following message '%v'. 
Instead it is '%v'", 187 | expected, s) 188 | } 189 | checkCacheKey(handler, "test", false, t) 190 | } 191 | 192 | func TestGetAgents(t *testing.T) { 193 | t.Skip("Skip get agents") 194 | handler := newHandler() 195 | age := agentExample() 196 | handler.Agents.AgentCacheUpdate("test", &age) 197 | expected, err := json.Marshal(handler.Agents.AgentCache()) 198 | if err != nil { 199 | t.Errorf("Failed to marshal AgentCache (making expected byte array). Details: %v", err) 200 | } 201 | 202 | router := httprouter.New() 203 | router.GET("/api/v1/agents/", handler.CleanCache(handler.Agents.GetAgents)) 204 | ts := httptest.NewServer(router) 205 | defer ts.Close() 206 | 207 | resp, err := http.Get(ts.URL + "/api/v1/agents/") 208 | if err != nil { 209 | t.Errorf("Failed to GET agents' data from server. Details: %v", err) 210 | } 211 | 212 | actual := readBodyBytesOrFail(resp, t) 213 | if !bytes.Equal(expected, actual) { 214 | t.Error("Response body for GET agents is not as expected") 215 | } 216 | } 217 | 218 | func TestGetSingleAgent(t *testing.T) { 219 | t.Skip("Skip get single agent") 220 | handler := newHandler() 221 | age := agentExample() 222 | handler.Agents.AgentCacheUpdate("test", &age) 223 | 224 | router := httprouter.New() 225 | router.GET("/api/v1/agents/:name", handler.Agents.GetSingleAgent) 226 | ts := httptest.NewServer(router) 227 | defer ts.Close() 228 | 229 | resp, err := http.Get(ts.URL + "/api/v1/agents/test") 230 | if err != nil { 231 | t.Errorf("Failed to GET agents' data from server. Details: %v", err) 232 | } 233 | 234 | actual := readBodyBytesOrFail(resp, t) 235 | 236 | bExpected, err := json.Marshal(handler.Agents.AgentCache()["test"]) 237 | if err != nil { 238 | t.Errorf("Failed to marshal expected data with last_updated field. 
Details: %v", err) 239 | } 240 | 241 | if !bytes.Equal(bExpected, actual) { 242 | t.Error("Response body for GET agents is not as expected") 243 | } 244 | } 245 | 246 | func TestGetSingleAgentCleanCache(t *testing.T) { 247 | t.Skip("Skip get single agent cache") 248 | handler := newHandler() 249 | age := agentExample() 250 | handler.Agents.AgentCacheUpdate("test", &age) 251 | handler.Agents.AgentCacheUpdate("test-pod", &age) 252 | 253 | handler.Agents.SetKubeClient(&KubeProxy{Client: CSwithPods()}) 254 | 255 | router := httprouter.New() 256 | router.GET("/api/v1/agents/:name", handler.CleanCache(handler.Agents.GetSingleAgent)) 257 | ts := httptest.NewServer(router) 258 | defer ts.Close() 259 | 260 | resp, err := http.Get(ts.URL + "/api/v1/agents/test") 261 | if err != nil { 262 | t.Errorf("Failed to GET agents' data from server. Details: %v", err) 263 | } 264 | 265 | readBodyBytesOrFail(resp, t) 266 | 267 | if _, exists := handler.Agents.AgentCache()["test"]; exists { 268 | t.Errorf("Key %v should not be present in the cache", "test") 269 | } 270 | } 271 | 272 | func CSwithPods() kubernetes.Interface { 273 | return fake.NewSimpleClientset( 274 | &v1.Pod{ 275 | ObjectMeta: meta_v1.ObjectMeta{ 276 | Name: "agent-pod", 277 | Labels: map[string]string{"app": AgentLabelValues[0]}, 278 | Namespace: v1.NamespaceDefault, 279 | }, 280 | }, 281 | &v1.Pod{ 282 | ObjectMeta: meta_v1.ObjectMeta{ 283 | Name: "agent-pod-hostnet", 284 | Labels: map[string]string{"app": AgentLabelValues[0]}, 285 | Namespace: v1.NamespaceDefault, 286 | }, 287 | }, 288 | &v1.Pod{ 289 | ObjectMeta: meta_v1.ObjectMeta{ 290 | Name: "agent-pod-test", 291 | Labels: map[string]string{"app": "test"}, 292 | Namespace: v1.NamespaceDefault, 293 | }, 294 | }, 295 | ) 296 | } 297 | 298 | func createCnntyCheckTestServer(handler *Handler) *httptest.Server { 299 | router := httprouter.New() 300 | router.GET("/api/v1/connectivity_check", handler.CleanCache(handler.ConnectivityCheck)) 301 | router.Handler("GET", 
"/metrics", promhttp.Handler()) 302 | return httptest.NewServer(router) 303 | } 304 | 305 | func cnntyRespOrFail(serverURL string, expectedStatus int, t *testing.T) *http.Response { 306 | res, err := http.Get(serverURL + "/api/v1/connectivity_check") 307 | if err != nil { 308 | t.Errorf("Failed to GET successful connectivity check from server. Details: %v", err) 309 | } 310 | checkRespStatus(expectedStatus, res.StatusCode, t) 311 | return res 312 | } 313 | 314 | func metricsRespOrFail(serverURL string, expectedStatus int, t *testing.T) *http.Response { 315 | res, err := http.Get(serverURL + "/metrics") 316 | if err != nil { 317 | t.Errorf("Failed to GET metrics from server. Details: %v", err) 318 | } 319 | checkRespStatus(expectedStatus, res.StatusCode, t) 320 | return res 321 | } 322 | 323 | func decodeCnntyRespOrFail(resp *http.Response, t *testing.T) *CheckConnectivityInfo { 324 | info := &CheckConnectivityInfo{} 325 | decoder := json.NewDecoder(resp.Body) 326 | err := decoder.Decode(info) 327 | if err != nil { 328 | t.Errorf( 329 | "Failed to decode connectivity check successful response body. 
Details: %v", 330 | err) 331 | } 332 | return info 333 | } 334 | 335 | func TestConnectivityCheckSuccess(t *testing.T) { 336 | t.Skip("Skip get single agent cache") 337 | handler := newHandler() 338 | handler.Agents.SetKubeClient(&KubeProxy{Client: CSwithPods()}) 339 | 340 | agent := agentExample() 341 | agent.LastUpdated = agent.HostDate 342 | 343 | agent.PodName = "agent-pod" 344 | handler.Agents.AgentCacheUpdate(agent.PodName, &agent) 345 | 346 | agent.PodName = "agent-pod-hostnet" 347 | handler.Agents.AgentCacheUpdate(agent.PodName, &agent) 348 | 349 | ts := createCnntyCheckTestServer(handler) 350 | defer ts.Close() 351 | 352 | actual := decodeCnntyRespOrFail(cnntyRespOrFail(ts.URL, http.StatusOK, t), t) 353 | successfulMsg := fmt.Sprintf( 354 | "All %v pods successfully reported back to the server", len(handler.Agents.AgentCache())) 355 | if actual.Message != successfulMsg { 356 | t.Errorf( 357 | "Unexpected message from successful result payload. Actual: %v", 358 | actual.Message) 359 | } 360 | } 361 | 362 | func TestMetricsGetSuccess(t *testing.T) { 363 | t.Skip("Skip get single agent cache") 364 | handler := newHandler() 365 | handler.Agents.SetKubeClient(&KubeProxy{Client: CSwithPods()}) 366 | 367 | agent := agentExample() 368 | agent.PodName = "agent-pod" 369 | handler.Agents.AgentCacheUpdate(agent.PodName, &agent) 370 | 371 | ts := createCnntyCheckTestServer(handler) 372 | defer ts.Close() 373 | 374 | metricsRespOrFail(ts.URL, http.StatusOK, t) 375 | } 376 | 377 | func TestConnectivityCheckFail(t *testing.T) { 378 | t.Skip("Skip get single agent cache") 379 | handler := newHandler() 380 | handler.Agents.SetKubeClient(&KubeProxy{Client: CSwithPods()}) 381 | 382 | agent := agentExample() 383 | 384 | agent.PodName = "agent-pod-hostnet" 385 | //back to the past 386 | agent.LastUpdated = agent.HostDate.Add( 387 | -time.Second * time.Duration(agent.ReportInterval*2+1)) 388 | 389 | handler.Agents.AgentCacheUpdate(agent.PodName, &agent) 390 | 391 | ts := 
createCnntyCheckTestServer(handler) 392 | defer ts.Close() 393 | 394 | actual := decodeCnntyRespOrFail(cnntyRespOrFail(ts.URL, http.StatusBadRequest, t), t) 395 | failMsg := fmt.Sprintf( 396 | "Connectivity check fails. Reason: %v", 397 | "there are absent or outdated pods; look up the payload") 398 | 399 | if actual.Message != failMsg { 400 | t.Errorf( 401 | "Unexpected message from bad request result payload. Actual: %v", 402 | actual.Message) 403 | } 404 | if actual.Outdated[0] != "agent-pod-hostnet" { 405 | t.Errorf("agent-pod-hostnet must be returned in the payload in the 'outdated' array") 406 | } 407 | if actual.Absent[0] != "agent-pod" { 408 | t.Errorf("agent-pod must be returned in the payload in the 'absent' array") 409 | } 410 | } 411 | 412 | type FakeProxy struct { 413 | } 414 | 415 | func (fp *FakeProxy) Pods() (*v1.PodList, error) { 416 | return nil, errors.New("test error") 417 | } 418 | 419 | func TestConnectivityCheckFailDueError(t *testing.T) { 420 | handler := newHandler() 421 | handler.Agents.SetKubeClient(&FakeProxy{}) 422 | 423 | ts := createCnntyCheckTestServer(handler) 424 | defer ts.Close() 425 | 426 | resp := cnntyRespOrFail(ts.URL, http.StatusInternalServerError, t) 427 | bData := readBodyBytesOrFail(resp, t) 428 | actual := string(bData) 429 | 430 | failMsg := fmt.Sprintf( 431 | "Failed to get pods from k8s cluster. Details: test error\n") 432 | 433 | if !strings.Contains(actual, failMsg) { 434 | t.Errorf( 435 | "Unexpected message from bad request result payload. Actual: %v", 436 | actual) 437 | } 438 | } 439 | -------------------------------------------------------------------------------- /pkg/utils/k8s.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package utils 16 | 17 | import ( 18 | "github.com/golang/glog" 19 | 20 | meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21 | "k8s.io/apimachinery/pkg/labels" 22 | "k8s.io/apimachinery/pkg/selection" 23 | "k8s.io/client-go/kubernetes" 24 | "k8s.io/client-go/pkg/api/v1" 25 | "k8s.io/client-go/rest" 26 | ) 27 | 28 | const AgentLabelKey = "app" 29 | 30 | var AgentLabelValues = []string{"netchecker-agent", "netchecker-agent-hostnet"} 31 | 32 | type Proxy interface { 33 | Pods() (*v1.PodList, error) 34 | } 35 | 36 | type KubeProxy struct { 37 | Client kubernetes.Interface 38 | } 39 | 40 | // SetupClientSet is a function for initialize kubernetes clientset 41 | func (kp *KubeProxy) SetupClientSet(config *rest.Config) (*kubernetes.Clientset, error) { 42 | clientSet, err := kubernetes.NewForConfig(config) 43 | 44 | if err != nil { 45 | return nil, err 46 | } 47 | 48 | kp.Client = clientSet 49 | 50 | return clientSet, nil 51 | } 52 | 53 | func (kp *KubeProxy) buildConfig() (*rest.Config, error) { 54 | return rest.InClusterConfig() 55 | } 56 | 57 | func (kp *KubeProxy) Pods() (*v1.PodList, error) { 58 | requirement, err := labels.NewRequirement(AgentLabelKey, selection.In, AgentLabelValues) 59 | if err != nil { 60 | return nil, err 61 | } 62 | glog.V(10).Infof("Selector for kubernetes pods: %v", requirement.String()) 63 | 64 | pods, err := kp.Client.Core().Pods("").List(meta_v1.ListOptions{LabelSelector: requirement.String()}) 65 | return pods, err 66 | } 67 | 
-------------------------------------------------------------------------------- /pkg/utils/metrics.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package utils 16 | 17 | import ( 18 | "fmt" 19 | "reflect" 20 | "strings" 21 | 22 | "github.com/golang/glog" 23 | "github.com/prometheus/client_golang/prometheus" 24 | 25 | ext_v1 "github.com/Mirantis/k8s-netchecker-server/pkg/extensions/apis/v1" 26 | ) 27 | 28 | // NewAgentMetrics setup prometheus metrics 29 | func NewAgentMetrics(ai *ext_v1.AgentSpec) AgentMetrics { 30 | am := AgentMetrics{ 31 | PodName: ai.PodName, 32 | } 33 | 34 | suffix := "private_network" 35 | if strings.Contains(ai.PodName, "hostnet") { 36 | suffix = "host_network" 37 | } 38 | name := ai.NodeName 39 | 40 | // Basic Counter metrics 41 | am.ErrorCount = prometheus.NewCounter(prometheus.CounterOpts{ 42 | Namespace: "ncagent", 43 | Name: "error_count_total", 44 | ConstLabels: prometheus.Labels{"agent": fmt.Sprintf("%s-%s", name, suffix)}, 45 | Help: "Total number of errors (keepalive miss count) for the agent.", 46 | }) 47 | am.ReportCount = prometheus.NewCounter(prometheus.CounterOpts{ 48 | Namespace: "ncagent", 49 | Name: "report_count_total", 50 | ConstLabels: prometheus.Labels{"agent": fmt.Sprintf("%s-%s", name, suffix)}, 51 | Help: "Total number of reports (keepalive 
messages) from the agent.", 52 | }) 53 | 54 | // GaugeVec metrics for HTTP probes 55 | am.ProbeConnectionResult = prometheus.NewGaugeVec( 56 | prometheus.GaugeOpts{ 57 | Namespace: "ncagent", 58 | Name: "http_probe_connection_result", 59 | Help: "Connection result: 0 - error, 1 - success", 60 | }, 61 | []string{"agent", "url"}, 62 | ) 63 | am.ProbeHTTPCode = prometheus.NewGaugeVec( 64 | prometheus.GaugeOpts{ 65 | Namespace: "ncagent", 66 | Name: "http_probe_code", 67 | Help: "HTTP status code.", 68 | }, 69 | []string{"agent", "url"}, 70 | ) 71 | am.ProbeTotal = prometheus.NewGaugeVec( 72 | prometheus.GaugeOpts{ 73 | Namespace: "ncagent", 74 | Name: "http_probe_total_time_ms", 75 | Help: "The total duration of http request.", 76 | }, 77 | []string{"agent", "url"}, 78 | ) 79 | am.ProbeContentTransfer = prometheus.NewGaugeVec( 80 | prometheus.GaugeOpts{ 81 | Namespace: "ncagent", 82 | Name: "http_probe_content_transfer_time_ms", 83 | Help: fmt.Sprint( 84 | "The duration of content transfer, from the first ", 85 | "response byte till the end (in ms).", 86 | ), 87 | }, 88 | []string{"agent", "url"}, 89 | ) 90 | am.ProbeTCPConnection = prometheus.NewGaugeVec( 91 | prometheus.GaugeOpts{ 92 | Namespace: "ncagent", 93 | Name: "http_probe_tcp_connection_time_ms", 94 | Help: "TCP establishing time in ms.", 95 | }, 96 | []string{"agent", "url"}, 97 | ) 98 | am.ProbeDNSLookup = prometheus.NewGaugeVec( 99 | prometheus.GaugeOpts{ 100 | Namespace: "ncagent", 101 | Name: "http_probe_dns_lookup_time_ms", 102 | Help: "DNS lookup time in ms.", 103 | }, 104 | []string{"agent", "url"}, 105 | ) 106 | am.ProbeConnect = prometheus.NewGaugeVec( 107 | prometheus.GaugeOpts{ 108 | Namespace: "ncagent", 109 | Name: "http_probe_connect_time_ms", 110 | Help: "Connection time in ms", 111 | }, 112 | []string{"agent", "url"}, 113 | ) 114 | am.ProbeServerProcessing = prometheus.NewGaugeVec( 115 | prometheus.GaugeOpts{ 116 | Namespace: "ncagent", 117 | Name: "http_probe_server_processing_time_ms", 118 
| Help: "Server processing time in ms.", 119 | }, 120 | []string{"agent", "url"}, 121 | ) 122 | 123 | // Let's register all the metrics now 124 | params := reflect.ValueOf(&am).Elem() 125 | for i := 0; i < params.NumField(); i++ { 126 | if e, ok := params.Field(i).Interface().(*prometheus.GaugeVec); ok { 127 | if exists, ok := tryRegisterGaugeVec(e); !ok { 128 | params.Field(i).Set(reflect.ValueOf(exists)) 129 | } 130 | } else if e, ok := params.Field(i).Interface().(prometheus.Counter); ok { 131 | if exists, ok := tryRegisterCounter(e); !ok { 132 | params.Field(i).Set(reflect.ValueOf(exists)) 133 | } 134 | } else { 135 | glog.V(10).Infof("Skipping %v since it's not prometheus metric.", params.Type().Field(i).Name) 136 | } 137 | } 138 | 139 | return am 140 | } 141 | 142 | // returns true if registering went fine, false if counter was registered already, 143 | // panics on other register errors 144 | func tryRegisterCounter(m prometheus.Counter) (prometheus.Counter, bool) { 145 | if err := prometheus.Register(m); err != nil { 146 | if are, ok := err.(prometheus.AlreadyRegisteredError); ok { 147 | // A counter for that metric has been registered before. 148 | existing := are.ExistingCollector.(prometheus.Counter) 149 | glog.V(10).Infof("Counter %v has been registered already.", existing.Desc()) 150 | return existing, false 151 | } 152 | // Something else went wrong! 153 | panic(err) 154 | } 155 | return m, true 156 | } 157 | 158 | // returns true if registering went fine, false if GaugeVec was registered already, 159 | // panics on other register errors 160 | func tryRegisterGaugeVec(m *prometheus.GaugeVec) (*prometheus.GaugeVec, bool) { 161 | if err := prometheus.Register(m); err != nil { 162 | if are, ok := err.(prometheus.AlreadyRegisteredError); ok { 163 | // A gauge for that metric has been registered before. 164 | existing := are.ExistingCollector.(*prometheus.GaugeVec) 165 | return existing, false 166 | } 167 | // Something else went wrong! 
168 | panic(err) 169 | } 170 | return m, true 171 | } 172 | 173 | // UpdateAgentBaseMetrics function updates basic metrics with reports and 174 | // error counters 175 | func UpdateAgentBaseMetrics(am NcAgentMetrics, name string, report, error bool) { 176 | agent := am[name] 177 | if report { 178 | agent.ReportCount.Inc() 179 | agent.ErrorsFromLastReport = 0 180 | } 181 | if error { 182 | agent.ErrorCount.Inc() 183 | agent.ErrorsFromLastReport += 1 184 | } 185 | am[name] = agent 186 | } 187 | 188 | // UpdateAgentProbeMetrics function updates HTTP probe metrics. 189 | func UpdateAgentProbeMetrics(ai ext_v1.AgentSpec, am AgentMetrics) { 190 | 191 | suffix := "private_network" 192 | if strings.Contains(ai.PodName, "hostnet") { 193 | suffix = "host_network" 194 | } 195 | name := fmt.Sprintf("%s-%s", ai.NodeName, suffix) 196 | 197 | for _, pr := range ai.NetworkProbes { 198 | am.ProbeConnectionResult.WithLabelValues(name, pr.URL).Set(float64(pr.ConnectionResult)) 199 | am.ProbeHTTPCode.WithLabelValues(name, pr.URL).Set(float64(pr.HTTPCode)) 200 | am.ProbeTotal.WithLabelValues(name, pr.URL).Set(float64(pr.Total)) 201 | am.ProbeContentTransfer.WithLabelValues(name, pr.URL).Set(float64(pr.ContentTransfer)) 202 | am.ProbeTCPConnection.WithLabelValues(name, pr.URL).Set(float64(pr.TCPConnection)) 203 | am.ProbeDNSLookup.WithLabelValues(name, pr.URL).Set(float64(pr.DNSLookup)) 204 | am.ProbeConnect.WithLabelValues(name, pr.URL).Set(float64(pr.Connect)) 205 | am.ProbeServerProcessing.WithLabelValues(name, pr.URL).Set(float64(pr.ServerProcessing)) 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /pkg/utils/storer_etcd.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// EtcdAgentStorage is the agent storer backed by etcd. NewEtcdStorer builds
// it with an empty NcAgentCache, the application config, an etcd client with
// its keys API, and a Kubernetes client used to list agent pods.
type EtcdAgentStorage struct {
	sync.Mutex   // ensures atomic writes; protects the following fields
	config       *AppConfig    // application configuration (etcd endpoints, tree root, TTL, ping timeout, TLS files)
	etcd         EtcdConfig    // etcd client and the keys API created from it
	k8s          K8sConnection // holds the Proxy used to list pods
	NcAgentCache NcAgentCache  // set to an empty cache by NewEtcdStorer
}
httpsTransport := &http.Transport{ 78 | TLSHandshakeTimeout: 10 * time.Second, 79 | TLSClientConfig: tlsConfig, 80 | } 81 | 82 | etcdConfig := etcd.Config{ 83 | Endpoints: strings.Split(cfg.EtcdEndpoints, ","), 84 | Transport: httpsTransport, 85 | } 86 | 87 | // Configure ETCD client 88 | if rv.etcd.client, err = etcd.New(etcdConfig); err != nil { 89 | return nil, err 90 | } 91 | rv.etcd.kAPI = etcd.NewKeysAPI(rv.etcd.client) 92 | 93 | // Check etcd is accessible 94 | if err = rv.PingETCD(); err != nil { 95 | return nil, err 96 | } 97 | 98 | // Configure connection to k8s API 99 | rv.k8s.KubeClient, _, err = connect2k8s(false) 100 | 101 | return rv, err 102 | } 103 | 104 | func (s *EtcdAgentStorage) PingETCD() error { 105 | var rv error 106 | ctx, cancel := context.WithTimeout(context.Background(), s.config.PingTimeout) 107 | defer cancel() 108 | // set a new key ignoring its previous state 109 | _, err := s.etcd.kAPI.Set(ctx, fmt.Sprintf("%s/ping", s.config.EtcdTree), "pong", nil) 110 | if err != nil { 111 | if err == context.DeadlineExceeded { 112 | rv = fmt.Errorf("Etcd ping timeout (no answer for %d seconds)", s.config.PingTimeout) 113 | } else { 114 | rv = fmt.Errorf("Etcd ping failed: %v", err.Error()) 115 | } 116 | } 117 | return rv 118 | } 119 | 120 | func (s *EtcdAgentStorage) agentTreeRoot(agentName string) string { 121 | return fmt.Sprintf("%s/agents/%s", s.config.EtcdTree, agentName) 122 | } 123 | 124 | func (s *EtcdAgentStorage) agentReportNodeName(agentData *ext_v1.AgentSpec) string { 125 | return fmt.Sprintf("%s/%d", s.agentTreeRoot(agentData.PodName), agentData.Uptime) 126 | } 127 | 128 | func (s *EtcdAgentStorage) createOrUpdateAgentTree(ctx context.Context, dirName string, update bool) { 129 | var ( 130 | prevExists etcd.PrevExistType 131 | oper string 132 | refresh bool 133 | ) 134 | if update { 135 | prevExists = etcd.PrevExist 136 | oper = "TTL update" 137 | refresh = true 138 | } else { 139 | prevExists = etcd.PrevNoExist 140 | refresh = false 
141 | oper = "create" 142 | } 143 | _, err := s.etcd.kAPI.Set(ctx, dirName, "", &etcd.SetOptions{ 144 | Dir: true, 145 | PrevExist: prevExists, 146 | Refresh: refresh, 147 | TTL: s.config.ReportTTL, 148 | }) 149 | if err != nil { 150 | glog.Errorf("Directory '%s' update failed: %v", dirName, err) 151 | } else { 152 | glog.Infof("Directory '%s' %sd successfully", dirName, oper) 153 | } 154 | } 155 | 156 | func (s *EtcdAgentStorage) checkOrCreateAgentTree(ctx context.Context, dirName string) { 157 | resp, err := s.etcd.kAPI.Get(ctx, dirName, &etcd.GetOptions{Quorum: true}) 158 | if err == nil && !resp.Node.Dir { 159 | glog.Errorf("Key '%s' exists, but it's not a directory! Key will be re-created", dirName) 160 | if _, err := s.etcd.kAPI.Delete(ctx, dirName, &etcd.DeleteOptions{Dir: false}); err != nil { 161 | glog.Errorf("Can't remove etcd node: %v", err) 162 | } else { 163 | s.createOrUpdateAgentTree(ctx, dirName, false) 164 | } 165 | } else if err == nil && resp.Node.Dir { 166 | // all OK, update TTL 167 | s.createOrUpdateAgentTree(ctx, dirName, true) 168 | } else if err != nil && etcd.IsKeyNotFound(err) { 169 | // key not found, create directory 170 | s.createOrUpdateAgentTree(ctx, dirName, false) 171 | } else { 172 | glog.Fatalf("Unhandled error with etcd data structure '%s' failed: %v", dirName, err) 173 | } 174 | } 175 | 176 | func (s *EtcdAgentStorage) UpdateAgents(rw http.ResponseWriter, r *http.Request, rp httprouter.Params) (ext_v1.AgentSpec, error) { 177 | var err error 178 | agentData := ext_v1.AgentSpec{} 179 | 180 | if err = ProcessRequest(r, &agentData, rw); err != nil { 181 | return ext_v1.AgentSpec{}, err 182 | } 183 | 184 | agentData.LastUpdated = time.Now() 185 | glog.V(10).Infof("Updating the agents resource with value: %v", agentData) 186 | 187 | dirName := s.agentTreeRoot(agentData.PodName) 188 | nodeName := s.agentReportNodeName(&agentData) 189 | 190 | ctx := context.Background() // TODO: handle timeout for bunch of operations 191 | 192 | // 
create/update agent's derectory 193 | s.checkOrCreateAgentTree(ctx, dirName) 194 | 195 | // create report node for agent 196 | hhj, _ := json.Marshal(agentData) 197 | _, err = s.etcd.kAPI.Set(ctx, nodeName, string(hhj), &etcd.SetOptions{ 198 | Dir: false, 199 | PrevExist: etcd.PrevNoExist, 200 | TTL: s.config.ReportTTL, 201 | }) 202 | if err != nil { 203 | glog.Errorf("Creating REC '%s' failed: %v", nodeName, err) 204 | } else { 205 | glog.Infof("Record '%s' created successfully", nodeName) 206 | } 207 | 208 | return agentData, nil 209 | } 210 | 211 | func (s *EtcdAgentStorage) getAgents() NcAgentCache { 212 | var ( 213 | dirName string 214 | agentsData NcAgentCache 215 | max_AgentSpec ext_v1.AgentSpec 216 | last_AgentSpec ext_v1.AgentSpec 217 | err error 218 | ) 219 | 220 | agentsData = NcAgentCache{} 221 | dirName = fmt.Sprintf("%s/agents", s.config.EtcdTree) 222 | glog.V(5).Infof("Get agents data from etcd tree '%s'", dirName) 223 | 224 | ctx := context.Background() 225 | resp, err := s.etcd.kAPI.Get(ctx, dirName, &etcd.GetOptions{Quorum: true, Recursive: true}) 226 | if err != nil { 227 | glog.Errorf("Can't fetch tree '%s' recursively from etcd: %v", dirName, err) 228 | return agentsData 229 | } 230 | 231 | // Iterate to nodes 232 | for _, node := range resp.Node.Nodes { 233 | npath := strings.Split(node.Key, "/") 234 | nname := npath[len(npath)-1] 235 | max_AgentSpec.Uptime = 0 236 | // Iterate to uptime records 237 | for _, n := range node.Nodes { 238 | if err = json.Unmarshal([]byte(n.Value), &last_AgentSpec); err != nil { 239 | glog.Error(err) 240 | continue 241 | } 242 | if last_AgentSpec.Uptime > max_AgentSpec.Uptime { 243 | max_AgentSpec = last_AgentSpec 244 | } 245 | } 246 | if max_AgentSpec.Uptime > 0 { 247 | agentsData[nname] = max_AgentSpec 248 | glog.V(10).Infof("%s: %#v", nname, last_AgentSpec) 249 | } 250 | } 251 | return agentsData 252 | } 253 | 254 | func (s *EtcdAgentStorage) GetAgents(rw http.ResponseWriter, r *http.Request, _ 
httprouter.Params) { 255 | ProcessResponse(rw, s.getAgents()) 256 | } 257 | 258 | func (s *EtcdAgentStorage) getSingleAgent(name string) *ext_v1.AgentSpec { 259 | agentsData := s.getAgents() 260 | agentData := agentsData[name] 261 | return &agentData 262 | 263 | } 264 | 265 | func (s *EtcdAgentStorage) GetSingleAgent(rw http.ResponseWriter, r *http.Request, rp httprouter.Params) { 266 | agentName := rp.ByName("name") 267 | agentData := s.getSingleAgent(agentName) 268 | 269 | ProcessResponse(rw, agentData) 270 | } 271 | 272 | func (s *EtcdAgentStorage) CheckAgents() ([]string, []string, error) { 273 | 274 | absent := []string{} 275 | agents := s.getAgents() 276 | 277 | pods, err := s.k8s.KubeClient.Pods() 278 | if err != nil { 279 | return nil, nil, err 280 | } 281 | 282 | for _, pod := range pods.Items { 283 | agentName := pod.ObjectMeta.Name 284 | if _, ok := agents[agentName]; !ok { 285 | absent = append(absent, agentName) 286 | } 287 | } 288 | 289 | return absent, nil, nil 290 | } 291 | 292 | func (s *EtcdAgentStorage) AgentCache() NcAgentCache { 293 | rv := s.getAgents() 294 | return rv 295 | } 296 | 297 | func (h *EtcdAgentStorage) AgentCacheUpdate(key string, ag *ext_v1.AgentSpec) { 298 | //todo: Whether should I implement this, or not??? 299 | } 300 | 301 | func (h *EtcdAgentStorage) SetKubeClient(cl Proxy) { 302 | // Required for tests 303 | h.k8s.KubeClient = cl 304 | } 305 | 306 | func (h *EtcdAgentStorage) CleanCacheOnDemand(rw http.ResponseWriter) { 307 | // Do nothing, because no cache. 308 | // All data auto-purged by ETCD TTL feature 309 | } 310 | -------------------------------------------------------------------------------- /pkg/utils/storer_k8s.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package utils 16 | 17 | import ( 18 | "fmt" 19 | "net/http" 20 | "time" 21 | 22 | "github.com/golang/glog" 23 | "github.com/julienschmidt/httprouter" 24 | 25 | ext_v1 "github.com/Mirantis/k8s-netchecker-server/pkg/extensions/apis/v1" 26 | ext_client "github.com/Mirantis/k8s-netchecker-server/pkg/extensions/client" 27 | api_errors "k8s.io/apimachinery/pkg/api/errors" 28 | meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 | apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" 30 | "k8s.io/client-go/kubernetes" 31 | ) 32 | 33 | type k8sAgentStorage struct { 34 | NcAgentCache NcAgentCache 35 | KubeClient Proxy 36 | ExtensionsClientset ext_client.Clientset 37 | } 38 | 39 | func connect2k8s(createCRD bool) (Proxy, ext_client.Clientset, error) { 40 | var err error 41 | var clientset *kubernetes.Clientset 42 | 43 | proxy := &KubeProxy{} 44 | 45 | config, err := proxy.buildConfig() 46 | if err != nil { 47 | glog.Error(err) 48 | return nil, nil, err 49 | } 50 | 51 | clientset, err = proxy.SetupClientSet(config) 52 | if err != nil { 53 | glog.Error(err) 54 | return nil, nil, err 55 | } 56 | apiextensionsclientset, err := apiextensionsclient.NewForConfig(config) 57 | if err != nil { 58 | glog.Error(err) 59 | return nil, nil, err 60 | } 61 | 62 | if !createCRD { 63 | return proxy, nil, err 64 | } 65 | 66 | err = ext_client.CreateAgentCustomResourceDefinition(apiextensionsclientset) 67 | if err != nil && !api_errors.IsAlreadyExists(err) { 68 | glog.Error(err) 69 | return nil, nil, err 70 | } 
71 | 72 | ext, err := ext_client.WrapClientsetWithExtensions(clientset, config) 73 | if err != nil { 74 | glog.Error(err) 75 | return nil, nil, err 76 | } 77 | 78 | return proxy, ext, err 79 | } 80 | 81 | func NewK8sStorer() (*k8sAgentStorage, error) { 82 | var err error 83 | 84 | rv := &k8sAgentStorage{ 85 | NcAgentCache: map[string]ext_v1.AgentSpec{}, 86 | } 87 | 88 | rv.KubeClient, rv.ExtensionsClientset, err = connect2k8s(true) 89 | 90 | return rv, err 91 | } 92 | 93 | func (h *k8sAgentStorage) UpdateAgents(rw http.ResponseWriter, r *http.Request, rp httprouter.Params) (ext_v1.AgentSpec, error) { 94 | var err error 95 | agentData := ext_v1.AgentSpec{} 96 | 97 | if err = ProcessRequest(r, &agentData, rw); err != nil { 98 | return ext_v1.AgentSpec{}, err 99 | } 100 | 101 | agentData.LastUpdated = time.Now() 102 | glog.V(10).Infof("Updating the agents resource with value: %v", agentData) 103 | 104 | agentName := rp.ByName("name") 105 | 106 | // Try to get current agent 107 | curAgent, err := h.ExtensionsClientset.Agents().Get(agentName) 108 | 109 | if err != nil { 110 | glog.Error(err) 111 | } 112 | 113 | agent := &ext_v1.Agent{ 114 | ObjectMeta: meta_v1.ObjectMeta{ 115 | Name: agentName, 116 | }, 117 | Spec: agentData, 118 | } 119 | 120 | // If agent does not exist, let's create it. 
121 | // Otherwise we need to update it using proper ResourceVersion 122 | if api_errors.IsNotFound(err) { 123 | h.CleanCacheOnDemand(nil) 124 | agent, err = h.ExtensionsClientset.Agents().Create(agent) 125 | glog.Infoln("Created agent", agentName, err) 126 | } else { 127 | agent.ObjectMeta.ResourceVersion = curAgent.ObjectMeta.ResourceVersion 128 | agent, err = h.ExtensionsClientset.Agents().Update(agent) 129 | glog.Infoln("Updated agent", agentName, err) 130 | } 131 | 132 | if err != nil { 133 | glog.Error(err) 134 | } 135 | 136 | h.NcAgentCache[agentName] = agent.Spec 137 | 138 | return agentData, nil 139 | } 140 | 141 | func (h *k8sAgentStorage) GetAgents(rw http.ResponseWriter, r *http.Request, _ httprouter.Params) { 142 | agentsData := map[string]ext_v1.AgentSpec{} 143 | agents, err := h.ExtensionsClientset.Agents().List() 144 | 145 | if err != nil { 146 | glog.Error(err) 147 | } 148 | 149 | for _, agent := range agents.Items { 150 | agentsData[agent.ObjectMeta.Name] = agent.Spec 151 | } 152 | 153 | ProcessResponse(rw, agentsData) 154 | } 155 | 156 | func (h *k8sAgentStorage) GetSingleAgent(rw http.ResponseWriter, r *http.Request, rp httprouter.Params) { 157 | agentName := rp.ByName("name") 158 | agent, err := h.ExtensionsClientset.Agents().Get(agentName) 159 | 160 | if err != nil { 161 | glog.Error(err) 162 | return 163 | } 164 | 165 | if api_errors.IsNotFound(err) { 166 | glog.V(5).Infof("Agent with name %v is not found in the cache", agentName) 167 | http.Error(rw, "There is no such entry in the agent cache", http.StatusNotFound) 168 | return 169 | } 170 | 171 | ProcessResponse(rw, agent) 172 | } 173 | 174 | func (h *k8sAgentStorage) CheckAgents() ([]string, []string, error) { 175 | if h.KubeClient == nil { 176 | return nil, nil, nil 177 | } 178 | 179 | absent := []string{} 180 | outdated := []string{} 181 | 182 | pods, err := h.KubeClient.Pods() 183 | if err != nil { 184 | return nil, nil, err 185 | } 186 | for _, pod := range pods.Items { 187 | agentName 
:= pod.ObjectMeta.Name 188 | agent, err := h.ExtensionsClientset.Agents().Get(agentName) 189 | 190 | if api_errors.IsNotFound(err) { 191 | absent = append(absent, agentName) 192 | continue 193 | } 194 | 195 | if err != nil { 196 | return nil, nil, err 197 | } 198 | 199 | delta := time.Now().Sub(agent.Spec.LastUpdated).Seconds() 200 | if delta > float64(agent.Spec.ReportInterval*2) { 201 | outdated = append(outdated, agentName) 202 | } 203 | } 204 | 205 | return absent, outdated, nil 206 | } 207 | 208 | func (h *k8sAgentStorage) AgentCache() NcAgentCache { 209 | return h.NcAgentCache 210 | } 211 | 212 | func (h *k8sAgentStorage) AgentCacheUpdate(key string, ag *ext_v1.AgentSpec) { 213 | // Required for tests 214 | h.NcAgentCache[key] = *ag 215 | } 216 | 217 | func (h *k8sAgentStorage) SetKubeClient(cl Proxy) { 218 | // Required for tests 219 | h.KubeClient = cl 220 | } 221 | 222 | func (h *k8sAgentStorage) CleanCacheOnDemand(rw http.ResponseWriter) { 223 | if h.KubeClient != nil { 224 | pods, err := h.KubeClient.Pods() 225 | if err != nil { 226 | msg := fmt.Sprintf("Failed to get pods from k8s cluster. 
Details: %v", err) 227 | glog.Error(msg) 228 | if rw != nil { 229 | http.Error(rw, msg, http.StatusInternalServerError) 230 | } 231 | return 232 | } 233 | 234 | type empty struct{} 235 | 236 | podMap := make(map[string]empty) 237 | toRemove := []string{} 238 | 239 | for _, pod := range pods.Items { 240 | podMap[pod.ObjectMeta.Name] = empty{} 241 | } 242 | 243 | for agentName := range h.NcAgentCache { 244 | if _, exists := podMap[agentName]; !exists { 245 | toRemove = append(toRemove, agentName) 246 | } 247 | } 248 | 249 | glog.V(5).Infof("Data cache for agents %v is to be cleaned up.", toRemove) 250 | for _, agentName := range toRemove { 251 | delete(h.NcAgentCache, agentName) 252 | // delete(h.Metrics, agentName) 253 | } 254 | } 255 | } 256 | -------------------------------------------------------------------------------- /pkg/utils/storer_types.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package utils 16 | 17 | import ( 18 | ext_v1 "github.com/Mirantis/k8s-netchecker-server/pkg/extensions/apis/v1" 19 | "github.com/julienschmidt/httprouter" 20 | "net/http" 21 | ) 22 | 23 | type NcAgentCache map[string]ext_v1.AgentSpec 24 | type NcAgentMetrics map[string]AgentMetrics 25 | 26 | type AgentStorer interface { 27 | UpdateAgents(http.ResponseWriter, *http.Request, httprouter.Params) (ext_v1.AgentSpec, error) 28 | GetSingleAgent(http.ResponseWriter, *http.Request, httprouter.Params) 29 | GetAgents(http.ResponseWriter, *http.Request, httprouter.Params) 30 | CleanCacheOnDemand(http.ResponseWriter) 31 | CheckAgents() ([]string, []string, error) 32 | // 33 | AgentCache() NcAgentCache // Returns Agent Cache map (RO) 34 | AgentCacheUpdate(string, *ext_v1.AgentSpec) // (agentName, agent.Spec) may be interface{} should be used, because format is storage-specific 35 | // required for tests 36 | SetKubeClient(cl Proxy) 37 | } 38 | 39 | type Handler struct { 40 | Agents AgentStorer 41 | Metrics NcAgentMetrics 42 | HTTPHandler http.Handler 43 | } 44 | -------------------------------------------------------------------------------- /pkg/utils/utils.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// errProcessor runs a sequence of request/response processing steps and
// remembers the first error: once err is set, every later step is a no-op,
// so the caller checks err a single time after the whole sequence.
//
// Errors are built with errors.New(fmt.Sprintf(...)) on purpose: these
// methods are the only users of this file's "errors" import.
type errProcessor struct {
	err error
}

// ReadBody reads the whole request body and closes it.
//
// It returns nil without reading when an earlier step has failed, and nil
// when reading fails; the read failure is then kept as the error instead of
// being replaced by the byte-count message. If fewer bytes than the declared
// Content-Length were read, the error is set and the bytes read are
// returned. A request without a body is treated as an empty body.
func (ep *errProcessor) ReadBody(req *http.Request) []byte {
	if ep.err != nil {
		return nil
	}

	var body []byte
	if req.Body != nil {
		// Close on every path, not only after a successful read.
		defer req.Body.Close()

		var err error
		if body, err = ioutil.ReadAll(req.Body); err != nil {
			ep.err = errors.New(
				fmt.Sprintf(
					"Error while reading bytes from the request's body. Details: %v", err))
			return nil
		}
	}

	// ContentLength is -1 when unknown, which never triggers this check.
	// The comparison is done in int64 to avoid narrowing ContentLength.
	if int64(len(body)) < req.ContentLength {
		ep.err = errors.New(
			fmt.Sprintf("%v out of %v bytes were read from the request's body.",
				len(body), req.ContentLength))
	}
	return body
}

// UnmarshalBytes decodes the JSON in data into dst, unless an earlier step
// has failed.
func (ep *errProcessor) UnmarshalBytes(data []byte, dst interface{}) {
	if ep.err != nil {
		return
	}

	if err := json.Unmarshal(data, dst); err != nil {
		ep.err = errors.New(
			fmt.Sprintf("Error while unmarshaling data. Details: %v", err))
	}
}

// MarshalBytes encodes src as JSON. It returns nil when an earlier step has
// failed.
func (ep *errProcessor) MarshalBytes(src interface{}) []byte {
	if ep.err != nil {
		return nil
	}

	marshaled, err := json.Marshal(src)
	if err != nil {
		ep.err = errors.New(
			fmt.Sprintf("Error while marshaling the agents' cache data. Details: %v", err))
	}
	return marshaled
}

// WriteBody writes data to the response, unless an earlier step has failed.
func (ep *errProcessor) WriteBody(rw http.ResponseWriter, data []byte) {
	if ep.err != nil {
		return
	}

	if _, err := rw.Write(data); err != nil {
		ep.err = errors.New(
			fmt.Sprintf(
				"Error while writing the response's body. Details: %v", err))
	}
}
%v", ep.err) 95 | http.Error(rw, ep.err.Error(), http.StatusInternalServerError) 96 | } 97 | return ep.err 98 | } 99 | 100 | func ProcessResponse(rw http.ResponseWriter, data interface{}) error { 101 | ep := &errProcessor{} 102 | marshaled := ep.MarshalBytes(data) 103 | ep.WriteBody(rw, marshaled) 104 | 105 | if ep.err != nil { 106 | glog.Errorf("Failed to prepare the response. %v", ep.err) 107 | http.Error(rw, ep.err.Error(), http.StatusInternalServerError) 108 | } 109 | return ep.err 110 | } 111 | -------------------------------------------------------------------------------- /scripts/build_image_server_or_agent.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 Mirantis 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -o xtrace 17 | set -o pipefail 18 | set -o errexit 19 | set -o nounset 20 | 21 | 22 | NETCHECKER_REPO=${NETCHECKER_REPO:-} 23 | NETCHECKER_BRANCH=${NETCHECKER_BRANCH:-master} 24 | BUILD_IMAGE_MARKER=${BUILD_IMAGE_MARKER:-.build-image.complete} 25 | 26 | 27 | function build-image-server-or-agent { 28 | if [ -z "${NETCHECKER_REPO}" ]; then 29 | echo "NETCHECKER_REPO is not set!" 30 | exit 1 31 | else 32 | pushd "../" &> /dev/null 33 | if [ ! 
set -o xtrace
set -o pipefail
set -o errexit
set -o nounset


TRAVIS_PULL_REQUEST_BRANCH=${TRAVIS_PULL_REQUEST_BRANCH:-}
TRAVIS_TEST_RESULT=${TRAVIS_TEST_RESULT:-}
TRAVIS_BRANCH=${TRAVIS_BRANCH:-}
IMAGE_REPO=${IMAGE_REPO:-}
TRAVIS_TAG=${TRAVIS_TAG:-}


# Publishes the image ${IMAGE_REPO} to the registry:
#   - refuses to run unless TRAVIS_TEST_RESULT is set and equals 0;
#   - does nothing for pull requests (TRAVIS_PULL_REQUEST_BRANCH is set);
#   - with TRAVIS_TAG set, tags the image with it and pushes that tag;
#   - on the master branch pushes the ":latest" tag;
#   - otherwise tags the image with the branch name and pushes that tag.
# DOCKER_USERNAME and DOCKER_PASSWORD must be set for the login step.
function push-to-docker {
  if [ -z "${TRAVIS_TEST_RESULT}" ]; then
    echo "TRAVIS_TEST_RESULT is not set!"
    exit 1
  fi
  if [ "${TRAVIS_TEST_RESULT}" -ne 0 ]; then
    echo "Some of the previous steps ended with an errors! The build is broken!"
    exit 1
  fi

  if [ -n "${TRAVIS_PULL_REQUEST_BRANCH}" ]; then
    echo "Processing PR ${TRAVIS_PULL_REQUEST_BRANCH}"
    exit 0
  fi

  # Validate the target before authenticating: there is no point in logging
  # in when there is nothing to push.
  if [ -z "${IMAGE_REPO}" ]; then
    echo "IMAGE_REPO is not set!"
    exit 1
  fi

  # Tracing is switched off so the credentials never reach the build log, and
  # the password goes through stdin so it is not part of the docker command
  # line.
  set +o xtrace
  printf '%s' "${DOCKER_PASSWORD}" | docker login -u "${DOCKER_USERNAME}" --password-stdin
  set -o xtrace

  if [ -n "${TRAVIS_TAG}" ]; then
    echo "Pushing with tag - ${TRAVIS_TAG}"
    docker tag "${IMAGE_REPO}" "${IMAGE_REPO}":"${TRAVIS_TAG}"
    docker push "${IMAGE_REPO}":"${TRAVIS_TAG}"
    exit
  fi

  if [ "${TRAVIS_BRANCH}" == "master" ]; then
    echo "Pushing with tag - latest"
    docker push "${IMAGE_REPO}":latest
    exit
  fi

  echo "Pushing with tag - ${TRAVIS_BRANCH}"
  docker tag "${IMAGE_REPO}" "${IMAGE_REPO}":"${TRAVIS_BRANCH}"
  docker push "${IMAGE_REPO}":"${TRAVIS_BRANCH}"
}

push-to-docker
set -o xtrace
set -o pipefail
set -o errexit
set -o nounset


HELM_SCRIPT_URL=${HELM_SCRIPT_URL:-https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get}
HELM_SCRIPT_NAME=${HELM_SCRIPT_NAME:-get_helm.sh}
HELM_SERVER_PATH=${HELM_SERVER_PATH:-helm-chart/netchecker-server}
HELM_AGENT_PATH=${HELM_AGENT_PATH:-helm-chart/netchecker-agent}
HELM_DEBUG=${HELM_DEBUG:-"--debug"}
NETCHECKER_REPO=${NETCHECKER_REPO:-}
KUBECTL_DIR="${KUBECTL_DIR:-${HOME}/.kubeadm-dind-cluster}"
PATH="${KUBECTL_DIR}:${PATH}"
NS=${NS:-netchecker}
# The first script argument, when given, overrides the namespace from NS.
REAL_NS="--namespace=${1:-$NS}"


# Polls once per second, for up to 60 seconds, until the deployment named by
# $1 in the kube-system namespace reports exactly one available replica.
# Exits the script with status 1 when that never happens.
function wait-for-tiller-pod-ready() {
  local name="${1}"
  local timeout_secs=60
  local increment_secs=1
  local waited_time=0
  local tiller_replicas

  while [ "${waited_time}" -lt "${timeout_secs}" ]; do
    # A failing kubectl call must count as "not ready yet"; without the
    # "|| true" errexit would abort the script instead of retrying.
    tiller_replicas="$(kubectl get deployment "${name}" \
      -o 'go-template={{.status.availableReplicas}}' \
      --namespace kube-system || true)"

    if [ "${tiller_replicas}" == "1" ]; then
      return 0
    fi

    sleep "${increment_secs}"
    (( waited_time += increment_secs ))

    if [ "${waited_time}" -ge "${timeout_secs}" ]; then
      echo "${name} was never ready."
      exit 1
    fi
    echo -n . 1>&2
  done
}


# Downloads the helm installer into ./scripts and runs it, removes a
# tiller-deploy deployment left in kube-system (if any), then initialises
# helm and waits for tiller to become available.
function install-helm {
  pushd "./scripts" &> /dev/null
  wget -O "${HELM_SCRIPT_NAME}" "${HELM_SCRIPT_URL}"
  chmod +x ./"${HELM_SCRIPT_NAME}"
  # Failures of the installer and of the clean-up below are tolerated.
  set +o errexit
  bash -x ./"${HELM_SCRIPT_NAME}"
  echo "Uninstall tiller-deploy if exists"
  kubectl delete deployment "tiller-deploy" --namespace "kube-system" &> /dev/null || true
  set -o errexit
  helm "${HELM_DEBUG}" init
  wait-for-tiller-pod-ready "tiller-deploy"
  helm "${HELM_DEBUG}" version
  popd &> /dev/null
}


# Lints one chart from the current directory: the agent chart when
# NETCHECKER_REPO is "k8s-netchecker-server", the server chart otherwise.
function lint-helm {
  if [ -z "${NETCHECKER_REPO}" ]; then
    echo "NETCHECKER_REPO is not set!"
    exit 1
  fi
  if [ "${NETCHECKER_REPO}" == "k8s-netchecker-server" ]; then
    helm "${HELM_DEBUG}" lint ./"${HELM_AGENT_PATH}"/
  else
    helm "${HELM_DEBUG}" lint ./"${HELM_SERVER_PATH}"/
  fi
}


# Installs the server chart first and the agent chart second. One of them is
# taken from the current directory and the other from ../${NETCHECKER_REPO},
# depending on the value of NETCHECKER_REPO.
function deploy-helm {
  if [ "${NETCHECKER_REPO}" == "k8s-netchecker-server" ]; then
    pushd "../${NETCHECKER_REPO}" &> /dev/null
    helm "${HELM_DEBUG}" install "${REAL_NS}" ./"${HELM_SERVER_PATH}"/
    popd &> /dev/null
    helm "${HELM_DEBUG}" install "${REAL_NS}" ./"${HELM_AGENT_PATH}"/
  else
    helm "${HELM_DEBUG}" install "${REAL_NS}" ./"${HELM_SERVER_PATH}"/
    pushd "../${NETCHECKER_REPO}" &> /dev/null
    helm "${HELM_DEBUG}" install "${REAL_NS}" ./"${HELM_AGENT_PATH}"/
    popd &> /dev/null
  fi
  helm "${HELM_DEBUG}" list
}


install-helm
lint-helm
deploy-helm
set -o xtrace
set -o pipefail
set -o errexit
set -o nounset


IMAGE_REPO_SERVER=${IMAGE_REPO_SERVER:-mirantis/k8s-netchecker-server}
IMAGE_REPO_AGENT=${IMAGE_REPO_AGENT:-mirantis/k8s-netchecker-agent}
IMAGE_TAG=${IMAGE_TAG:-latest}
NUM_NODES=${NUM_NODES:-3}
TMP_IMAGE_PATH=${TMP_IMAGE_PATH:-/tmp/netchecker-all.tar}
# export MASTER_NAME=kube-master if you need
# to import images in kube-master node
MASTER_NAME=${MASTER_NAME:-}
SLAVE_NAME=${SLAVE_NAME:-"kube-node-"}


# Copies the saved image archive into the container named by $1, loads it
# with the docker daemon running inside that container and lists the result.
function load-images-into {
  local container="${1}"

  docker cp "${TMP_IMAGE_PATH}" "${container}":/netchecker-all.tar
  docker exec -ti "${container}" docker load -i /netchecker-all.tar
  docker exec -ti "${container}" docker images
}


# Saves the server and agent images into one archive and imports it into the
# master container (only when MASTER_NAME is set) and into the node
# containers ${SLAVE_NAME}1 .. ${SLAVE_NAME}${NUM_NODES}.
function import-images {
  local node

  docker save -o "${TMP_IMAGE_PATH}" \
    "${IMAGE_REPO_SERVER}":"${IMAGE_TAG}" "${IMAGE_REPO_AGENT}":"${IMAGE_TAG}"

  if [ -n "${MASTER_NAME}" ]; then
    load-images-into "${MASTER_NAME}"
  fi

  for node in $(seq 1 "${NUM_NODES}"); do
    load-images-into "${SLAVE_NAME}${node}"
  done
  echo "Finished copying docker images to dind nodes"
}

import-images
set -o xtrace
set -o pipefail
set -o errexit
set -o nounset


NUM_NODES=${NUM_NODES:-3}
KUBEADM_SCRIPT_URL=${KUBEADM_SCRIPT_URL:-https://cdn.rawgit.com/Mirantis/kubeadm-dind-cluster/master/fixed/dind-cluster}
# kubeadm-dind-cluster supports k8s versions:
# "v1.6", "v1.7" and "v1.8".
DIND_CLUSTER_VERSION=${DIND_CLUSTER_VERSION:-v1.8}


# Downloads the dind-cluster script for DIND_CLUSTER_VERSION into ./scripts,
# tears down a cluster left from a previous run and brings up a new one with
# NUM_NODES nodes.
function kubeadm-dind-cluster {
  local script="dind-cluster-${DIND_CLUSTER_VERSION}.sh"

  pushd "./scripts" &> /dev/null
  # -O pins the local file name. Without it a rerun keeps the old download
  # and stores the new one under a ".1" suffix, so the stale script would be
  # the one executed below.
  wget -O "${script}" "${KUBEADM_SCRIPT_URL}-${DIND_CLUSTER_VERSION}.sh"
  chmod +x ./"${script}"
  NUM_NODES="${NUM_NODES}" bash ./"${script}" down
  NUM_NODES="${NUM_NODES}" bash ./"${script}" up
  popd &> /dev/null
}

kubeadm-dind-cluster
// TestBasicFeatures is the "go test" entry point of the e2e suite: it
// registers ginkgo.Fail as the gomega failure handler and runs the ginkgo
// specs under the suite description "Basic".
func TestBasicFeatures(t *testing.T) {
	gomega.RegisterFailHandler(ginkgo.Fail)
	ginkgo.RunSpecs(t, "Basic")
}
14 | 15 | package e2e 16 | 17 | import ( 18 | "encoding/json" 19 | "fmt" 20 | "net/http" 21 | "strings" 22 | "time" 23 | 24 | "github.com/Mirantis/k8s-netchecker-server/pkg/utils" 25 | testutils "github.com/Mirantis/k8s-netchecker-server/test/e2e/utils" 26 | 27 | "github.com/onsi/ginkgo" 28 | "github.com/onsi/gomega" 29 | 30 | "io/ioutil" 31 | meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 | rbac "k8s.io/client-go/pkg/apis/rbac/v1beta1" 33 | "k8s.io/apimachinery/pkg/labels" 34 | "k8s.io/apimachinery/pkg/util/intstr" 35 | "k8s.io/client-go/kubernetes" 36 | "k8s.io/client-go/pkg/api/v1" 37 | "k8s.io/client-go/pkg/apis/extensions/v1beta1" 38 | ) 39 | 40 | var _ = ginkgo.Describe("Basic", func() { 41 | var clientset *kubernetes.Clientset 42 | var ns *v1.Namespace 43 | var cr *rbac.ClusterRole 44 | var crb *rbac.ClusterRoleBinding 45 | var serverPort int = 8989 46 | 47 | ginkgo.BeforeEach(func() { 48 | var err error 49 | clientset, err = testutils.KubeClient() 50 | gomega.Expect(err).NotTo(gomega.HaveOccurred()) 51 | namespaceObj := &v1.Namespace{ 52 | ObjectMeta: meta_v1.ObjectMeta{ 53 | GenerateName: "e2e-tests-netchecker-", 54 | Namespace: "", 55 | }, 56 | Status: v1.NamespaceStatus{}, 57 | } 58 | cr_body := newClusterRole( 59 | "netchecker-server", 60 | []rbac.PolicyRule{ 61 | {Verbs: []string{"*"}, APIGroups: []string{"apiextensions.k8s.io"}, Resources: []string{"customresourcedefinitions"}}, 62 | {Verbs: []string{"*"}, APIGroups: []string{"network-checker.ext"}, Resources: []string{"agents"}}, 63 | {Verbs: []string{"get", "list"}, APIGroups: []string{""}, Resources: []string{"pods"}}, 64 | }, 65 | ) 66 | cr, err = clientset.Rbac().ClusterRoles().Create(cr_body) 67 | crb_body := newClusterRoleBinding( 68 | "netchecker", "rbac.authorization.k8s.io", "ClusterRole", 69 | "netchecker-server", "rbac.authorization.k8s.io", "Group", "system:serviceaccounts") 70 | crb, err = clientset.Rbac().ClusterRoleBindings().Create(crb_body) 71 | ns, err = 
clientset.Namespaces().Create(namespaceObj) 72 | gomega.Expect(err).NotTo(gomega.HaveOccurred()) 73 | }) 74 | 75 | ginkgo.AfterEach(func() { 76 | podList, _ := clientset.Core().Pods(ns.Name).List(meta_v1.ListOptions{LabelSelector: labels.Everything().String()}) 77 | if ginkgo.CurrentGinkgoTestDescription().Failed { 78 | testutils.DumpLogs(clientset, podList.Items...) 79 | } 80 | for _, pod := range podList.Items { 81 | clientset.Core().Pods(pod.Namespace).Delete(pod.Name, &meta_v1.DeleteOptions{}) 82 | } 83 | clientset.Namespaces().Delete(ns.Name, &meta_v1.DeleteOptions{}) 84 | clientset.Rbac().ClusterRoleBindings().Delete(crb.Name, &meta_v1.DeleteOptions{}) 85 | clientset.Rbac().ClusterRoles().Delete(cr.Name, &meta_v1.DeleteOptions{}) 86 | }) 87 | 88 | ginkgo.It("Connectivity check should pass", func() { 89 | ginkgo.By("deploying netchecker server pod") 90 | endpointArg := fmt.Sprintf("--endpoint=0.0.0.0:%d", serverPort) 91 | serverLabels := map[string]string{"app": "netchecker-server"} 92 | serverPod := newPod( 93 | "netchecker-server", "netchecker-server", "mirantis/k8s-netchecker-server", 94 | []string{"netchecker-server", "--kubeproxyinit", "--logtostderr", "--v=5", endpointArg}, serverLabels, false, true, nil) 95 | pod, err := clientset.Pods(ns.Name).Create(serverPod) 96 | gomega.Expect(err).Should(gomega.BeNil()) 97 | testutils.WaitForReady(clientset, pod) 98 | 99 | ginkgo.By("deploying netchecker service") 100 | servicePorts := []v1.ServicePort{{Protocol: v1.ProtocolTCP, Port: int32(serverPort), TargetPort: intstr.FromInt(serverPort)}} 101 | serverSvc := newService("netchecker-service", serverLabels, servicePorts, []string{}) 102 | _, err = clientset.Services(ns.Name).Create(serverSvc) 103 | gomega.Expect(err).Should(gomega.BeNil()) 104 | 105 | ginkgo.By("deploying netchecker agent daemonset") 106 | var ncAgentLabels = map[string]string{"app": "netchecker-agent"} 107 | serverEndpointArg := fmt.Sprintf("--serverendpoint=netchecker-service:%d", serverPort) 
108 | cmd := []string{"netchecker-agent", "--alsologtostderr=true", "--v=5", serverEndpointArg, "--reportinterval=10"} 109 | agentDS := newDaemonSet("netchecker-agent", "netchecker-agent", "mirantis/k8s-netchecker-agent", 110 | []string{"sh", "-c", strings.Join(cmd, " ")}, ncAgentLabels, false, true, 111 | []v1.EnvVar{ 112 | {Name: "MY_NODE_NAME", ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "spec.nodeName"}}}, 113 | {Name: "MY_POD_NAME", ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "metadata.name"}}}, 114 | }, 115 | ) 116 | _, err = clientset.Extensions().DaemonSets(ns.Name).Create(agentDS) 117 | gomega.Expect(err).NotTo(gomega.HaveOccurred()) 118 | 119 | // ensure agents are up and they have sent their reports to the server 120 | time.Sleep(45 * time.Second) 121 | 122 | services := getServices(clientset, ns) 123 | ncService := false 124 | for _, svc := range services { 125 | if svc.ObjectMeta.Name == "netchecker-service" { 126 | ncService = true 127 | break 128 | } 129 | } 130 | gomega.Expect(ncService).To(gomega.BeTrue()) 131 | 132 | pods := getPods(clientset, ns) 133 | ncServerIP := "" 134 | ncAgentNames := map[string]bool{} 135 | for _, pod := range pods { 136 | if pod.ObjectMeta.Name == "netchecker-server" { 137 | ncServerIP = pod.Status.PodIP 138 | } else if pod.ObjectMeta.Name[:16] == "netchecker-agent" { 139 | ncAgentNames[pod.ObjectMeta.Name] = true 140 | } 141 | } 142 | gomega.Expect(ncServerIP).NotTo(gomega.BeEmpty()) 143 | 144 | ginkgo.By("verifying that server is fed by all the agents") 145 | agentsResp := map[string]utils.AgentInfo{} 146 | httpServiceGet(serverPort, ncServerIP, "api/v1/agents/", &agentsResp) 147 | for agentName := range agentsResp { 148 | // server has reports from every agent 149 | gomega.Expect(ncAgentNames[agentName]).To(gomega.BeTrue()) 150 | } 151 | // agent count in server's data is the same as agent pod count 152 | 
gomega.Expect(len(ncAgentNames)).To(gomega.BeEquivalentTo(len(agentsResp))) 153 | 154 | ginkgo.By("verifying connectivity in cluster") 155 | ccResp := utils.CheckConnectivityInfo{} 156 | httpServiceGet(serverPort, ncServerIP, "api/v1/connectivity_check", &ccResp) 157 | // server has reports from all the agents 158 | gomega.Expect(ccResp.Absent).To(gomega.BeEmpty()) 159 | // all the agents reports are up to date 160 | gomega.Expect(ccResp.Outdated).To(gomega.BeEmpty()) 161 | }) 162 | }) 163 | 164 | func newPrivilegedPodSpec(containerName, imageName string, cmd []string, hostNetwork, privileged bool, env []v1.EnvVar) v1.PodSpec { 165 | return v1.PodSpec{ 166 | HostNetwork: hostNetwork, 167 | Containers: []v1.Container{ 168 | { 169 | Name: containerName, 170 | Image: imageName, 171 | Command: cmd, 172 | SecurityContext: &v1.SecurityContext{Privileged: &privileged}, 173 | ImagePullPolicy: v1.PullIfNotPresent, 174 | Env: env, 175 | }, 176 | }, 177 | } 178 | } 179 | 180 | func newPod(podName, containerName, imageName string, cmd []string, labels map[string]string, hostNetwork bool, privileged bool, env []v1.EnvVar) *v1.Pod { 181 | return &v1.Pod{ 182 | ObjectMeta: meta_v1.ObjectMeta{ 183 | Name: podName, 184 | Labels: labels, 185 | }, 186 | Spec: newPrivilegedPodSpec(containerName, imageName, cmd, hostNetwork, privileged, env), 187 | } 188 | } 189 | 190 | func newDaemonSet(dsName, containerName, imageName string, cmd []string, labels map[string]string, hostNetwork, privileged bool, env []v1.EnvVar) *v1beta1.DaemonSet { 191 | return &v1beta1.DaemonSet{ 192 | ObjectMeta: meta_v1.ObjectMeta{ 193 | Name: dsName, 194 | Labels: labels, 195 | }, 196 | Spec: v1beta1.DaemonSetSpec{ 197 | Template: v1.PodTemplateSpec{ 198 | ObjectMeta: meta_v1.ObjectMeta{ 199 | Labels: labels, 200 | }, 201 | Spec: newPrivilegedPodSpec(containerName, imageName, cmd, hostNetwork, privileged, env), 202 | }, 203 | }, 204 | } 205 | } 206 | 207 | func newDeployment(deploymentName string, replicas int32, 
podLabels map[string]string, imageName string, image string, cmd []string, env []v1.EnvVar) *v1beta1.Deployment { 208 | return &v1beta1.Deployment{ 209 | ObjectMeta: meta_v1.ObjectMeta{Name: deploymentName}, 210 | Spec: v1beta1.DeploymentSpec{ 211 | Replicas: &replicas, 212 | Template: v1.PodTemplateSpec{ 213 | ObjectMeta: meta_v1.ObjectMeta{ 214 | Labels: podLabels, 215 | }, 216 | Spec: newPrivilegedPodSpec(image, imageName, cmd, false, false, env), 217 | }, 218 | }, 219 | } 220 | } 221 | 222 | func newService(serviceName string, labels map[string]string, ports []v1.ServicePort, externalIPs []string) *v1.Service { 223 | return &v1.Service{ 224 | ObjectMeta: meta_v1.ObjectMeta{ 225 | Name: serviceName, 226 | }, 227 | Spec: v1.ServiceSpec{ 228 | Selector: labels, 229 | Type: v1.ServiceTypeNodePort, 230 | Ports: ports, 231 | ExternalIPs: externalIPs, 232 | }, 233 | } 234 | } 235 | 236 | func newClusterRole(roleName string, rules []rbac.PolicyRule) *rbac.ClusterRole { 237 | return &rbac.ClusterRole{ 238 | ObjectMeta: meta_v1.ObjectMeta{Name: roleName}, 239 | Rules: rules, 240 | } 241 | } 242 | 243 | func newClusterRoleBinding(bindName string, roleApigroup string, roleKind string, roleName string, subjApigroup string, subjKind string, subjName string) *rbac.ClusterRoleBinding { 244 | return &rbac.ClusterRoleBinding{ 245 | ObjectMeta: meta_v1.ObjectMeta{Name: bindName}, 246 | RoleRef: rbac.RoleRef{ 247 | APIGroup: roleApigroup, 248 | Kind: roleKind, 249 | Name: roleName, 250 | }, 251 | Subjects: []rbac.Subject{ 252 | { 253 | APIGroup: subjApigroup, 254 | Kind: subjKind, 255 | Name: subjName, 256 | }, 257 | }, 258 | } 259 | } 260 | 261 | func httpServiceGet(port int, ip string, uri string, dst interface{}) { 262 | timeout := time.Duration(1 * time.Second) 263 | client := http.Client{ 264 | Timeout: timeout, 265 | } 266 | gomega.Eventually(func() error { 267 | resp, err := client.Get(fmt.Sprintf("http://%s:%d/%s", ip, port, uri)) 268 | if err != nil { 269 | return err 270 
| } 271 | if resp.StatusCode > 200 { 272 | return fmt.Errorf("Unexpected error from nginx service: %s", resp.Status) 273 | } 274 | 275 | body, err := ioutil.ReadAll(resp.Body) 276 | if err != nil { 277 | return err 278 | } 279 | resp.Body.Close() 280 | err = json.Unmarshal(body, dst) 281 | return err 282 | }, 10*time.Second, 1*time.Second).Should(gomega.BeNil()) 283 | } 284 | 285 | func getPods(clientset *kubernetes.Clientset, ns *v1.Namespace) []v1.Pod { 286 | pods, err := clientset.Pods(ns.Name).List(meta_v1.ListOptions{}) 287 | gomega.Expect(err).NotTo(gomega.HaveOccurred()) 288 | return pods.Items 289 | } 290 | 291 | func getServices(clientset *kubernetes.Clientset, ns *v1.Namespace) []v1.Service { 292 | services, err := clientset.Services(ns.Name).List(meta_v1.ListOptions{}) 293 | gomega.Expect(err).NotTo(gomega.HaveOccurred()) 294 | return services.Items 295 | } 296 | -------------------------------------------------------------------------------- /test/e2e/utils/utils.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Mirantis 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package utils 16 | 17 | import ( 18 | "bytes" 19 | "flag" 20 | "fmt" 21 | "io" 22 | "net/url" 23 | "strings" 24 | "time" 25 | 26 | meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | "k8s.io/apimachinery/pkg/util/httpstream/spdy" 28 | "k8s.io/client-go/kubernetes" 29 | "k8s.io/client-go/pkg/api" 30 | "k8s.io/client-go/pkg/api/v1" 31 | "k8s.io/client-go/rest" 32 | "k8s.io/client-go/tools/clientcmd" 33 | "k8s.io/client-go/tools/remotecommand" 34 | remotecommandserver "k8s.io/apimachinery/pkg/util/remotecommand" 35 | 36 | "github.com/onsi/ginkgo" 37 | "github.com/onsi/gomega" 38 | ) 39 | 40 | var apiMaster string 41 | 42 | func init() { 43 | flag.StringVar(&apiMaster, "master", "http://localhost:8080", "apiserver address to use with restclient") 44 | } 45 | 46 | // Logf - write formatted logs using GinkgoWriter 47 | func Logf(format string, a ...interface{}) { 48 | fmt.Fprintf(ginkgo.GinkgoWriter, format, a...) 49 | } 50 | 51 | func loadConfig() *rest.Config { 52 | config, err := clientcmd.BuildConfigFromFlags(apiMaster, "") 53 | gomega.Expect(err).NotTo(gomega.HaveOccurred()) 54 | return config 55 | } 56 | 57 | // KubeClient - get kubernetes API Clientset 58 | func KubeClient() (*kubernetes.Clientset, error) { 59 | Logf("Using master %v\n", apiMaster) 60 | config := loadConfig() 61 | clientset, err := kubernetes.NewForConfig(config) 62 | gomega.Expect(err).NotTo(gomega.HaveOccurred()) 63 | return clientset, nil 64 | } 65 | 66 | // WaitForReady - 67 | func WaitForReady(clientset *kubernetes.Clientset, pod *v1.Pod) { 68 | gomega.Eventually(func() error { 69 | podUpdated, err := clientset.Core().Pods(pod.Namespace).Get(pod.Name, meta_v1.GetOptions{}) 70 | if err != nil { 71 | return err 72 | } 73 | if podUpdated.Status.Phase != v1.PodRunning { 74 | return fmt.Errorf("pod %v is not running phase: %v", podUpdated.Name, podUpdated.Status.Phase) 75 | } 76 | return nil 77 | }, 120*time.Second, 5*time.Second).Should(gomega.BeNil()) 78 | } 79 | 80 | // DumpLogs - 
dump pods logs using GinkgoWriter 81 | func DumpLogs(clientset *kubernetes.Clientset, pods ...v1.Pod) { 82 | for _, pod := range pods { 83 | dumpLogs(clientset, pod) 84 | } 85 | } 86 | 87 | func dumpLogs(clientset *kubernetes.Clientset, pod v1.Pod) { 88 | req := clientset.Core().Pods(pod.Namespace).GetLogs(pod.Name, &v1.PodLogOptions{}) 89 | readCloser, err := req.Stream() 90 | gomega.Expect(err).NotTo(gomega.HaveOccurred()) 91 | defer readCloser.Close() 92 | Logf("\n Dumping logs for %v:%v \n", pod.Namespace, pod.Name) 93 | _, err = io.Copy(ginkgo.GinkgoWriter, readCloser) 94 | gomega.Expect(err).NotTo(gomega.HaveOccurred()) 95 | } 96 | 97 | // ExecInPod - execute command in a pod 98 | func ExecInPod(clientset *kubernetes.Clientset, pod v1.Pod, cmd ...string) (string, string, error) { 99 | Logf("Running %v in %v\n", cmd, pod.Name) 100 | 101 | container := pod.Spec.Containers[0].Name 102 | var stdout, stderr bytes.Buffer 103 | config := loadConfig() 104 | client := clientset.CoreV1Client.RESTClient() 105 | req := client.Post(). 106 | Resource("pods"). 107 | Name(pod.Name). 108 | Namespace(pod.Namespace). 109 | SubResource("exec"). 
110 | Param("container", container) 111 | req.VersionedParams(&api.PodExecOptions{ 112 | Container: container, 113 | Command: cmd, 114 | TTY: false, 115 | Stdin: false, 116 | Stdout: true, 117 | Stderr: true, 118 | }, api.ParameterCodec) 119 | err := execute("POST", req.URL(), config, nil, &stdout, &stderr, false) 120 | Logf("Error %v: %v\n", cmd, stderr.String()) 121 | Logf("Output %v: %v\n", cmd, stdout.String()) 122 | return strings.TrimSpace(stdout.String()), strings.TrimSpace(stderr.String()), err 123 | } 124 | 125 | func execute(method string, url *url.URL, config *rest.Config, stdin io.Reader, stdout, stderr io.Writer, tty bool) error { 126 | tlsConfig, err := rest.TLSConfigFor(config) 127 | if err != nil { 128 | return err 129 | } 130 | upgrader := spdy.NewRoundTripper(tlsConfig, true) 131 | exec, err := remotecommand.NewStreamExecutor(upgrader, nil, method, url) 132 | if err != nil { 133 | return err 134 | } 135 | return exec.Stream(remotecommand.StreamOptions{ 136 | SupportedProtocols: remotecommandserver.SupportedStreamingProtocols, 137 | Stdin: stdin, 138 | Stdout: stdout, 139 | Stderr: stderr, 140 | Tty: tty, 141 | }) 142 | } 143 | --------------------------------------------------------------------------------