├── .gitignore ├── K8s-Resources.png ├── LICENSE ├── Makefile ├── README.md ├── docker-compose.yml ├── docker ├── carbon-relay │ ├── Dockerfile │ ├── conf │ │ ├── carbon.conf.template │ │ └── storage-schemas.conf │ ├── kube-watch.js │ └── supervisord.conf ├── graphite-master │ ├── .DS_Store │ ├── Dockerfile │ ├── README.md │ ├── conf │ │ ├── carbon.conf │ │ ├── storage-aggregation.conf │ │ └── storage-schemas.conf │ ├── entrypoint.sh │ ├── kube-watch.js │ ├── nginx │ │ └── nginx.conf │ ├── supervisord.conf │ └── webapp │ │ ├── initial_data.json │ │ └── local_settings.py.template ├── graphite-node │ ├── .DS_Store │ ├── Dockerfile │ ├── README.md │ ├── conf │ │ ├── carbon.conf │ │ ├── storage-aggregation.conf │ │ └── storage-schemas.conf │ ├── curator │ │ ├── cron │ │ └── run.sh │ ├── entrypoint.sh │ ├── nginx │ │ └── nginx.conf │ ├── supervisord.conf │ └── webapp │ │ ├── initial_data.json │ │ └── local_settings.py ├── statsd-daemon │ ├── Dockerfile │ └── config.js └── statsd-proxy │ ├── Dockerfile │ ├── kube-watch.js │ ├── proxyConfig.js │ └── statsd-proxy.json └── kube ├── carbon-relay ├── dep.yml └── svc.yml ├── graphite-master ├── dep.yml └── svc.yml ├── graphite-node ├── stateful.set.yml └── svc.yml ├── rbac ├── role-binding.yml ├── role.yml └── serviceaccount.yml ├── statsd-daemon ├── dep.yml └── svc.yml └── statsd-proxy ├── dep.yml └── svc.yml /.gitignore: -------------------------------------------------------------------------------- 1 | .byebug_history 2 | -------------------------------------------------------------------------------- /K8s-Resources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nanit/kubernetes-graphite-cluster/900a89cce137b167345c162a6554e7a30ea9b3e9/K8s-Resources.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Nanit 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DOCKER_REPOSITORY?=nanit 2 | SUDO?=sudo 3 | 4 | STATSD_PROXY_APP_NAME=statsd 5 | STATSD_PROXY_DIR_NAME=statsd-proxy 6 | STATSD_PROXY_DOCKER_DIR=docker/$(STATSD_PROXY_DIR_NAME) 7 | STATSD_PROXY_IMAGE_TAG=$(shell git log -n 1 --pretty=format:%h $(STATSD_PROXY_DOCKER_DIR)) 8 | STATSD_PROXY_IMAGE_NAME=$(DOCKER_REPOSITORY)/$(STATSD_PROXY_APP_NAME):$(STATSD_PROXY_IMAGE_TAG) 9 | STATSD_PROXY_REPLICAS?=$(shell curl -s config/$(NANIT_ENV)/$(STATSD_PROXY_APP_NAME)/replicas) 10 | STATSD_PROXY_ADDITIONAL_YAML?=$(shell curl -s config/$(NANIT_ENV)/$(STATSD_PROXY_APP_NAME)/additional_yaml) 11 | 12 | define generate-statsd-proxy-svc 13 | sed -e 's/{{APP_NAME}}/$(STATSD_PROXY_APP_NAME)/g' kube/$(STATSD_PROXY_DIR_NAME)/svc.yml 14 | endef 15 | 16 | define generate-statsd-proxy-dep 17 | if [ -z "$(STATSD_PROXY_REPLICAS)" ]; then echo "ERROR: STATSD_PROXY_REPLICAS is empty!"; exit 1; fi 18 | sed -e 's/{{APP_NAME}}/$(STATSD_PROXY_APP_NAME)/g;s,{{IMAGE_NAME}},$(STATSD_PROXY_IMAGE_NAME),g;s/{{REPLICAS}}/$(STATSD_PROXY_REPLICAS)/g;s@{{ADDITIONAL_YAML}}@$(STATSD_PROXY_ADDITIONAL_YAML)@g' kube/$(STATSD_PROXY_DIR_NAME)/dep.yml 19 | endef 20 | 21 | deploy-statsd-proxy: docker-statsd-proxy 22 | kubectl get svc $(STATSD_PROXY_APP_NAME) || $(call generate-statsd-proxy-svc) | kubectl create -f - 23 | $(call generate-statsd-proxy-dep) | kubectl apply -f - 24 | 25 | docker-statsd-proxy: 26 | $(SUDO) docker pull $(STATSD_PROXY_IMAGE_NAME) || ($(SUDO) docker build -t $(STATSD_PROXY_IMAGE_NAME) $(STATSD_PROXY_DOCKER_DIR) && $(SUDO) docker push $(STATSD_PROXY_IMAGE_NAME)) 27 | 28 | clean-statsd-proxy: 29 | kubectl delete deployment $(STATSD_PROXY_APP_NAME) || true 30 | 31 | #------------------------------------------------------------------------------------------------------------------------------------------------- 32 | STATSD_DAEMON_APP_NAME=statsd-daemon 33 | STATSD_DAEMON_DIR_NAME=statsd-daemon 34 | STATSD_DAEMON_DOCKER_DIR=docker/$(STATSD_DAEMON_DIR_NAME) 35 | STATSD_DAEMON_IMAGE_TAG=$(shell git log -n 1 --pretty=format:%h $(STATSD_DAEMON_DOCKER_DIR)) 36 | STATSD_DAEMON_IMAGE_NAME=$(DOCKER_REPOSITORY)/$(STATSD_DAEMON_APP_NAME):$(STATSD_DAEMON_IMAGE_TAG) 37 | STATSD_DAEMON_REPLICAS?=$(shell curl -s config/$(NANIT_ENV)/$(STATSD_DAEMON_APP_NAME)/replicas) 38 | STATSD_DAEMON_ADDITIONAL_YAML?=$(shell curl -s config/$(NANIT_ENV)/$(STATSD_DAEMON_APP_NAME)/additional_yaml) 39 | 40 | define generate-statsd-daemon-svc 41 | sed -e 's/{{APP_NAME}}/$(STATSD_DAEMON_APP_NAME)/g' kube/$(STATSD_DAEMON_DIR_NAME)/svc.yml 42 | endef 43 | 44 | define generate-statsd-daemon-dep 45 | if [ -z "$(STATSD_DAEMON_REPLICAS)" ]; then echo "ERROR: STATSD_DAEMON_REPLICAS is empty!"; exit 1; fi 46 | sed -e 's/{{APP_NAME}}/$(STATSD_DAEMON_APP_NAME)/g;s,{{IMAGE_NAME}},$(STATSD_DAEMON_IMAGE_NAME),g;s/{{REPLICAS}}/$(STATSD_DAEMON_REPLICAS)/g;s@{{ADDITIONAL_YAML}}@$(STATSD_DAEMON_ADDITIONAL_YAML)@g' kube/$(STATSD_DAEMON_DIR_NAME)/dep.yml 47 | endef 48 | 49 | deploy-statsd-daemon: docker-statsd-daemon 50 | kubectl get svc $(STATSD_DAEMON_APP_NAME) || $(call generate-statsd-daemon-svc) | kubectl create -f - 51 | $(call generate-statsd-daemon-dep) | kubectl apply -f - 52 | 53 | docker-statsd-daemon: 54 | $(SUDO) docker pull $(STATSD_DAEMON_IMAGE_NAME) || ($(SUDO) docker build -t $(STATSD_DAEMON_IMAGE_NAME) $(STATSD_DAEMON_DOCKER_DIR) && $(SUDO) docker push 
$(STATSD_DAEMON_IMAGE_NAME)) 55 | 56 | clean-statsd-daemon: 57 | kubectl delete deployment $(STATSD_DAEMON_APP_NAME) || true 58 | 59 | #------------------------------------------------------------------------------------------------------------------------------------------------- 60 | CARBON_RELAY_APP_NAME=carbon-relay 61 | CARBON_RELAY_DIR_NAME=carbon-relay 62 | CARBON_RELAY_DOCKER_DIR=docker/$(CARBON_RELAY_DIR_NAME) 63 | CARBON_RELAY_IMAGE_TAG=$(shell git log -n 1 --pretty=format:%h $(CARBON_RELAY_DOCKER_DIR)) 64 | CARBON_RELAY_IMAGE_NAME=$(DOCKER_REPOSITORY)/$(CARBON_RELAY_APP_NAME):$(CARBON_RELAY_IMAGE_TAG) 65 | CARBON_RELAY_REPLICAS?=$(shell curl -s config/$(NANIT_ENV)/$(CARBON_RELAY_APP_NAME)/replicas) 66 | CARBON_RELAY_ADDITIONAL_YAML?=$(shell curl -s config/$(NANIT_ENV)/$(CARBON_RELAY_APP_NAME)/additional_yaml) 67 | 68 | define generate-carbon-relay-svc 69 | sed -e 's/{{APP_NAME}}/$(CARBON_RELAY_APP_NAME)/g' kube/$(CARBON_RELAY_DIR_NAME)/svc.yml 70 | endef 71 | 72 | define generate-carbon-relay-dep 73 | if [ -z "$(CARBON_RELAY_REPLICAS)" ]; then echo "ERROR: CARBON_RELAY_REPLICAS is empty!"; exit 1; fi 74 | sed -e 's/{{APP_NAME}}/$(CARBON_RELAY_APP_NAME)/g;s,{{IMAGE_NAME}},$(CARBON_RELAY_IMAGE_NAME),g;s/{{REPLICAS}}/$(CARBON_RELAY_REPLICAS)/g;s@{{ADDITIONAL_YAML}}@$(CARBON_RELAY_ADDITIONAL_YAML)@g' kube/$(CARBON_RELAY_DIR_NAME)/dep.yml 75 | endef 76 | 77 | deploy-carbon-relay: docker-carbon-relay 78 | kubectl get svc $(CARBON_RELAY_APP_NAME) || $(call generate-carbon-relay-svc) | kubectl create -f - 79 | $(call generate-carbon-relay-dep) | kubectl apply -f - 80 | 81 | docker-carbon-relay: 82 | $(SUDO) docker pull $(CARBON_RELAY_IMAGE_NAME) || ($(SUDO) docker build -t $(CARBON_RELAY_IMAGE_NAME) $(CARBON_RELAY_DOCKER_DIR) && $(SUDO) docker push $(CARBON_RELAY_IMAGE_NAME)) 83 | 84 | clean-carbon-relay: 85 | kubectl delete deployment $(CARBON_RELAY_APP_NAME) || true 86 | kubectl delete svc $(CARBON_RELAY_APP_NAME) || true 87 | 88 | #------------------------------------------------------------------------------------------------------------------------------------------------- 89 | GRAPHITE_NODE_APP_NAME=graphite-node 90 | GRAPHITE_NODE_DIR_NAME=graphite-node 91 | GRAPHITE_NODE_DOCKER_DIR=docker/$(GRAPHITE_NODE_DIR_NAME) 92 | GRAPHITE_NODE_IMAGE_TAG=$(shell git log -n 1 --pretty=format:%h $(GRAPHITE_NODE_DOCKER_DIR)) 93 | GRAPHITE_NODE_IMAGE_NAME=$(DOCKER_REPOSITORY)/$(GRAPHITE_NODE_APP_NAME):$(GRAPHITE_NODE_IMAGE_TAG) 94 | GRAPHITE_NODE_REPLICAS?=$(shell curl -s config/$(NANIT_ENV)/$(GRAPHITE_NODE_APP_NAME)/replicas) 95 | GRAPHITE_NODE_DISK_SIZE?=$(shell curl -s config/$(NANIT_ENV)/$(GRAPHITE_NODE_APP_NAME)/disk_size) 96 | GRAPHITE_NODE_CURATOR_RETENTION?=$(shell curl -s config/$(NANIT_ENV)/$(GRAPHITE_NODE_APP_NAME)/curator_retention) 97 | GRAPHITE_NODE_STORAGE_CLASS?=$(shell curl -s config/$(NANIT_ENV)/$(GRAPHITE_NODE_APP_NAME)/storage_class) 98 | GRAPHITE_NODE_ADDITIONAL_YAML?=$(shell curl -s config/$(NANIT_ENV)/$(GRAPHITE_NODE_APP_NAME)/additional_yaml) 99 | 100 | define generate-graphite-node-svc 101 | sed -e 's/{{APP_NAME}}/$(GRAPHITE_NODE_APP_NAME)/g' kube/$(GRAPHITE_NODE_DIR_NAME)/svc.yml 102 | endef 103 | 104 | define generate-graphite-node-dep 105 | if [ -z "$(GRAPHITE_NODE_REPLICAS)" ]; then echo "ERROR: GRAPHITE_NODE_REPLICAS is empty!"; exit 1; fi 106 | if [ -z "$(GRAPHITE_NODE_DISK_SIZE)" ]; then echo "ERROR: GRAPHITE_NODE_DISK_SIZE is empty!"; exit 1; fi 107 | if [ -z "$(GRAPHITE_NODE_STORAGE_CLASS)" ]; then echo "ERROR: GRAPHITE_NODE_STORAGE_CLASS is empty!"; exit 1; fi 
108 | sed -e 's/{{APP_NAME}}/$(GRAPHITE_NODE_APP_NAME)/g;s,{{STORAGE_CLASS}},$(GRAPHITE_NODE_STORAGE_CLASS),g;s,{{IMAGE_NAME}},$(GRAPHITE_NODE_IMAGE_NAME),g;s/{{REPLICAS}}/$(GRAPHITE_NODE_REPLICAS)/g;s/{{CURATOR_RETENTION}}/$(GRAPHITE_NODE_CURATOR_RETENTION)/g;s/{{DISK_SIZE}}/$(GRAPHITE_NODE_DISK_SIZE)/g;s@{{ADDITIONAL_YAML}}@$(GRAPHITE_NODE_ADDITIONAL_YAML)@g' kube/$(GRAPHITE_NODE_DIR_NAME)/stateful.set.yml 109 | endef 110 | 111 | deploy-graphite-node: docker-graphite-node 112 | kubectl get svc $(GRAPHITE_NODE_APP_NAME) || $(call generate-graphite-node-svc) | kubectl create -f - 113 | $(call generate-graphite-node-dep) | kubectl apply -f - 114 | 115 | docker-graphite-node: 116 | $(SUDO) docker pull $(GRAPHITE_NODE_IMAGE_NAME) || ($(SUDO) docker build -t $(GRAPHITE_NODE_IMAGE_NAME) $(GRAPHITE_NODE_DOCKER_DIR) && $(SUDO) docker push $(GRAPHITE_NODE_IMAGE_NAME)) 117 | 118 | clean-graphite-node: 119 | kubectl delete statefulset $(GRAPHITE_NODE_APP_NAME) || true 120 | kubectl delete pvc -l app=$(GRAPHITE_NODE_APP_NAME) || true 121 | 122 | #------------------------------------------------------------------------------------------------------------------------------------------------- 123 | GRAPHITE_MASTER_APP_NAME=graphite 124 | GRAPHITE_MASTER_DIR_NAME=graphite-master 125 | GRAPHITE_MASTER_DOCKER_DIR=docker/$(GRAPHITE_MASTER_DIR_NAME) 126 | GRAPHITE_MASTER_IMAGE_TAG=$(shell git log -n 1 --pretty=format:%h $(GRAPHITE_MASTER_DOCKER_DIR)) 127 | GRAPHITE_MASTER_IMAGE_NAME=$(DOCKER_REPOSITORY)/$(GRAPHITE_MASTER_APP_NAME):$(GRAPHITE_MASTER_IMAGE_TAG) 128 | GRAPHITE_MASTER_REPLICAS?=$(shell curl -s config/$(NANIT_ENV)/$(GRAPHITE_MASTER_APP_NAME)/replicas) 129 | 130 | define generate-graphite-master-svc 131 | sed -e 's/{{APP_NAME}}/$(GRAPHITE_MASTER_APP_NAME)/g' kube/$(GRAPHITE_MASTER_DIR_NAME)/svc.yml 132 | endef 133 | 134 | define generate-graphite-master-dep 135 | if [ -z "$(GRAPHITE_MASTER_REPLICAS)" ]; then echo "ERROR: GRAPHITE_MASTER_REPLICAS is empty!"; exit 1; fi 136 | sed -e 's/{{APP_NAME}}/$(GRAPHITE_MASTER_APP_NAME)/g;s,{{IMAGE_NAME}},$(GRAPHITE_MASTER_IMAGE_NAME),g;s/{{REPLICAS}}/$(GRAPHITE_MASTER_REPLICAS)/g' kube/$(GRAPHITE_MASTER_DIR_NAME)/dep.yml 137 | endef 138 | 139 | RBAC_DIR_NAME=rbac 140 | RBAC_API_VERSION=$(shell (kubectl api-versions | grep rbac. | grep -sE v1$$) || (kubectl api-versions | grep rbac. | grep -sE v1beta1$$) || (kubectl api-versions | grep rbac. 
| grep -sE v1alpha1$$) || echo "") 141 | 142 | define generate-rbac-role 143 | sed -e 's;{{RBAC_API_VERSION}};$(RBAC_API_VERSION);g' kube/$(RBAC_DIR_NAME)/role.yml 144 | endef 145 | 146 | define generate-rbac-rolebinding 147 | sed -e 's;{{RBAC_API_VERSION}};$(RBAC_API_VERSION);g' kube/$(RBAC_DIR_NAME)/role-binding.yml 148 | endef 149 | 150 | deploy-rbac: 151 | if [ -z "$(RBAC_API_VERSION)" ]; then echo "ERROR: RBAC_API_VERSION is empty!"; exit 1; fi 152 | kubectl apply -f kube/rbac/serviceaccount.yml 153 | $(call generate-rbac-role) | kubectl apply -f - 154 | $(call generate-rbac-rolebinding) | kubectl apply -f - 155 | 156 | clean-rbac: 157 | if [ -z "$(RBAC_API_VERSION)" ]; then echo "ERROR: RBAC_API_VERSION is empty!"; exit 1; fi 158 | kubectl delete serviceaccount graphite-cluster-sa || true 159 | kubectl delete rolebinding read-endpoints || true 160 | kubectl delete role endpoints-reader || true 161 | 162 | deploy-graphite-master: docker-graphite-master 163 | kubectl get svc $(GRAPHITE_MASTER_APP_NAME) || $(call generate-graphite-master-svc) | kubectl create -f - 164 | $(call generate-graphite-master-dep) | kubectl apply -f - 165 | 166 | docker-graphite-master: 167 | $(SUDO) docker pull $(GRAPHITE_MASTER_IMAGE_NAME) || ($(SUDO) docker build -t $(GRAPHITE_MASTER_IMAGE_NAME) $(GRAPHITE_MASTER_DOCKER_DIR) && $(SUDO) docker push $(GRAPHITE_MASTER_IMAGE_NAME)) 168 | 169 | clean-graphite-master: 170 | kubectl delete deployment $(GRAPHITE_MASTER_APP_NAME) || true 171 | 172 | 173 | deploy: deploy-rbac deploy-graphite-node deploy-statsd-daemon deploy-statsd-proxy deploy-carbon-relay deploy-graphite-master 174 | 175 | clean: clean-statsd-proxy clean-statsd-daemon clean-carbon-relay clean-graphite-node clean-graphite-master clean-rbac 176 | 177 | verify-statsd: 178 | kubectl exec $(name) -- cat proxyConfig.js | grep host 179 | 180 | verify-carbon: 181 | kubectl exec $(name) -- cat /opt/graphite/conf/carbon.conf | grep DESTINATIONS 182 | 183 | verify-graphite: 184 | kubectl exec $(name) -- cat /opt/graphite/webapp/graphite/local_settings.py | grep CLUSTER_SERVERS 185 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kubernetes-graphite-cluster 2 | 3 | A deployment-ready graphite cluster on top of Kubernetes. 4 | Find the full details [here](https://medium.com/@erezrabih/creating-a-graphite-cluster-on-kubernetes-6b402a8a7438#.yyaz16gzq) 5 | 6 | ## Contents: 7 | 1. A **statsd proxy** deployment and service for metric collection 8 | 2. A **statsd daemon** deployment and service for metric aggregation and shipping 9 | 3. A **carbon relay** deployment and service to spread metrics across several Graphite data nodes 10 | 4. **Graphite data nodes** as a stateful set with persistent volumes 11 | 5. A **Graphite query node** to be used as a query gateway to the data nodes 12 | 13 | ## Requirements: 14 | 1. Kubernetes version 1.5.X or newer (we're using a StatefulSet for the Graphite data nodes) 15 | 2. kubectl configured to work with your Kubernetes API (you can sanity-check this with the snippet below) 16 | 3. Tested on Kubernetes 1.5.X/1.6.X (without RBAC) on top of AWS/[GKE](https://github.com/nanit/kubernetes-graphite-cluster/issues/6) 17 | 4. Optional - access to your own docker repository to store your own images. This is only relevant if you don't want to use the default images offered here.
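A minimal sketch for checking these requirements before deploying; it assumes only that `kubectl` is installed and pointing at the cluster you intend to deploy to:

```
kubectl config current-context     # make sure you're pointing at the right cluster
kubectl version                    # the server version should be 1.5.x or newer
kubectl api-versions | grep apps   # StatefulSets live under the apps API group
```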
18 | 19 | ## Environment Variables: 20 | | Name | Default Value | Purpose 21 | |---------------------------------|---------------|-------------------------------------------------------------------------------------------------------------------------------------- 22 | | DOCKER_REPOSITORY | nanit | Change it if you want to build and use a custom docker repository. nanit images are public, so leaving it as-is should work out of the box. 23 | | SUDO | sudo | Whether docker commands should be prefixed with sudo. Set it to "" to omit sudo. 24 | | STATSD_PROXY_REPLICAS | None | Number of replicas for statsd proxy 25 | | STATSD_DAEMON_REPLICAS | None | Number of StatsD daemons running behind the proxies. 26 | | CARBON_RELAY_REPLICAS | None | Number of replicas for carbon relay 27 | | GRAPHITE_NODE_REPLICAS | None | The number of Graphite data nodes in the cluster. This number affects both carbon relay and graphite master configuration. 28 | | GRAPHITE_NODE_DISK_SIZE | None | The size of the persistent disk to be allocated for each Graphite node. 29 | | GRAPHITE_NODE_CURATOR_RETENTION | None | Set this variable to run a cronjob which deletes metrics that haven't been written to for X days. Leave it blank to disable the curator 30 | | GRAPHITE_NODE_STORAGE_CLASS | None | The storage class for the persistent volume claims of the Graphite node stateful set 31 | | GRAPHITE_MASTER_REPLICAS | None | Number of replicas for graphite query node 32 | 33 | ## Deployment: 34 | 1. Clone this repository 35 | 2. Run: 36 | ``` 37 | export DOCKER_REPOSITORY=nanit && \ 38 | export STATSD_PROXY_REPLICAS=3 && \ 39 | export STATSD_DAEMON_REPLICAS=2 && \ 40 | export CARBON_RELAY_REPLICAS=3 && \ 41 | export GRAPHITE_NODE_REPLICAS=3 && \ 42 | export GRAPHITE_NODE_DISK_SIZE=30G && \ 43 | export GRAPHITE_NODE_CURATOR_RETENTION=5 && \ 44 | export GRAPHITE_MASTER_REPLICAS=1 && \ 45 | export GRAPHITE_NODE_STORAGE_CLASS=default && \ 46 | export STATSD_PROXY_ADDITIONAL_YAML="" && \ 47 | export STATSD_DAEMON_ADDITIONAL_YAML="" && \ 48 | export CARBON_RELAY_ADDITIONAL_YAML="" && \ 49 | export GRAPHITE_NODE_ADDITIONAL_YAML="" && \ 50 | export SUDO="" && \ 51 | make deploy 52 | ``` 53 | ## Usage: 54 | After the deployment is done there are two endpoints of interest: 55 | 56 | 1. **statsd:8125** is the host for your metrics collection. It points to the statsd proxies. 57 | 2. **graphite:80** is the host for your metrics queries. It points to the graphite query node which queries all data nodes in the cluster. 58 | 59 | Run `kubectl get pods,statefulsets,svc` and expect to see the following resources: 60 | 61 | ![K8s resources on a clean cluster](https://github.com/nanit/kubernetes-graphite-cluster/blob/master/K8s-Resources.png) 62 | 63 | The replica count of each resource will, of course, vary according to your environment variables. 64 | 65 | 66 | ## Verifying The Deployment: 67 | To verify everything works as expected, just paste the following into your terminal: 68 | 69 | ``` 70 | POD_NAME=$(kubectl get pods -l app=statsd -o jsonpath="{.items[0].metadata.name}") 71 | kubectl exec -it $POD_NAME bash 72 | for i in {1..10} 73 | do 74 | echo "test_counter:1|c" | nc -w1 -u statsd 8125 75 | sleep 1 76 | done 77 | 78 | apk --update add curl 79 | curl 'graphite/render?target=stats.counters.test_counter.count&from=-10min&format=json' 80 | ``` 81 | You should see a lot of null values along with your few increments at the end, as illustrated below.
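For reference, the render API returns a JSON array with one object per series, each holding `[value, timestamp]` pairs. A successful run of the snippet above should produce output of roughly the following shape (the timestamps and values here are illustrative, not real output):

```
[{"target": "stats.counters.test_counter.count",
  "datapoints": [[null, 1490000000], [1.0, 1490000010], [null, 1490000020]]}]
```

The null datapoints are intervals in which no increments arrived; the non-null points are your test counter.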
82 | 83 | ## Building your own images 84 | If you want to build and use your own images, make sure to change the DOCKER_REPOSITORY environment variable to your own docker repository. 85 | The deploy target will then build the images, push them to your docker repository and use them to create all the needed kubernetes deployments. 86 | 87 | ## Changing an active cluster configuration 88 | 89 | Graphite data nodes are deployed as a StatefulSet; the StatsD daemons, StatsD proxies and carbon relays are Deployments. 90 | The StatsD proxies continuously watch the Kubernetes API for StatsD daemon endpoints and update their configuration. 91 | Both the Graphite master and the carbon relays continuously watch the Kubernetes API for Graphite node endpoints and update their configuration. 92 | 93 | That means you can scale each part independently, and the system reacts to your changes by updating its config files accordingly. 94 | 95 | ## Acknowledgements 96 | 97 | 1. I have learnt a lot about Graphite clustering from [this excellent article](https://grey-boundary.io/the-architecture-of-clustering-graphite) 98 | 2. The docker images for the graphite nodes are based on [this repository](https://github.com/nickstenning/docker-graphite) 99 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | services: 3 | statsd-proxy: 4 | build: docker/statsd-proxy 5 | links: 6 | - statsd-1 7 | - statsd-2 8 | ports: 9 | - "8125:8125/udp" 10 | - "8126:8126" 11 | statsd-1: 12 | build: docker/statsd-daemon 13 | links: 14 | - relay 15 | statsd-2: 16 | build: docker/statsd-daemon 17 | links: 18 | - relay 19 | relay: 20 | build: docker/carbon-relay 21 | links: 22 | - node-1 23 | - node-2 24 | - node-3 25 | node-1: 26 | build: docker/graphite-node 27 | node-2: 28 | build: docker/graphite-node 29 | node-3: 30 | build: docker/graphite-node 31 | master: 32 | build: docker/graphite-master 33 | links: 34 | - node-1 35 | - node-2 36 | - node-3 37 | ports: 38 | - "80:80" 39 | -------------------------------------------------------------------------------- /docker/carbon-relay/Dockerfile: -------------------------------------------------------------------------------- 1 | from ubuntu:14.04 2 | run echo 'deb http://us.archive.ubuntu.com/ubuntu/ trusty universe' >> /etc/apt/sources.list 3 | # Install required packages 4 | RUN apt-get -y update &&\ 5 | apt-get -y install software-properties-common python-django-tagging python-simplejson \ 6 | python-memcache python-ldap python-cairo python-pysqlite2 python-support python-pip \ 7 | gunicorn supervisor nginx-light git wget curl build-essential python-dev libffi-dev vim jq 8 | run curl -sL https://deb.nodesource.com/setup_8.x | sudo -E bash - 9 | run apt-get install -y nodejs 10 | RUN pip install Twisted==13.2.0 11 | RUN pip install pytz 12 | RUN git clone https://github.com/graphite-project/whisper.git /src/whisper &&\ 13 | cd /src/whisper &&\ 14 | git checkout 1.0.x &&\ 15 | python setup.py install 16 | 17 | RUN git clone https://github.com/graphite-project/carbon.git /src/carbon &&\ 18 | cd /src/carbon &&\ 19 | git checkout 1.0.x &&\ 20 | python setup.py install 21 | 22 | 23 | add conf/carbon.conf.template /opt/graphite/conf/carbon.conf.template 24 | add conf/storage-schemas.conf /opt/graphite/conf/storage-schemas.conf 25 | add ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf 26 | 27 | RUN mkdir /kube-watch 28 | RUN cd /kube-watch && npm install hashring kubernetes-client@5 json-stream 29 | add kube-watch.js /kube-watch/kube-watch.js 30 | 31 | EXPOSE 2003 32 | 33 | CMD
["/usr/bin/supervisord"] 34 | -------------------------------------------------------------------------------- /docker/carbon-relay/conf/carbon.conf.template: -------------------------------------------------------------------------------- 1 | [relay] 2 | LINE_RECEIVER_INTERFACE = 0.0.0.0 3 | LINE_RECEIVER_PORT = 2003 4 | PICKLE_RECEIVER_INTERFACE = 0.0.0.0 5 | PICKLE_RECEIVER_PORT = 2004 6 | 7 | # Carbon-relay has several options for metric routing controlled by RELAY_METHOD 8 | # 9 | # Use relay-rules.conf to route metrics to destinations based on pattern rules 10 | #RELAY_METHOD = rules 11 | # 12 | # Use consistent-hashing for even distribution of metrics between destinations 13 | RELAY_METHOD = consistent-hashing 14 | # 15 | # Use consistent-hashing but take into account an aggregation-rules.conf shared 16 | # by downstream carbon-aggregator daemons. This will ensure that all metrics 17 | # that map to a given aggregation rule are sent to the same carbon-aggregator 18 | # instance. 19 | # Enable this for carbon-relays that send to a group of carbon-aggregators 20 | #RELAY_METHOD = aggregated-consistent-hashing 21 | # 22 | # You can also use fast-hashing and fast-aggregated-hashing which are in O(1) 23 | # and will always redirect the metrics to the same destination but do not try 24 | # to minimize rebalancing when the list of destinations is changing. 25 | # RELAY_METHOD = rules 26 | 27 | # If you use consistent-hashing you can add redundancy by replicating every 28 | # datapoint to more than one machine. 29 | REPLICATION_FACTOR = 2 30 | 31 | # For REPLICATION_FACTOR >=2, set DIVERSE_REPLICAS to True to guarantee replicas 32 | # across distributed hosts. With this setting disabled, it's possible that replicas 33 | # may be sent to different caches on the same host. This has been the default 34 | # behavior since introduction of 'consistent-hashing' relay method. 35 | # Note that enabling this on an existing pre-0.9.14 cluster will require rebalancing 36 | # your metrics across the cluster nodes using a tool like Carbonate. 37 | #DIVERSE_REPLICAS = True 38 | 39 | # This is a list of carbon daemons we will send any relayed or 40 | # generated metrics to. The default provided would send to a single 41 | # carbon-cache instance on the default port. However if you 42 | # use multiple carbon-cache instances then it would look like this: 43 | # 44 | # DESTINATIONS = 127.0.0.1:2004:a, 127.0.0.1:2104:b 45 | # 46 | # The general form is IP:PORT:INSTANCE where the :INSTANCE part is 47 | # optional and refers to the "None" instance if omitted. 48 | # 49 | # Note that if the destinations are all carbon-caches then this should 50 | # exactly match the webapp's CARBONLINK_HOSTS setting in terms of 51 | # instances listed (order matters!). 52 | # 53 | # If using RELAY_METHOD = rules, all destinations used in relay-rules.conf 54 | # must be defined in this list 55 | DESTINATIONS = @@GRAPHITE_NODES@@ 56 | 57 | # This is the maximum number of datapoints that can be queued up 58 | # for a single destination. Once this limit is hit, we will 59 | # stop accepting new data if USE_FLOW_CONTROL is True, otherwise 60 | # we will drop any subsequently received datapoints. 61 | MAX_QUEUE_SIZE = 10000 62 | 63 | # This defines the maximum "message size" between carbon daemons. If 64 | # your queue is large, setting this to a lower number will cause the 65 | # relay to forward smaller discrete chunks of stats, which may prevent 66 | # overloading on the receiving side after a disconnect. 
67 | MAX_DATAPOINTS_PER_MESSAGE = 500 68 | 69 | # Limit the number of open connections the receiver can handle at any time. 70 | # Default is no limit. Setting up a limit for sites handling high volume 71 | # traffic may be recommended to avoid running out of TCP memory or having 72 | # thousands of TCP connections reduce the throughput of the service. 73 | #MAX_RECEIVER_CONNECTIONS = inf 74 | 75 | # Specify the user to drop privileges to 76 | # If this is blank carbon-relay runs as the user that invokes it 77 | # USER = 78 | 79 | # This is the percentage that the queue must be empty before it will accept 80 | # more messages. For a larger site, if the queue is very large it makes sense 81 | # to tune this to allow for incoming stats. So if you have an average 82 | # flow of 100k stats/minute, and a MAX_QUEUE_SIZE of 3,000,000, it makes sense 83 | # to allow stats to start flowing when you've cleared the queue to 95% since 84 | # you should have space to accommodate the next minute's worth of stats 85 | # even before the relay incrementally clears more of the queue 86 | QUEUE_LOW_WATERMARK_PCT = 0.8 87 | 88 | # To allow for batch efficiency from the pickle protocol and to benefit from 89 | # other batching advantages, all writes are deferred by putting them into a queue, 90 | # and then the queue is flushed and sent a small fraction of a second later. 91 | TIME_TO_DEFER_SENDING = 0.0001 92 | 93 | # Set this to False to drop datapoints when any send queue (sending datapoints 94 | # to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the 95 | # default) then sockets over which metrics are received will temporarily stop accepting 96 | # data until the send queues fall below QUEUE_LOW_WATERMARK_PCT * MAX_QUEUE_SIZE. 97 | USE_FLOW_CONTROL = True 98 | 99 | # If enabled this setting is used to timeout metric client connection if no 100 | # metrics have been sent in specified time in seconds 101 | #METRIC_CLIENT_IDLE_TIMEOUT = None 102 | 103 | # Set this to True to enable whitelisting and blacklisting of metrics in 104 | # CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is 105 | # missing or empty, all metrics will pass through 106 | # USE_WHITELIST = False 107 | 108 | # By default, carbon itself will log statistics (such as a count, 109 | # metricsReceived) with the top level prefix of 'carbon' at an interval of 60 110 | # seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation 111 | # CARBON_METRIC_PREFIX = carbon 112 | # CARBON_METRIC_INTERVAL = 60 113 | # 114 | # In order to turn off logging of successful connections for the line 115 | # receiver, set this to False 116 | LOG_LISTENER_CONN_SUCCESS = False 117 | 118 | # If you're connecting from the relay to a destination that's over the 119 | # internet or similarly iffy connection, a backlog can develop because 120 | # of internet weather conditions, e.g. acks getting lost or similar issues. 121 | # To deal with that, you can enable USE_RATIO_RESET which will let you 122 | # re-set the connection to an individual destination. Defaults to being off. 123 | USE_RATIO_RESET=False 124 | 125 | # When there is a small number of stats flowing, it's not desirable to 126 | # perform any actions based on percentages - it's just too "twitchy". 127 | MIN_RESET_STAT_FLOW=1000 128 | 129 | # When the ratio of stats being sent in a reporting interval is far 130 | # enough from 1.0, we will disconnect the socket and reconnect to 131 | # clear out queued stats.
The default ratio of 0.9 indicates that 10% 132 | # of stats aren't being delivered within one CARBON_METRIC_INTERVAL 133 | # (default of 60 seconds), which can lead to a queue backup. Under 134 | # some circumstances re-setting the connection can fix this, so 135 | # set this according to your tolerance, and look in the logs for 136 | # "resetConnectionForQualityReasons" to observe whether this is kicking 137 | # in when your sent queue is building up. 138 | MIN_RESET_RATIO=0.9 139 | 140 | # The minimum time between resets. When a connection is re-set, we 141 | # need to wait before another reset is performed. 142 | # (2*CARBON_METRIC_INTERVAL) + 1 second is the minimum time needed 143 | # before stats for the new connection will be available. Setting this 144 | # below (2*CARBON_METRIC_INTERVAL) + 1 second will result in a lot of 145 | # reset connections for no good reason. 146 | MIN_RESET_INTERVAL=121 147 | -------------------------------------------------------------------------------- /docker/carbon-relay/conf/storage-schemas.conf: -------------------------------------------------------------------------------- 1 | # Schema definitions for Whisper files. Entries are scanned in order, 2 | # and first match wins. This file is scanned for changes every 60 seconds. 3 | # 4 | # Definition Syntax: 5 | # 6 | # [name] 7 | # pattern = regex 8 | # retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ... 9 | # 10 | # Remember: To support accurate aggregation from higher to lower resolution 11 | # archives, the precision of a longer retention archive must be 12 | # cleanly divisible by precision of next lower retention archive. 13 | # 14 | # Valid: 60s:7d,300s:30d (300/60 = 5) 15 | # Invalid: 180s:7d,300s:30d (300/180 = 3.333) 16 | # 17 | 18 | # Carbon's internal metrics. This entry should match what is specified in 19 | # CARBON_METRIC_PREFIX and CARBON_METRIC_INTERVAL settings 20 | 21 | [carbon] 22 | pattern = ^carbon\. 23 | retentions = 10s:6h,1m:90d 24 | 25 | [default_1min_for_1day] 26 | pattern = .* 27 | retentions = 5s:1d,1m:7d,10m:1800d 28 | -------------------------------------------------------------------------------- /docker/carbon-relay/kube-watch.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const Client = require('kubernetes-client').Client; 3 | const config = require('kubernetes-client').config; 4 | const client = new Client({ config: config.getInCluster() }); 5 | const JSONStream = require('json-stream'); 6 | const jsonStream = new JSONStream(); 7 | const configFileTemplate="/opt/graphite/conf/carbon.conf.template"; 8 | const configFileTarget="/opt/graphite/conf/carbon.conf"; 9 | const processToRestart="carbon-relay"; 10 | const configTemplate = fs.readFileSync(configFileTemplate, 'utf8'); 11 | const exec = require('child_process').exec; 12 | const namespace = fs.readFileSync('/var/run/secrets/kubernetes.io/serviceaccount/namespace', 'utf8').toString(); 13 | 14 | function restartProcess() { 15 | exec(`supervisorctl restart ${processToRestart}`, (error, stdout, stderr) => { 16 | if (error) { 17 | console.error(error); 18 | return; 19 | } 20 | console.log(stdout); 21 | console.error(stderr); 22 | }); 23 | } 24 | 25 | function getNodes(endpoints) { 26 | return endpoints.subsets ? 
endpoints.subsets[0].addresses.map(e => `${e.ip}:2004`).join(",") : ""; 27 | } 28 | 29 | function changeConfig(endpoints) { 30 | var result = configTemplate.replace(/@@GRAPHITE_NODES@@/g, getNodes(endpoints)); 31 | fs.writeFileSync(configFileTarget, result); 32 | restartProcess(); 33 | } 34 | 35 | async function main() { 36 | await client.loadSpec(); 37 | const stream = client.apis.v1.ns(namespace).endpoints.getStream({ qs: { watch: true, fieldSelector: 'metadata.name=graphite-node' } }); 38 | stream.pipe(jsonStream); 39 | jsonStream.on('data', obj => { 40 | if (!obj) { 41 | return; 42 | } 43 | console.log('Received update:', JSON.stringify(obj)); 44 | changeConfig(obj.object); 45 | }); 46 | } 47 | 48 | try { 49 | main(); 50 | } catch (error) { 51 | console.error(error); 52 | process.exit(1); 53 | } 54 | -------------------------------------------------------------------------------- /docker/carbon-relay/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon = true 3 | environment = GRAPHITE_STORAGE_DIR='/opt/graphite/storage',GRAPHITE_CONF_DIR='/opt/graphite/conf' 4 | 5 | [program:carbon-relay] 6 | user = root 7 | command = /opt/graphite/bin/carbon-relay.py --debug start 8 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 9 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 10 | autorestart = true 11 | 12 | [program:kube-watch] 13 | user = root 14 | directory = /kube-watch 15 | command = node kube-watch.js 16 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 17 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 18 | autorestart = true 19 | -------------------------------------------------------------------------------- /docker/graphite-master/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nanit/kubernetes-graphite-cluster/900a89cce137b167345c162a6554e7a30ea9b3e9/docker/graphite-master/.DS_Store -------------------------------------------------------------------------------- /docker/graphite-master/Dockerfile: -------------------------------------------------------------------------------- 1 | from ubuntu:14.04 2 | run echo 'deb http://us.archive.ubuntu.com/ubuntu/ trusty universe' >> /etc/apt/sources.list 3 | run apt-get -y update 4 | 5 | # Install required packages 6 | RUN apt-get -y update &&\ 7 | apt-get -y install software-properties-common python-django-tagging python-simplejson \ 8 | python-memcache python-ldap python-cairo python-pysqlite2 python-support python-pip \ 9 | gunicorn supervisor nginx-light git wget curl build-essential python-dev libffi-dev vim jq 10 | run curl -sL https://deb.nodesource.com/setup_8.x | sudo -E bash - 11 | run apt-get install -y nodejs 12 | RUN pip install Twisted==13.2.0 13 | RUN pip install pytz 14 | RUN git clone https://github.com/graphite-project/whisper.git /src/whisper &&\ 15 | cd /src/whisper &&\ 16 | git checkout 1.0.x &&\ 17 | python setup.py install 18 | 19 | RUN git clone https://github.com/graphite-project/carbon.git /src/carbon &&\ 20 | cd /src/carbon &&\ 21 | git checkout 1.0.x &&\ 22 | python setup.py install 23 | 24 | 25 | RUN git clone https://github.com/graphite-project/graphite-web.git /src/graphite-web &&\ 26 | cd /src/graphite-web &&\ 27 | git checkout 1.0.x &&\ 28 | python setup.py install &&\ 29 | pip install -r requirements.txt &&\ 30 | python check-dependencies.py 31 | 32 | # fixes fatal error "Your WhiteNoise configuration is 
incompatible with WhiteNoise v4.0" 33 | RUN /usr/bin/yes | pip uninstall whitenoise &&\ 34 | pip install "whitenoise<4" 35 | 36 | # Add system service config 37 | add ./nginx/nginx.conf /etc/nginx/nginx.conf 38 | add ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf 39 | 40 | # Add graphite config 41 | add ./webapp/initial_data.json /opt/graphite/webapp/graphite/initial_data.json 42 | add ./webapp/local_settings.py.template /opt/graphite/webapp/graphite/local_settings.py.template 43 | add ./conf/carbon.conf /opt/graphite/conf/carbon.conf 44 | add ./conf/storage-schemas.conf /opt/graphite/conf/storage-schemas.conf 45 | add ./conf/storage-aggregation.conf /opt/graphite/conf/storage-aggregation.conf 46 | run mkdir -p /opt/graphite/storage/whisper 47 | run touch /opt/graphite/storage/graphite.db /opt/graphite/storage/index 48 | run chmod 0775 /opt/graphite/storage /opt/graphite/storage/whisper 49 | run chmod 0664 /opt/graphite/storage/graphite.db 50 | run cp /src/graphite-web/webapp/manage.py /opt/graphite/webapp 51 | 52 | RUN mkdir /kube-watch 53 | RUN cd /kube-watch && npm install hashring kubernetes-client@5 json-stream 54 | add kube-watch.js /kube-watch/kube-watch.js 55 | 56 | add entrypoint.sh /entrypoint.sh 57 | # Nginx 58 | expose 80 59 | # Carbon line receiver port 60 | # expose 2003 61 | # Carbon UDP receiver port 62 | # expose 2003/udp 63 | # Carbon pickle receiver port 64 | # expose 2004 65 | # Carbon cache query port 66 | # expose 7002 67 | 68 | cmd ["/entrypoint.sh"] 69 | -------------------------------------------------------------------------------- /docker/graphite-master/README.md: -------------------------------------------------------------------------------- 1 | ## Graphite + Carbon 2 | 3 | An all-in-one image running graphite and carbon-cache. **Version**: 0.9.12. 4 | 5 | This image contains a sensible default configuration of graphite and 6 | carbon-cache. Starting this container will, by default, bind the following 7 | host ports: 8 | 9 | - `80`: the graphite web interface 10 | - `2003`: the carbon-cache line receiver (the standard graphite protocol) 11 | - `2004`: the carbon-cache pickle receiver 12 | - `7002`: the carbon-cache query port (used by the web interface) 13 | 14 | With this image, you can get up and running with graphite by simply running: 15 | 16 | docker run -d nickstenning/graphite 17 | 18 | If you already have services running on the host on one or more of these ports, 19 | you may wish to allow docker to assign random ports on the host. You can do this 20 | easily by running: 21 | 22 | docker run -p 80 -p 2003 -p 2004 -p 7002 -d nickstenning/graphite 23 | 24 | You can log into the administrative interface of graphite-web (a Django 25 | application) with the username `admin` and password `admin`. This password can 26 | be changed through the web interface. 27 | 28 | **N.B.** Please be aware that by default docker will make the exposed ports 29 | accessible from anywhere if the host firewall is unconfigured. 30 | 31 | ### Data volumes 32 | 33 | Graphite data is stored at `/var/lib/graphite/storage/whisper` within the 34 | container. If you wish to store your metrics outside the container (highly 35 | recommended) you can use docker's data volumes feature.
For example, to store 36 | graphite's metric database at `/data/graphite` on the host, you could use: 37 | 38 | docker run -v /data/graphite:/var/lib/graphite/storage/whisper \ 39 | -d nickstenning/graphite 40 | 41 | **N.B.** You will need to run the container with suitable permissions to write 42 | to the data volume directory. Carbon and the graphite webapp run as `www-data` 43 | inside the container, but this UID/GID may be mapped inconsistently on the host. 44 | 45 | ### Technical details 46 | 47 | By default, this instance of carbon-cache uses the following retention periods 48 | resulting in whisper files of approximately 2.5MiB. 49 | 50 | 10s:8d,1m:31d,10m:1y,1h:5y 51 | 52 | For more information, see [the 53 | repository](https://github.com/nickstenning/dockerfiles/tree/master/graphite). 54 | -------------------------------------------------------------------------------- /docker/graphite-master/conf/carbon.conf: -------------------------------------------------------------------------------- 1 | [cache] 2 | # Configure carbon directories. 3 | # 4 | # OS environment variables can be used to tell carbon where graphite is 5 | # installed, where to read configuration from and where to write data. 6 | # 7 | # GRAPHITE_ROOT - Root directory of the graphite installation. 8 | # Defaults to ../ 9 | # GRAPHITE_CONF_DIR - Configuration directory (where this file lives). 10 | # Defaults to $GRAPHITE_ROOT/conf/ 11 | # GRAPHITE_STORAGE_DIR - Storage directory for whisper/rrd/log/pid files. 12 | # Defaults to $GRAPHITE_ROOT/storage/ 13 | # 14 | # To change other directory paths, add settings to this file. The following 15 | # configuration variables are available with these default values: 16 | # 17 | # STORAGE_DIR = $GRAPHITE_STORAGE_DIR 18 | # LOCAL_DATA_DIR = %(STORAGE_DIR)s/whisper/ 19 | # WHITELISTS_DIR = %(STORAGE_DIR)s/lists/ 20 | # CONF_DIR = %(STORAGE_DIR)s/conf/ 21 | # LOG_DIR = %(STORAGE_DIR)s/log/ 22 | # PID_DIR = %(STORAGE_DIR)s/ 23 | # 24 | # For FHS style directory structures, use: 25 | # 26 | # STORAGE_DIR = /var/lib/carbon/ 27 | # CONF_DIR = /etc/carbon/ 28 | # LOG_DIR = /var/log/carbon/ 29 | # PID_DIR = /var/run/ 30 | # 31 | #LOCAL_DATA_DIR = /opt/graphite/storage/whisper/ 32 | 33 | # Specify the database library used to store metric data on disk. Each database 34 | # may have configurable options to change the behaviour of how it writes to 35 | # persistent storage. 36 | # 37 | # whisper - Fixed-size database, similar in design and purpose to RRD. This is 38 | # the default storage backend for carbon and the most rigorously tested. 39 | # 40 | # ceres - Experimental alternative database that supports storing data in sparse 41 | # files of arbitrary fixed-size resolutions. 42 | DATABASE = whisper 43 | 44 | # Enable daily log rotation. If disabled, a new file will be opened whenever the log file path no 45 | # longer exists (i.e. it is removed or renamed) 46 | ENABLE_LOGROTATION = True 47 | 48 | # Specify the user to drop privileges to 49 | # If this is blank carbon-cache runs as the user that invokes it 50 | # This user must have write access to the local data directory 51 | USER = 52 | 53 | # Limit the size of the cache to avoid swapping or becoming CPU bound. 54 | # Sorting and serving cache queries get more expensive as the cache grows. 55 | # Use the value "inf" (infinity) for an unlimited cache size. 56 | # value should be an integer number of metric datapoints.
57 | MAX_CACHE_SIZE = inf 58 | 59 | # Limits the number of whisper update_many() calls per second, which effectively 60 | # means the number of write requests sent to the disk. This is intended to 61 | # prevent over-utilizing the disk and thus starving the rest of the system. 62 | # When the rate of required updates exceeds this, then carbon's caching will 63 | # take effect and increase the overall throughput accordingly. 64 | MAX_UPDATES_PER_SECOND = 500 65 | 66 | # If defined, this changes the MAX_UPDATES_PER_SECOND in Carbon when a 67 | # stop/shutdown is initiated. This helps when MAX_UPDATES_PER_SECOND is 68 | # relatively low and carbon has cached a lot of updates; it enables the carbon 69 | # daemon to shutdown more quickly. 70 | # MAX_UPDATES_PER_SECOND_ON_SHUTDOWN = 1000 71 | 72 | # Softly limits the number of whisper files that get created each minute. 73 | # Setting this value low (e.g. 50) is a good way to ensure that your carbon 74 | # system will not be adversely impacted when a bunch of new metrics are 75 | # sent to it. The trade off is that any metrics received in excess of this 76 | # value will be silently dropped, and the whisper file will not be created 77 | # until such point as a subsequent metric is received and fits within the 78 | # defined rate limit. Setting this value high (like "inf" for infinity) will 79 | # cause carbon to create the files quickly but at the risk of increased I/O. 80 | MAX_CREATES_PER_MINUTE = 50 81 | 82 | # Set the interface and port for the line (plain text) listener. Setting the 83 | # interface to 0.0.0.0 listens on all interfaces. Port can be set to 0 to 84 | # disable this listener if it is not required. 85 | LINE_RECEIVER_INTERFACE = 0.0.0.0 86 | LINE_RECEIVER_PORT = 2003 87 | 88 | # Set this to True to enable the UDP listener. By default this is off 89 | # because it is very common to run multiple carbon daemons and managing 90 | # another (rarely used) port for every carbon instance is not fun. 91 | ENABLE_UDP_LISTENER = True 92 | UDP_RECEIVER_INTERFACE = 0.0.0.0 93 | UDP_RECEIVER_PORT = 2003 94 | 95 | # Set the interface and port for the pickle listener. Setting the interface to 96 | # 0.0.0.0 listens on all interfaces. Port can be set to 0 to disable this 97 | # listener if it is not required. 98 | PICKLE_RECEIVER_INTERFACE = 0.0.0.0 99 | PICKLE_RECEIVER_PORT = 2004 100 | 101 | # Limit the number of open connections the receiver can handle at any time. 102 | # Default is no limit. Setting up a limit for sites handling high volume 103 | # traffic may be recommended to avoid running out of TCP memory or having 104 | # thousands of TCP connections reduce the throughput of the service. 105 | #MAX_RECEIVER_CONNECTIONS = inf 106 | 107 | # Per security concerns outlined in Bug #817247 the pickle receiver 108 | # will use a more secure and slightly less efficient unpickler. 109 | # Set this to True to revert to the old-fashioned insecure unpickler. 110 | USE_INSECURE_UNPICKLER = False 111 | 112 | CACHE_QUERY_INTERFACE = 0.0.0.0 113 | CACHE_QUERY_PORT = 7002 114 | 115 | # Set this to False to drop datapoints received after the cache 116 | # reaches MAX_CACHE_SIZE. If this is True (the default) then sockets 117 | # over which metrics are received will temporarily stop accepting 118 | # data until the cache size falls below 95% MAX_CACHE_SIZE.
119 | USE_FLOW_CONTROL = True 120 | 121 | # If enabled this setting is used to timeout metric client connection if no 122 | # metrics have been sent in specified time in seconds 123 | #METRIC_CLIENT_IDLE_TIMEOUT = None 124 | 125 | # By default, carbon-cache will log every whisper update and cache hit. 126 | # This can be excessive and degrade performance if logging on the same 127 | # volume as the whisper data is stored. 128 | LOG_UPDATES = False 129 | LOG_CREATES = False 130 | LOG_CACHE_HITS = False 131 | LOG_CACHE_QUEUE_SORTS = False 132 | 133 | # The thread that writes metrics to disk can use one of the following strategies 134 | # determining the order in which metrics are removed from cache and flushed to 135 | # disk. The default option preserves the same behavior as has been historically 136 | # available in version 0.9.10. 137 | # 138 | # sorted - All metrics in the cache will be counted and an ordered list of 139 | # them will be sorted according to the number of datapoints in the cache at the 140 | # moment of the list's creation. Metrics will then be flushed from the cache to 141 | # disk in that order. 142 | # 143 | # max - The writer thread will always pop and flush the metric from cache 144 | # that has the most datapoints. This will give a strong flush preference to 145 | # frequently updated metrics and will also reduce random file-io. Infrequently 146 | # updated metrics may only ever be persisted to disk at daemon shutdown if 147 | # there are a large number of metrics which receive very frequent updates OR if 148 | # disk i/o is very slow. 149 | # 150 | # naive - Metrics will be flushed from the cache to disk in an unordered 151 | # fashion. This strategy may be desirable in situations where the storage for 152 | # whisper files is solid state, CPU resources are very limited or deference to 153 | # the OS's i/o scheduler is expected to compensate for the random write 154 | # pattern. 155 | # 156 | CACHE_WRITE_STRATEGY = sorted 157 | 158 | # On some systems it is desirable for whisper to write synchronously. 159 | # Set this option to True if you'd like to try this. Basically it will 160 | # shift the onus of buffering writes from the kernel into carbon's cache. 161 | WHISPER_AUTOFLUSH = False 162 | 163 | # By default new Whisper files are created pre-allocated with the data region 164 | # filled with zeros to prevent fragmentation and speed up contiguous reads and 165 | # writes (which are common). Enabling this option will cause Whisper to create 166 | # the file sparsely instead. Enabling this option may allow a large increase of 167 | # MAX_CREATES_PER_MINUTE but may have longer term performance implications 168 | # depending on the underlying storage configuration. 169 | # WHISPER_SPARSE_CREATE = False 170 | 171 | # Only beneficial on linux filesystems that support the fallocate system call. 172 | # It maintains the benefits of contiguous reads/writes, but with a potentially 173 | # much faster creation speed, by allowing the kernel to handle the block 174 | # allocation and zero-ing. Enabling this option may allow a large increase of 175 | # MAX_CREATES_PER_MINUTE. If enabled on an OS or filesystem that is unsupported 176 | # this option will gracefully fallback to standard POSIX file access methods. 177 | WHISPER_FALLOCATE_CREATE = True 178 | 179 | # Enabling this option will cause Whisper to lock each Whisper file it writes 180 | # to with an exclusive lock (LOCK_EX, see: man 2 flock). 
This is useful when 181 | # multiple carbon-cache daemons are writing to the same files. 182 | # WHISPER_LOCK_WRITES = False 183 | 184 | # On systems which have a large number of metrics, writeback of Whisper write(2) 185 | # pages can sometimes cause disk thrashing due to memory shortage, so that abnormal 186 | # disk reads occur. Enabling this option makes it possible to decrease useless 187 | # page cache memory by posix_fadvise(2) with POSIX_FADVISE_RANDOM option. 188 | # WHISPER_FADVISE_RANDOM = False 189 | 190 | # By default all nodes stored in Ceres are cached in memory to improve the 191 | # throughput of reads and writes to underlying slices. Turning this off will 192 | # greatly reduce memory consumption for databases with millions of metrics, at 193 | # the cost of a steep increase in disk i/o, approximately an extra two os.stat 194 | # calls for every read and write. Reasons to do this are if the underlying 195 | # storage can handle stat() with practically zero cost (SSD, NVMe, zRAM). 196 | # Valid values are: 197 | # all - all nodes are cached 198 | # none - node caching is disabled 199 | # CERES_NODE_CACHING_BEHAVIOR = all 200 | 201 | # Ceres nodes can have many slices and caching the right ones can improve 202 | # performance dramatically. Note that there are many trade-offs to tinkering 203 | # with this, and unless you are a ceres developer you *really* should not 204 | # mess with this. Valid values are: 205 | # latest - only the most recent slice is cached 206 | # all - all slices are cached 207 | # none - slice caching is disabled 208 | # CERES_SLICE_CACHING_BEHAVIOR = latest 209 | 210 | # If a Ceres node accumulates too many slices, performance can suffer. 211 | # This can be caused by intermittently reported data. To mitigate 212 | # slice fragmentation there is a tolerance for how much space can be 213 | # wasted within a slice file to avoid creating a new one. That tolerance 214 | # level is determined by MAX_SLICE_GAP, which is the number of consecutive 215 | # null datapoints allowed in a slice file. 216 | # If you set this very low, you will waste less of the *tiny* bit of disk space 217 | # that this feature wastes, and you will be prone to performance problems 218 | # caused by slice fragmentation, which can be pretty severe. 219 | # If you set this really high, you will waste a bit more disk space (each 220 | # null datapoint wastes 8 bytes, but keep in mind your filesystem's block 221 | # size). If you suffer slice fragmentation issues, you should increase this or 222 | # run the ceres-maintenance defrag plugin more often. However you should not 223 | # set it to be huge because then if a large but allowed gap occurs it has to 224 | # get filled in, which means instead of a simple 8-byte write to a new file we 225 | # could end up doing an (8 * MAX_SLICE_GAP)-byte write to the latest slice. 226 | # CERES_MAX_SLICE_GAP = 80 227 | 228 | # Enabling this option will cause Ceres to lock each Ceres file it writes 229 | # to with an exclusive lock (LOCK_EX, see: man 2 flock). This is useful when 230 | # multiple carbon-cache daemons are writing to the same files. 231 | # CERES_LOCK_WRITES = False 232 | 233 | # Set this to True to enable whitelisting and blacklisting of metrics in 234 | # CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf.
If the whitelist is 235 | # missing or empty, all metrics will pass through 236 | # USE_WHITELIST = False 237 | 238 | # By default, carbon itself will log statistics (such as a count, 239 | # metricsReceived) with the top level prefix of 'carbon' at an interval of 60 240 | # seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation 241 | # CARBON_METRIC_PREFIX = carbon 242 | # CARBON_METRIC_INTERVAL = 60 243 | 244 | # Enable AMQP if you want to receive metrics using an amqp broker 245 | # ENABLE_AMQP = False 246 | 247 | # Verbose means a line will be logged for every metric received 248 | # useful for testing 249 | # AMQP_VERBOSE = False 250 | 251 | # AMQP_HOST = localhost 252 | # AMQP_PORT = 5672 253 | # AMQP_VHOST = / 254 | # AMQP_USER = guest 255 | # AMQP_PASSWORD = guest 256 | # AMQP_EXCHANGE = graphite 257 | # AMQP_METRIC_NAME_IN_BODY = False 258 | 259 | # The manhole interface allows you to SSH into the carbon daemon 260 | # and get a python interpreter. BE CAREFUL WITH THIS! If you do 261 | # something like time.sleep() in the interpreter, the whole process 262 | # will sleep! This is *extremely* helpful in debugging, assuming 263 | # you are familiar with the code. If you are not, please don't 264 | # mess with this, you are asking for trouble :) 265 | # 266 | # ENABLE_MANHOLE = False 267 | # MANHOLE_INTERFACE = 127.0.0.1 268 | # MANHOLE_PORT = 7222 269 | # MANHOLE_USER = admin 270 | # MANHOLE_PUBLIC_KEY = ssh-rsa AAAAB3NzaC1yc2EAAAABiwAaAIEAoxN0sv/e4eZCPpi3N3KYvyzRaBaMeS2RsOQ/cDuKv11dlNzVeiyc3RFmCv5Rjwn/lQ79y0zyHxw67qLyhQ/kDzINc4cY41ivuQXm2tPmgvexdrBv5nsfEpjs3gLZfJnyvlcVyWK/lId8WUvEWSWHTzsbtmXAF2raJMdgLTbQ8wE= 271 | 272 | # Patterns for all of the metrics this machine will store. Read more at 273 | # http://en.wikipedia.org/wiki/Advanced_Message_Queuing_Protocol#Bindings 274 | # 275 | # Example: store all sales, linux servers, and utilization metrics 276 | # BIND_PATTERNS = sales.#, servers.linux.#, #.utilization 277 | # 278 | # Example: store everything 279 | # BIND_PATTERNS = # 280 | 281 | # To configure special settings for the carbon-cache instance 'b', uncomment this: 282 | #[cache:b] 283 | #LINE_RECEIVER_PORT = 2103 284 | #PICKLE_RECEIVER_PORT = 2104 285 | #CACHE_QUERY_PORT = 7102 286 | # and any other settings you want to customize, defaults are inherited 287 | # from the [cache] section. 288 | # You can then specify the --instance=b option to manage this instance 289 | # 290 | # In order to turn off logging of successful connections for the line 291 | # receiver, set this to False 292 | # LOG_LISTENER_CONN_SUCCESS = True 293 | 294 | -------------------------------------------------------------------------------- /docker/graphite-master/conf/storage-aggregation.conf: -------------------------------------------------------------------------------- 1 | # Aggregation methods for whisper files. Entries are scanned in order, 2 | # and first match wins.
This file is scanned for changes every 60 seconds 3 | # 4 | # [name] 5 | # pattern = 6 | # xFilesFactor = 7 | # aggregationMethod = 8 | # 9 | # name: Arbitrary unique name for the rule 10 | # pattern: Regex pattern to match against the metric name 11 | # xFilesFactor: Ratio of valid data points required for aggregation to the next retention to occur 12 | # aggregationMethod: function to apply to data points for aggregation 13 | # 14 | [min] 15 | pattern = \.lower$ 16 | xFilesFactor = 0.1 17 | aggregationMethod = min 18 | 19 | [max] 20 | pattern = \.upper(_\d+)?$ 21 | xFilesFactor = 0.1 22 | aggregationMethod = max 23 | 24 | [sum] 25 | pattern = \.sum$ 26 | xFilesFactor = 0 27 | aggregationMethod = sum 28 | 29 | [avg_count] 30 | pattern = \.avg\.count$ 31 | xFilesFactor = 0 32 | aggregationMethod = average 33 | 34 | [count] 35 | pattern = \.count$ 36 | xFilesFactor = 0 37 | aggregationMethod = sum 38 | 39 | [count_legacy] 40 | pattern = ^stats_counts.* 41 | xFilesFactor = 0 42 | aggregationMethod = sum 43 | 44 | [default_average] 45 | pattern = .* 46 | xFilesFactor = 0 47 | aggregationMethod = average 48 | -------------------------------------------------------------------------------- /docker/graphite-master/conf/storage-schemas.conf: -------------------------------------------------------------------------------- 1 | # Schema definitions for Whisper files. Entries are scanned in order, 2 | # and first match wins. This file is scanned for changes every 60 seconds. 3 | # 4 | # Definition Syntax: 5 | # 6 | # [name] 7 | # pattern = regex 8 | # retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ... 9 | # 10 | # Remember: To support accurate aggregation from higher to lower resolution 11 | # archives, the precision of a longer retention archive must be 12 | # cleanly divisible by precision of next lower retention archive. 13 | # 14 | # Valid: 60s:7d,300s:30d (300/60 = 5) 15 | # Invalid: 180s:7d,300s:30d (300/180 = 3.333) 16 | # 17 | 18 | # Carbon's internal metrics. This entry should match what is specified in 19 | # CARBON_METRIC_PREFIX and CARBON_METRIC_INTERVAL settings 20 | 21 | [carbon] 22 | pattern = ^carbon\. 
23 | retentions = 10s:6h,1m:90d 24 | 25 | [default_1min_for_1day] 26 | pattern = .* 27 | retentions = 5s:1d,1m:7d,10m:1800d 28 | -------------------------------------------------------------------------------- /docker/graphite-master/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | cd /opt/graphite/webapp/ && python manage.py migrate --run-syncdb --noinput 6 | exec /usr/bin/supervisord 7 | -------------------------------------------------------------------------------- /docker/graphite-master/kube-watch.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const Client = require('kubernetes-client').Client; 3 | const config = require('kubernetes-client').config; 4 | const client = new Client({ config: config.getInCluster() }); 5 | const JSONStream = require('json-stream'); 6 | const jsonStream = new JSONStream(); 7 | const configFileTemplate = "/opt/graphite/webapp/graphite/local_settings.py.template"; 8 | const configFileTarget = "/opt/graphite/webapp/graphite/local_settings.py"; 9 | const processToRestart = "graphite-webapp"; 10 | const configTemplate = fs.readFileSync(configFileTemplate, 'utf8'); 11 | const exec = require('child_process').exec; 12 | const namespace = fs.readFileSync('/var/run/secrets/kubernetes.io/serviceaccount/namespace', 'utf8').toString(); 13 | 14 | function restartProcess() { 15 | exec(`supervisorctl restart ${processToRestart}`, (error, stdout, stderr) => { 16 | if (error) { 17 | console.error(error); 18 | return; 19 | } 20 | console.log(stdout); 21 | console.error(stderr); 22 | }); 23 | } 24 | 25 | function getNodes(endpoints) { 26 | return endpoints.subsets ? endpoints.subsets[0].addresses.map(e => `"${e.ip}:80"`).join(",") : ""; 27 | } 28 | 29 | function changeConfig(endpoints) { 30 | var result = configTemplate.replace(/@@GRAPHITE_NODES@@/g, getNodes(endpoints)); 31 | fs.writeFileSync(configFileTarget, result); 32 | restartProcess(); 33 | } 34 | 35 | async function main() { 36 | await client.loadSpec(); 37 | const stream = client.apis.v1.ns(namespace).endpoints.getStream({ qs: { watch: true, fieldSelector: 'metadata.name=graphite-node' } }); 38 | stream.pipe(jsonStream); 39 | jsonStream.on('data', obj => { 40 | if (!obj) { 41 | return; 42 | } 43 | console.log('Received update:', JSON.stringify(obj)); 44 | changeConfig(obj.object); 45 | }); 46 | } 47 | 48 | // main() is async, so a plain try/catch around the call would miss promise 49 | // rejections; handle errors on the returned promise instead. 50 | main().catch(error => { 51 | console.error(error); 52 | process.exit(1); 53 | }); 54 | -------------------------------------------------------------------------------- /docker/graphite-master/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | daemon off; 2 | user root; 3 | worker_processes 1; 4 | pid /var/run/nginx.pid; 5 | 6 | events { 7 | worker_connections 1024; 8 | } 9 | 10 | http { 11 | sendfile on; 12 | tcp_nopush on; 13 | tcp_nodelay on; 14 | keepalive_timeout 65; 15 | types_hash_max_size 2048; 16 | server_tokens off; 17 | 18 | server_names_hash_bucket_size 32; 19 | 20 | include /etc/nginx/mime.types; 21 | default_type application/octet-stream; 22 | 23 | access_log /var/log/nginx/access.log; 24 | error_log /var/log/nginx/error.log; 25 | 26 | gzip on; 27 | gzip_disable "msie6"; 28 | 29 | server { 30 | listen 80 default_server; 31 | server_name _; 32 | 33 | open_log_file_cache max=1000 inactive=20s min_uses=2 valid=1m; 34 | 35 | location / { 36 | proxy_pass http://127.0.0.1:8000; 37 |
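# Requests are proxied to the graphite webapp, which gunicorn serves on
# 127.0.0.1:8000 (see supervisord.conf); the headers below preserve the
# original client address and host for Django.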
proxy_set_header X-Real-IP $remote_addr; 38 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 39 | proxy_set_header X-Forwarded-Proto $scheme; 40 | proxy_set_header X-Forwarded-Server $host; 41 | proxy_set_header X-Forwarded-Host $http_host; 42 | proxy_set_header Host $http_host; 43 | 44 | client_max_body_size 10m; 45 | client_body_buffer_size 128k; 46 | 47 | proxy_connect_timeout 90; 48 | proxy_send_timeout 90; 49 | proxy_read_timeout 90; 50 | 51 | proxy_buffer_size 4k; 52 | proxy_buffers 4 32k; 53 | proxy_busy_buffers_size 64k; 54 | proxy_temp_file_write_size 64k; 55 | } 56 | 57 | add_header Access-Control-Allow-Origin "*"; 58 | add_header Access-Control-Allow-Methods "GET, OPTIONS"; 59 | add_header Access-Control-Allow-Headers "origin, authorization, accept"; 60 | 61 | location /content { 62 | alias /opt/graphite/webapp/content; 63 | } 64 | 65 | location /media { 66 | alias /usr/share/pyshared/django/contrib/admin/media; 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /docker/graphite-master/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon = true 3 | environment = GRAPHITE_STORAGE_DIR='/opt/graphite/storage',GRAPHITE_CONF_DIR='/opt/graphite/conf' 4 | 5 | [program:nginx] 6 | command = /usr/sbin/nginx 7 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 8 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 9 | autorestart = true 10 | 11 | [program:carbon-cache] 12 | user = root 13 | command = /opt/graphite/bin/carbon-cache.py --debug start 14 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 15 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 16 | autorestart = true 17 | 18 | [program:graphite-webapp] 19 | user = root 20 | directory = /opt/graphite/webapp 21 | environment = PYTHONPATH='/opt/graphite/webapp' 22 | command = /usr/bin/gunicorn -b127.0.0.1:8000 graphite.wsgi:application 23 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 24 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 25 | autorestart = true 26 | 27 | [program:kube-watch] 28 | user = root 29 | directory = /kube-watch 30 | command = node kube-watch.js 31 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 32 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 33 | autorestart = true 34 | -------------------------------------------------------------------------------- /docker/graphite-master/webapp/initial_data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "auth.user", 5 | "fields": { 6 | "username": "admin", 7 | "first_name": "", 8 | "last_name": "", 9 | "is_active": true, 10 | "is_superuser": true, 11 | "is_staff": true, 12 | "last_login": "2011-09-20 17:02:14", 13 | "groups": [], 14 | "user_permissions": [], 15 | "password": "sha1$1b11b$edeb0a67a9622f1f2cfeabf9188a711f5ac7d236", 16 | "email": "root@example.com", 17 | "date_joined": "2011-09-20 17:02:14" 18 | } 19 | } 20 | ] 21 | -------------------------------------------------------------------------------- /docker/graphite-master/webapp/local_settings.py.template: -------------------------------------------------------------------------------- 1 | ## Graphite local_settings.py 2 | # Edit this file to customize the default Graphite webapp settings 3 | # 4 | # Additional customizations to Django settings can be added to this file as well 5 | 6 | 
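# How this template is used: kube-watch.js (see above) watches the
# graphite-node endpoints, replaces the @@GRAPHITE_NODES@@ placeholder in the
# Cluster Configuration section below with the current list of node IPs,
# writes the result to local_settings.py, and restarts the graphite-webapp
# process via supervisorctl.
#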
##################################### 7 | # General Configuration # 8 | ##################################### 9 | # 10 | # Set this to a long, random unique string to use as a secret key for this 11 | # install. This key is used for salting of hashes used in auth tokens, 12 | # CSRF middleware, cookie storage, etc. This should be set identically among 13 | # instances if used behind a load balancer. 14 | #SECRET_KEY = 'UNSAFE_DEFAULT' 15 | 16 | # In Django 1.5+ set this to the list of hosts your graphite instance is 17 | # accessible as. See: 18 | # https://docs.djangoproject.com/en/dev/ref/settings/#std:setting-ALLOWED_HOSTS 19 | #ALLOWED_HOSTS = [ '*' ] 20 | 21 | # Set your local timezone (Django's default is America/Chicago) 22 | # If your graphs appear to be offset by a couple hours then this probably 23 | # needs to be explicitly set to your local timezone. 24 | #TIME_ZONE = 'America/Los_Angeles' 25 | 26 | # Set the default short date format. See strftime(3) for supported sequences. 27 | #DATE_FORMAT = '%m/%d' 28 | 29 | # Override this to provide documentation specific to your Graphite deployment 30 | #DOCUMENTATION_URL = "http://graphite.readthedocs.io/" 31 | 32 | # Logging 33 | LOG_ROTATION = True 34 | LOG_ROTATION_COUNT = 1 35 | LOG_RENDERING_PERFORMANCE = True 36 | #LOG_CACHE_PERFORMANCE = True 37 | 38 | # Enable full debug page display on exceptions (Internal Server Error pages) 39 | #DEBUG = True 40 | 41 | # If using RRD files and rrdcached, set to the address or socket of the daemon 42 | #FLUSHRRDCACHED = 'unix:/var/run/rrdcached.sock' 43 | 44 | # This lists the memcached servers that will be used by this webapp. 45 | # If you have a cluster of webapps you should ensure all of them 46 | # have the *exact* same value for this setting. That will maximize cache 47 | # efficiency. Setting MEMCACHE_HOSTS to be empty will turn off use of 48 | # memcached entirely. 49 | # 50 | # You should not use the loopback address (127.0.0.1) here if using clustering 51 | # as every webapp in the cluster should use the exact same values to prevent 52 | # unneeded cache misses. Set to [] to disable caching of images and fetched data 53 | #MEMCACHE_HOSTS = ['10.10.10.10:11211', '10.10.10.11:11211', '10.10.10.12:11211'] 54 | 55 | # Metric data and graphs are cached for one minute by default. If defined, 56 | # DEFAULT_CACHE_POLICY is a list of tuples of minimum query time ranges mapped 57 | # to the cache duration for the results. This allows for larger queries to be 58 | # cached for longer periods of time. All times are in seconds. If the policy is 59 | # empty or undefined, all results will be cached for DEFAULT_CACHE_DURATION. 60 | #DEFAULT_CACHE_DURATION = 60 # Cache images and data for 1 minute 61 | #DEFAULT_CACHE_POLICY = [(0, 60), # default is 60 seconds 62 | # (7200, 120), # >= 2 hour queries are cached 2 minutes 63 | # (21600, 180)] # >= 6 hour queries are cached 3 minutes 64 | #MEMCACHE_KEY_PREFIX = 'graphite' 65 | 66 | 67 | # This lists the memcached options. Default is an empty dict. 68 | # Accepted options depend on the Memcached implementation and the Django version. 69 | # Until Django 1.10, options are used only for pylibmc. 70 | # Starting from 1.11, options are used for both python-memcached and pylibmc. 71 | #MEMCACHE_OPTIONS = { 'socket_timeout': 0.5 } 72 | 73 | # Set URL_PREFIX when deploying graphite-web to a non-root location 74 | #URL_PREFIX = '/graphite' 75 | 76 | # Graphite uses Django Tagging to support tags in Events.
By default each 77 | # tag is limited to 50 characters in length. 78 | #MAX_TAG_LENGTH = 50 79 | 80 | # Interval for the Auto-Refresh feature in the Composer, measured in seconds. 81 | #AUTO_REFRESH_INTERVAL = 60 82 | 83 | ##################################### 84 | # Filesystem Paths # 85 | ##################################### 86 | # 87 | # Change only GRAPHITE_ROOT if your install is merely shifted from /opt/graphite 88 | # to somewhere else 89 | #GRAPHITE_ROOT = '/opt/graphite' 90 | 91 | # Most installs done outside of a separate tree such as /opt/graphite will 92 | # need to change these settings. Note that the default settings for each 93 | # of these is relative to GRAPHITE_ROOT. 94 | #CONF_DIR = '/opt/graphite/conf' 95 | #STORAGE_DIR = '/opt/graphite/storage' 96 | #STATIC_ROOT = '/opt/graphite/static' 97 | #LOG_DIR = '/opt/graphite/storage/log/webapp' 98 | #INDEX_FILE = '/opt/graphite/storage/index' # Search index file 99 | 100 | # To further or fully customize the paths, modify the following. Note that the 101 | # default settings for each of these are relative to CONF_DIR and STORAGE_DIR 102 | # 103 | ## Webapp config files 104 | #DASHBOARD_CONF = '/opt/graphite/conf/dashboard.conf' 105 | #GRAPHTEMPLATES_CONF = '/opt/graphite/conf/graphTemplates.conf' 106 | 107 | ## Data directories 108 | # 109 | # NOTE: If any directory is unreadable in STANDARD_DIRS it will break metric browsing 110 | # 111 | #CERES_DIR = '/opt/graphite/storage/ceres' 112 | #WHISPER_DIR = '/opt/graphite/storage/whisper' 113 | #RRD_DIR = '/opt/graphite/storage/rrd' 114 | # 115 | # Data directories using the "Standard" metrics finder (i.e. not Ceres) 116 | #STANDARD_DIRS = [WHISPER_DIR, RRD_DIR] # Default: set from the above variables 117 | 118 | 119 | ##################################### 120 | # Email Configuration # 121 | ##################################### 122 | # 123 | # This is used for emailing rendered graphs. The default backend is SMTP. 124 | #EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' 125 | # 126 | # To drop emails on the floor, enable the Dummy backend instead. 127 | #EMAIL_BACKEND = 'django.core.mail.backends.dummy.EmailBackend' 128 | 129 | #EMAIL_HOST = 'localhost' 130 | #EMAIL_PORT = 25 131 | #EMAIL_HOST_USER = '' 132 | #EMAIL_HOST_PASSWORD = '' 133 | #EMAIL_USE_TLS = False 134 | 135 | 136 | ##################################### 137 | # Authentication Configuration # 138 | ##################################### 139 | # 140 | ## LDAP / ActiveDirectory authentication setup 141 | #USE_LDAP_AUTH = True 142 | #LDAP_SERVER = "ldap.mycompany.com" 143 | #LDAP_PORT = 389 144 | #LDAP_USE_TLS = False 145 | 146 | ## Manual URI / query setup 147 | #LDAP_URI = "ldaps://ldap.mycompany.com:636" 148 | #LDAP_SEARCH_BASE = "OU=users,DC=mycompany,DC=com" 149 | #LDAP_BASE_USER = "CN=some_readonly_account,DC=mycompany,DC=com" 150 | #LDAP_BASE_PASS = "readonly_account_password" 151 | #LDAP_USER_QUERY = "(username=%s)" #For Active Directory use "(sAMAccountName=%s)" 152 | 153 | # User DN template to use for binding (and authentication) against the 154 | # LDAP server. %(username) is replaced with the username supplied at 155 | # graphite login. 156 | #LDAP_USER_DN_TEMPLATE = "CN=%(username)s,OU=users,DC=mycompany,DC=com" 157 | 158 | # If you want to further customize the ldap connection options you should 159 | # directly use ldap.set_option to set the ldap module's global options. 
160 | # For example: 161 | # 162 | #import ldap 163 | #ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW) # Use ldap.OPT_X_TLS_DEMAND to force TLS 164 | #ldap.set_option(ldap.OPT_REFERRALS, 0) # Enable for Active Directory 165 | #ldap.set_option(ldap.OPT_X_TLS_CACERTDIR, "/etc/ssl/ca") 166 | #ldap.set_option(ldap.OPT_X_TLS_CERTFILE, "/etc/ssl/mycert.pem") 167 | #ldap.set_option(ldap.OPT_X_TLS_KEYFILE, "/etc/ssl/mykey.pem") 168 | #ldap.set_option(ldap.OPT_DEBUG_LEVEL, 65535) # To enable verbose debugging 169 | # See http://www.python-ldap.org/ for further details on these options. 170 | 171 | ## REMOTE_USER authentication. See: https://docs.djangoproject.com/en/dev/howto/auth-remote-user/ 172 | #USE_REMOTE_USER_AUTHENTICATION = True 173 | 174 | # Override the URL for the login link (e.g. for django_openid_auth) 175 | #LOGIN_URL = '/account/login' 176 | 177 | 178 | ############################### 179 | # Authorization for Dashboard # 180 | ############################### 181 | # By default, there is no security on dashboards - any user can add, change or delete them. 182 | # This section provides 3 different authorization models, of varying strictness. 183 | 184 | # If set to True, users must be logged in to save or delete dashboards. Defaults to False 185 | #DASHBOARD_REQUIRE_AUTHENTICATION = True 186 | 187 | # If set to the name of a user group, dashboards can be saved and deleted by any user in this 188 | # group. Groups can be set in the Django Admin app, or in LDAP. Defaults to None. 189 | # NOTE: Ignored if DASHBOARD_REQUIRE_AUTHENTICATION is not set 190 | #DASHBOARD_REQUIRE_EDIT_GROUP = 'dashboard-editors-group' 191 | 192 | # If set to True, dashboards can be saved or deleted by any user having the appropriate 193 | # (change or delete) permission (as set in the Django Admin app). Defaults to False 194 | # NOTE: Ignored if DASHBOARD_REQUIRE_AUTHENTICATION is not set 195 | #DASHBOARD_REQUIRE_PERMISSIONS = True 196 | 197 | 198 | ########################## 199 | # Database Configuration # 200 | ########################## 201 | # 202 | # By default sqlite is used. If you cluster multiple webapps you will need 203 | # to setup an external database (such as MySQL) and configure all of the webapp 204 | # instances to use the same database. Note that this database is only used to store 205 | # Django models such as saved graphs, dashboards, user preferences, etc. 206 | # Metric data is not stored here. 207 | # 208 | # DO NOT FORGET TO RUN MIGRATIONS AFTER SETTING UP A NEW DATABASE 209 | # http://graphite.readthedocs.io/en/latest/config-database-setup.html 210 | # 211 | # 212 | # The following built-in database engines are available: 213 | # django.db.backends.postgresql_psycopg2 214 | # django.db.backends.mysql 215 | # django.db.backends.sqlite3 216 | # django.db.backends.oracle 217 | # 218 | # The default is 'django.db.backends.sqlite3' with file 'graphite.db' 219 | # located in STORAGE_DIR 220 | # 221 | #DATABASES = { 222 | # 'default': { 223 | # 'NAME': '/opt/graphite/storage/graphite.db', 224 | # 'ENGINE': 'django.db.backends.sqlite3', 225 | # 'USER': '', 226 | # 'PASSWORD': '', 227 | # 'HOST': '', 228 | # 'PORT': '' 229 | # } 230 | #} 231 | # 232 | 233 | 234 | ######################### 235 | # Cluster Configuration # 236 | ######################### 237 | # 238 | # To avoid excessive DNS lookups you want to stick to using IP addresses only 239 | # in this entire section. 
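#
# In this image CLUSTER_SERVERS is not hand-maintained: the
# @@GRAPHITE_NODES@@ placeholder below is rewritten by kube-watch.js with
# the current graphite-node pod endpoints, each formatted as "<ip>:80".
# After substitution the line looks like this (IPs are illustrative):
#
#   CLUSTER_SERVERS = ["10.2.1.5:80", "10.2.1.6:80"]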
240 | # 241 | 242 | # This should list the IP address (and optionally port) of the webapp on each 243 | # remote server in the cluster. These servers must each have local access to 244 | # metric data. Note that the first server to return a match for a query will be 245 | # used. 246 | CLUSTER_SERVERS = [@@GRAPHITE_NODES@@] 247 | 248 | # This setting controls whether https is used to communicate between cluster members 249 | #INTRACLUSTER_HTTPS = False 250 | 251 | # These are timeout values (in seconds) for requests to remote webapps 252 | #REMOTE_FIND_TIMEOUT = 3.0 # Timeout for metric find requests 253 | #REMOTE_FETCH_TIMEOUT = 3.0 # Timeout to fetch series data 254 | #REMOTE_RETRY_DELAY = 60.0 # Time before retrying a failed remote webapp 255 | 256 | # Try to detect when a cluster server is localhost and don't forward queries 257 | #REMOTE_EXCLUDE_LOCAL = False 258 | 259 | # Number of retries for a specific remote data fetch. 260 | #MAX_FETCH_RETRIES = 2 261 | 262 | #FIND_CACHE_DURATION = 300 # Time to cache remote metric find results 263 | # If the query doesn't fall entirely within the FIND_TOLERANCE window 264 | # we disregard the window. This prevents unnecessary remote fetches 265 | # caused when carbon's cache skews node.intervals, giving the appearance 266 | # remote systems have data we don't have locally, which we probably do. 267 | #FIND_TOLERANCE = 2 * FIND_CACHE_DURATION 268 | 269 | # During a rebalance of a consistent hash cluster, after a partition event on a replication > 1 cluster, 270 | # or in other cases we might receive multiple TimeSeries data for a metric key. Merge them together rather 271 | # than choosing the "most complete" one (pre-0.9.14 behaviour). 272 | #REMOTE_STORE_MERGE_RESULTS = True 273 | 274 | ## Remote rendering settings 275 | # Set to True to enable rendering of Graphs on a remote webapp 276 | #REMOTE_RENDERING = True 277 | # List of IP (and optionally port) of the webapp on each remote server that 278 | # will be used for rendering. Note that each rendering host should have local 279 | # access to metric data or should have CLUSTER_SERVERS configured 280 | #RENDERING_HOSTS = [] 281 | #REMOTE_RENDER_CONNECT_TIMEOUT = 1.0 282 | 283 | # If you are running multiple carbon-caches on this machine (typically behind 284 | # a relay using consistent hashing), you'll need to list the ip address, cache 285 | # query port, and instance name of each carbon-cache instance on the local 286 | # machine (NOT every carbon-cache in the entire cluster). The default cache 287 | # query port is 7002 and a common scheme is to use 7102 for instance b, 7202 288 | # for instance c, etc. 289 | # If you're using consistent hashing, please keep the order of hosts the same as 290 | # the order of DESTINATIONS in your relay - otherwise you'll get cache misses. 291 | # 292 | # You *should* use 127.0.0.1 here in most cases. 293 | # 294 | #CARBONLINK_HOSTS = ["127.0.0.1:7002:a", "127.0.0.1:7102:b", "127.0.0.1:7202:c"] 295 | #CARBONLINK_TIMEOUT = 1.0 296 | #CARBONLINK_RETRY_DELAY = 15 # Seconds to blacklist a failed remote server 297 | # 298 | 299 | # Type of metric hashing function. 300 | # The default `carbon_ch` is Graphite's traditional consistent-hashing implementation.
301 | # Alternatively, you can use `fnv1a_ch`, which uses the Fowler-Noll-Vo hash 302 | # function (FNV-1a) implementation offered by the carbon-c-relay project 303 | # https://github.com/grobian/carbon-c-relay 304 | # 305 | # Supported values: carbon_ch, fnv1a_ch 306 | # 307 | #CARBONLINK_HASHING_TYPE = 'carbon_ch' 308 | 309 | # A "keyfunc" is a user-defined python function that is given a metric name 310 | # and returns a string that should be used when hashing the metric name. 311 | # This is important when your hashing has to respect certain metric groupings. 312 | #CARBONLINK_HASHING_KEYFUNC = "/opt/graphite/bin/keyfuncs.py:my_keyfunc" 313 | 314 | # Prefix for internal carbon statistics. 315 | #CARBON_METRIC_PREFIX='carbon' 316 | 317 | # The replication factor to use with consistent hashing. 318 | # This should usually match the value configured in Carbon. 319 | #REPLICATION_FACTOR = 1 320 | 321 | 322 | ##################################### 323 | # Additional Django Settings # 324 | ##################################### 325 | # Uncomment the following line for direct access to Django settings such as 326 | # MIDDLEWARE_CLASSES or APPS 327 | #from graphite.app_settings import * 328 | 329 | 330 | -------------------------------------------------------------------------------- /docker/graphite-node/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nanit/kubernetes-graphite-cluster/900a89cce137b167345c162a6554e7a30ea9b3e9/docker/graphite-node/.DS_Store -------------------------------------------------------------------------------- /docker/graphite-node/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:14.04 2 | RUN echo 'deb http://us.archive.ubuntu.com/ubuntu/ trusty universe' >> /etc/apt/sources.list 3 | RUN apt-get -y update 4 | 5 | # Install required packages 6 | RUN apt-get -y update &&\ 7 | apt-get -y install software-properties-common python-django-tagging python-simplejson \ 8 | python-memcache python-ldap python-cairo python-pysqlite2 python-support python-pip \ 9 | gunicorn supervisor nginx-light git wget curl build-essential python-dev libffi-dev vim 10 | RUN pip install Twisted==13.2.0 11 | RUN pip install pytz 12 | RUN git clone https://github.com/graphite-project/whisper.git /src/whisper &&\ 13 | cd /src/whisper &&\ 14 | git checkout 1.0.x &&\ 15 | python setup.py install 16 | 17 | RUN git clone https://github.com/graphite-project/carbon.git /src/carbon &&\ 18 | cd /src/carbon &&\ 19 | git checkout 1.0.x &&\ 20 | python setup.py install 21 | 22 | 23 | RUN git clone https://github.com/graphite-project/graphite-web.git /src/graphite-web &&\ 24 | cd /src/graphite-web &&\ 25 | git checkout 1.0.x &&\ 26 | python setup.py install &&\ 27 | pip install -r requirements.txt &&\ 28 | python check-dependencies.py 29 | 30 | # fixes fatal error "Your WhiteNoise configuration is incompatible with WhiteNoise v4.0" 31 | RUN /usr/bin/yes | pip uninstall whitenoise &&\ 32 | pip install "whitenoise<4" 33 | 34 | # Add system service config 35 | ADD ./nginx/nginx.conf /etc/nginx/nginx.conf 36 | ADD ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf 37 | 38 | # Add graphite config 39 | ADD ./webapp/initial_data.json /opt/graphite/webapp/graphite/initial_data.json 40 | ADD ./webapp/local_settings.py /opt/graphite/webapp/graphite/local_settings.py 41 | ADD ./conf/carbon.conf /opt/graphite/conf/carbon.conf 42 | ADD ./conf/storage-schemas.conf
/opt/graphite/conf/storage-schemas.conf 43 | ADD ./conf/storage-aggregation.conf /opt/graphite/conf/storage-aggregation.conf 44 | RUN mkdir -p /opt/graphite/storage/whisper 45 | RUN touch /opt/graphite/storage/graphite.db /opt/graphite/storage/index 46 | RUN chmod 0775 /opt/graphite/storage /opt/graphite/storage/whisper 47 | RUN chmod 0664 /opt/graphite/storage/graphite.db 48 | RUN cp /src/graphite-web/webapp/manage.py /opt/graphite/webapp 49 | 50 | # Install curator cron job 51 | ADD curator/cron /etc/cron.d/curator.cron 52 | ADD curator/run.sh /etc/cron.d/curator.sh 53 | RUN chmod +x /etc/cron.d/curator.sh 54 | 55 | ADD entrypoint.sh /entrypoint.sh 56 | 57 | # Nginx 58 | EXPOSE 80 59 | # Carbon line receiver port 60 | # EXPOSE 2003 61 | # Carbon UDP receiver port 62 | # EXPOSE 2003/udp 63 | # Carbon pickle receiver port 64 | EXPOSE 2004 65 | # Carbon cache query port 66 | # EXPOSE 7002 67 | 68 | CMD ["/entrypoint.sh"] 69 | -------------------------------------------------------------------------------- /docker/graphite-node/README.md: -------------------------------------------------------------------------------- 1 | ## Graphite + Carbon 2 | 3 | An all-in-one image running graphite and carbon-cache. **Version**: 0.9.12. 4 | 5 | This image contains a sensible default configuration of graphite and 6 | carbon-cache. Starting this container will, by default, bind the following 7 | host ports: 8 | 9 | - `80`: the graphite web interface 10 | - `2003`: the carbon-cache line receiver (the standard graphite protocol) 11 | - `2004`: the carbon-cache pickle receiver 12 | - `7002`: the carbon-cache query port (used by the web interface) 13 | 14 | With this image, you can get up and running with graphite by simply running: 15 | 16 | docker run -d nickstenning/graphite 17 | 18 | If you already have services running on the host on one or more of these ports, 19 | you may wish to allow docker to assign random ports on the host. You can do this 20 | easily by running: 21 | 22 | docker run -p 80 -p 2003 -p 2004 -p 7002 -d nickstenning/graphite 23 | 24 | You can log into the administrative interface of graphite-web (a Django 25 | application) with the username `admin` and password `admin`. This password can 26 | be changed through the web interface. 27 | 28 | **N.B.** Please be aware that by default docker will make the exposed ports 29 | accessible from anywhere if the host firewall is unconfigured. 30 | 31 | ### Data volumes 32 | 33 | Graphite data is stored at `/var/lib/graphite/storage/whisper` within the 34 | container. If you wish to store your metrics outside the container (highly 35 | recommended) you can use docker's data volumes feature. For example, to store 36 | graphite's metric database at `/data/graphite` on the host, you could use: 37 | 38 | docker run -v /data/graphite:/var/lib/graphite/storage/whisper \ 39 | -d nickstenning/graphite 40 | 41 | **N.B.** You will need to run the container with suitable permissions to write 42 | to the data volume directory. Carbon and the graphite webapp run as `www-data` 43 | inside the container, but this UID/GID may be mapped inconsistently on the host. 44 | 45 | ### Technical details 46 | 47 | By default, this instance of carbon-cache uses the following retention periods 48 | resulting in whisper files of approximately 2.5MiB. 49 | 50 | 10s:8d,1m:31d,10m:1y,1h:5y 51 | 52 | For more information, see [the 53 | repository](https://github.com/nickstenning/dockerfiles/tree/master/graphite).
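
### Sending a test metric

To smoke-test a running container, you can push one datapoint over the carbon
line receiver (plaintext protocol: `<name> <value> <unix-timestamp>`) and read
it back through the render API. The host and metric name below are
illustrative, and the `-q1` netcat flag (close after sending) varies between
netcat variants:

    echo "test.smoke 42 $(date +%s)" | nc -q1 localhost 2003
    curl "http://localhost/render?target=test.smoke&format=json&from=-10min"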
54 | -------------------------------------------------------------------------------- /docker/graphite-node/conf/carbon.conf: -------------------------------------------------------------------------------- 1 | [cache] 2 | # Configure carbon directories. 3 | # 4 | # OS environment variables can be used to tell carbon where graphite is 5 | # installed, where to read configuration from and where to write data. 6 | # 7 | # GRAPHITE_ROOT - Root directory of the graphite installation. 8 | # Defaults to ../ 9 | # GRAPHITE_CONF_DIR - Configuration directory (where this file lives). 10 | # Defaults to $GRAPHITE_ROOT/conf/ 11 | # GRAPHITE_STORAGE_DIR - Storage directory for whisper/rrd/log/pid files. 12 | # Defaults to $GRAPHITE_ROOT/storage/ 13 | # 14 | # To change other directory paths, add settings to this file. The following 15 | # configuration variables are available with these default values: 16 | # 17 | # STORAGE_DIR = $GRAPHITE_STORAGE_DIR 18 | # LOCAL_DATA_DIR = %(STORAGE_DIR)s/whisper/ 19 | # WHITELISTS_DIR = %(STORAGE_DIR)s/lists/ 20 | # CONF_DIR = %(STORAGE_DIR)s/conf/ 21 | # LOG_DIR = %(STORAGE_DIR)s/log/ 22 | # PID_DIR = %(STORAGE_DIR)s/ 23 | # 24 | # For FHS style directory structures, use: 25 | # 26 | # STORAGE_DIR = /var/lib/carbon/ 27 | # CONF_DIR = /etc/carbon/ 28 | # LOG_DIR = /var/log/carbon/ 29 | # PID_DIR = /var/run/ 30 | # 31 | #LOCAL_DATA_DIR = /opt/graphite/storage/whisper/ 32 | 33 | # Specify the database library used to store metric data on disk. Each database 34 | # may have configurable options to change the behaviour of how it writes to 35 | # persistent storage. 36 | # 37 | # whisper - Fixed-size database, similar in design and purpose to RRD. This is 38 | # the default storage backend for carbon and the most rigorously tested. 39 | # 40 | # ceres - Experimental alternative database that supports storing data in sparse 41 | # files of arbitrary fixed-size resolutions. 42 | DATABASE = whisper 43 | 44 | # Enable daily log rotation. If disabled, a new file will be opened whenever the log file path no 45 | # longer exists (i.e. it is removed or renamed) 46 | ENABLE_LOGROTATION = True 47 | 48 | # Specify the user to drop privileges to 49 | # If this is blank carbon-cache runs as the user that invokes it 50 | # This user must have write access to the local data directory 51 | USER = 52 | 53 | # Limit the size of the cache to avoid swapping or becoming CPU bound. 54 | # Sorting and serving cache queries gets more expensive as the cache grows. 55 | # Use the value "inf" (infinity) for an unlimited cache size. 56 | # The value should be an integer number of metric datapoints. 57 | MAX_CACHE_SIZE = inf 58 | 59 | # Limits the number of whisper update_many() calls per second, which effectively 60 | # means the number of write requests sent to the disk. This is intended to 61 | # prevent over-utilizing the disk and thus starving the rest of the system. 62 | # When the rate of required updates exceeds this, then carbon's caching will 63 | # take effect and increase the overall throughput accordingly. 64 | MAX_UPDATES_PER_SECOND = inf 65 | 66 | # If defined, this changes the MAX_UPDATES_PER_SECOND in Carbon when a 67 | # stop/shutdown is initiated. This helps when MAX_UPDATES_PER_SECOND is 68 | # relatively low and carbon has cached a lot of updates; it enables the carbon 69 | # daemon to shut down more quickly. 70 | # MAX_UPDATES_PER_SECOND_ON_SHUTDOWN = 1000 71 | 72 | # Softly limits the number of whisper files that get created each minute. 73 | # Setting this value low (e.g.
50) is a good way to ensure that your carbon 74 | # system will not be adversely impacted when a bunch of new metrics are 75 | # sent to it. The trade off is that any metrics received in excess of this 76 | # value will be silently dropped, and the whisper file will not be created 77 | # until such point as a subsequent metric is received and fits within the 78 | # defined rate limit. Setting this value high (like "inf" for infinity) will 79 | # cause carbon to create the files quickly but at the risk of increased I/O. 80 | MAX_CREATES_PER_MINUTE = inf 81 | 82 | # Set the interface and port for the line (plain text) listener. Setting the 83 | # interface to 0.0.0.0 listens on all interfaces. Port can be set to 0 to 84 | # disable this listener if it is not required. 85 | LINE_RECEIVER_INTERFACE = 0.0.0.0 86 | LINE_RECEIVER_PORT = 2003 87 | 88 | # Set this to True to enable the UDP listener. By default this is off 89 | # because it is very common to run multiple carbon daemons and managing 90 | # another (rarely used) port for every carbon instance is not fun. 91 | ENABLE_UDP_LISTENER = True 92 | UDP_RECEIVER_INTERFACE = 0.0.0.0 93 | UDP_RECEIVER_PORT = 2003 94 | 95 | # Set the interface and port for the pickle listener. Setting the interface to 96 | # 0.0.0.0 listens on all interfaces. Port can be set to 0 to disable this 97 | # listener if it is not required. 98 | PICKLE_RECEIVER_INTERFACE = 0.0.0.0 99 | PICKLE_RECEIVER_PORT = 2004 100 | 101 | # Limit the number of open connections the receiver can handle at any time. 102 | # Default is no limit. Setting up a limit for sites handling high volume 103 | # traffic may be recommended to avoid running out of TCP memory or having 104 | # thousands of TCP connections reduce the throughput of the service. 105 | #MAX_RECEIVER_CONNECTIONS = inf 106 | 107 | # Per security concerns outlined in Bug #817247 the pickle receiver 108 | # will use a more secure and slightly less efficient unpickler. 109 | # Set this to True to revert to the old-fashioned insecure unpickler. 110 | USE_INSECURE_UNPICKLER = False 111 | 112 | CACHE_QUERY_INTERFACE = 0.0.0.0 113 | CACHE_QUERY_PORT = 7002 114 | 115 | # Set this to False to drop datapoints received after the cache 116 | # reaches MAX_CACHE_SIZE. If this is True (the default) then sockets 117 | # over which metrics are received will temporarily stop accepting 118 | # data until the cache size falls below 95% MAX_CACHE_SIZE. 119 | USE_FLOW_CONTROL = True 120 | 121 | # If enabled, this setting is used to time out a metric client connection if no 122 | # metrics have been sent in the specified time in seconds 123 | #METRIC_CLIENT_IDLE_TIMEOUT = None 124 | 125 | # By default, carbon-cache will log every whisper update and cache hit. 126 | # This can be excessive and degrade performance if logging to the same 127 | # volume where the whisper data is stored. 128 | LOG_UPDATES = False 129 | LOG_CREATES = True 130 | LOG_CACHE_HITS = False 131 | LOG_CACHE_QUEUE_SORTS = False 132 | 133 | # The thread that writes metrics to disk can use one of the following strategies 134 | # determining the order in which metrics are removed from cache and flushed to 135 | # disk. The default option preserves the same behavior as has been historically 136 | # available in version 0.9.10. 137 | # 138 | # sorted - All metrics in the cache will be counted and an ordered list of 139 | # them will be sorted according to the number of datapoints in the cache at the 140 | # moment of the list's creation.
Metrics will then be flushed from the cache to 141 | # disk in that order. 142 | # 143 | # max - The writer thread will always pop and flush the metric from cache 144 | # that has the most datapoints. This will give a strong flush preference to 145 | # frequently updated metrics and will also reduce random file-io. Infrequently 146 | # updated metrics may only ever be persisted to disk at daemon shutdown if 147 | # there are a large number of metrics which receive very frequent updates OR if 148 | # disk i/o is very slow. 149 | # 150 | # naive - Metrics will be flushed from the cache to disk in an unordered 151 | # fashion. This strategy may be desirable in situations where the storage for 152 | # whisper files is solid state, CPU resources are very limited or deference to 153 | # the OS's i/o scheduler is expected to compensate for the random write 154 | # pattern. 155 | # 156 | CACHE_WRITE_STRATEGY = sorted 157 | 158 | # On some systems it is desirable for whisper to write synchronously. 159 | # Set this option to True if you'd like to try this. Basically it will 160 | # shift the onus of buffering writes from the kernel into carbon's cache. 161 | WHISPER_AUTOFLUSH = False 162 | 163 | # By default new Whisper files are created pre-allocated with the data region 164 | # filled with zeros to prevent fragmentation and speed up contiguous reads and 165 | # writes (which are common). Enabling this option will cause Whisper to create 166 | # the file sparsely instead. Enabling this option may allow a large increase of 167 | # MAX_CREATES_PER_MINUTE but may have longer term performance implications 168 | # depending on the underlying storage configuration. 169 | # WHISPER_SPARSE_CREATE = False 170 | 171 | # Only beneficial on linux filesystems that support the fallocate system call. 172 | # It maintains the benefits of contiguous reads/writes, but with a potentially 173 | # much faster creation speed, by allowing the kernel to handle the block 174 | # allocation and zero-ing. Enabling this option may allow a large increase of 175 | # MAX_CREATES_PER_MINUTE. If enabled on an OS or filesystem that is unsupported 176 | # this option will gracefully fallback to standard POSIX file access methods. 177 | WHISPER_FALLOCATE_CREATE = True 178 | 179 | # Enabling this option will cause Whisper to lock each Whisper file it writes 180 | # to with an exclusive lock (LOCK_EX, see: man 2 flock). This is useful when 181 | # multiple carbon-cache daemons are writing to the same files. 182 | # WHISPER_LOCK_WRITES = False 183 | 184 | # On systems with a large number of metrics, writeback of Whisper write(2) 185 | # pages can sometimes cause disk thrashing due to memory shortage, so that abnormal 186 | # disk reads occur. Enabling this option makes it possible to reduce useless 187 | # page cache memory by calling posix_fadvise(2) with the POSIX_FADVISE_RANDOM option. 188 | # WHISPER_FADVISE_RANDOM = False 189 | 190 | # By default all nodes stored in Ceres are cached in memory to improve the 191 | # throughput of reads and writes to underlying slices. Turning this off will 192 | # greatly reduce memory consumption for databases with millions of metrics, at 193 | # the cost of a steep increase in disk i/o, approximately an extra two os.stat 194 | # calls for every read and write. Reasons to do this are if the underlying 195 | # storage can handle stat() with practically zero cost (SSD, NVMe, zRAM).
196 | # Valid values are: 197 | # all - all nodes are cached 198 | # none - node caching is disabled 199 | # CERES_NODE_CACHING_BEHAVIOR = all 200 | 201 | # Ceres nodes can have many slices and caching the right ones can improve 202 | # performance dramatically. Note that there are many trade-offs to tinkering 203 | # with this, and unless you are a ceres developer you *really* should not 204 | # mess with this. Valid values are: 205 | # latest - only the most recent slice is cached 206 | # all - all slices are cached 207 | # none - slice caching is disabled 208 | # CERES_SLICE_CACHING_BEHAVIOR = latest 209 | 210 | # If a Ceres node accumulates too many slices, performance can suffer. 211 | # This can be caused by intermittently reported data. To mitigate 212 | # slice fragmentation there is a tolerance for how much space can be 213 | # wasted within a slice file to avoid creating a new one. That tolerance 214 | # level is determined by MAX_SLICE_GAP, which is the number of consecutive 215 | # null datapoints allowed in a slice file. 216 | # If you set this very low, you will waste less of the *tiny* bit of disk space 217 | # that this feature wastes, and you will be prone to performance problems 218 | # caused by slice fragmentation, which can be pretty severe. 219 | # If you set this really high, you will waste a bit more disk space (each 220 | # null datapoint wastes 8 bytes, but keep in mind your filesystem's block 221 | # size). If you suffer slice fragmentation issues, you should increase this or 222 | # run the ceres-maintenance defrag plugin more often. However you should not 223 | # set it to be huge because then if a large but allowed gap occurs it has to 224 | # get filled in, which means instead of a simple 8-byte write to a new file we 225 | # could end up doing an (8 * MAX_SLICE_GAP)-byte write to the latest slice. 226 | # CERES_MAX_SLICE_GAP = 80 227 | 228 | # Enabling this option will cause Ceres to lock each Ceres file it writes 229 | # to with an exclusive lock (LOCK_EX, see: man 2 flock). This is useful when 230 | # multiple carbon-cache daemons are writing to the same files. 231 | # CERES_LOCK_WRITES = False 232 | 233 | # Set this to True to enable whitelisting and blacklisting of metrics in 234 | # CONF_DIR/whitelist.conf and CONF_DIR/blacklist.conf. If the whitelist is 235 | # missing or empty, all metrics will pass through 236 | # USE_WHITELIST = False 237 | 238 | # By default, carbon itself will log statistics (such as a count, 239 | # metricsReceived) with the top level prefix of 'carbon' at an interval of 60 240 | # seconds. Set CARBON_METRIC_INTERVAL to 0 to disable instrumentation 241 | # CARBON_METRIC_PREFIX = carbon 242 | # CARBON_METRIC_INTERVAL = 60 243 | 244 | # Enable AMQP if you want to receive metrics using an amqp broker 245 | # ENABLE_AMQP = False 246 | 247 | # Verbose means a line will be logged for every metric received, 248 | # useful for testing 249 | # AMQP_VERBOSE = False 250 | 251 | # AMQP_HOST = localhost 252 | # AMQP_PORT = 5672 253 | # AMQP_VHOST = / 254 | # AMQP_USER = guest 255 | # AMQP_PASSWORD = guest 256 | # AMQP_EXCHANGE = graphite 257 | # AMQP_METRIC_NAME_IN_BODY = False 258 | 259 | # The manhole interface allows you to SSH into the carbon daemon 260 | # and get a python interpreter. BE CAREFUL WITH THIS! If you do 261 | # something like time.sleep() in the interpreter, the whole process 262 | # will sleep! This is *extremely* helpful in debugging, assuming 263 | # you are familiar with the code.
If you are not, please don't 264 | # mess with this, you are asking for trouble :) 265 | # 266 | # ENABLE_MANHOLE = False 267 | # MANHOLE_INTERFACE = 127.0.0.1 268 | # MANHOLE_PORT = 7222 269 | # MANHOLE_USER = admin 270 | # MANHOLE_PUBLIC_KEY = ssh-rsa AAAAB3NzaC1yc2EAAAABiwAaAIEAoxN0sv/e4eZCPpi3N3KYvyzRaBaMeS2RsOQ/cDuKv11dlNzVeiyc3RFmCv5Rjwn/lQ79y0zyHxw67qLyhQ/kDzINc4cY41ivuQXm2tPmgvexdrBv5nsfEpjs3gLZfJnyvlcVyWK/lId8WUvEWSWHTzsbtmXAF2raJMdgLTbQ8wE= 271 | 272 | # Patterns for all of the metrics this machine will store. Read more at 273 | # http://en.wikipedia.org/wiki/Advanced_Message_Queuing_Protocol#Bindings 274 | # 275 | # Example: store all sales, linux servers, and utilization metrics 276 | # BIND_PATTERNS = sales.#, servers.linux.#, #.utilization 277 | # 278 | # Example: store everything 279 | # BIND_PATTERNS = # 280 | 281 | # To configure special settings for the carbon-cache instance 'b', uncomment this: 282 | #[cache:b] 283 | #LINE_RECEIVER_PORT = 2103 284 | #PICKLE_RECEIVER_PORT = 2104 285 | #CACHE_QUERY_PORT = 7102 286 | # and any other settings you want to customize, defaults are inherited 287 | # from the [cache] section. 288 | # You can then specify the --instance=b option to manage this instance 289 | # 290 | # In order to turn off logging of successful connections for the line 291 | # receiver, set this to False 292 | # LOG_LISTENER_CONN_SUCCESS = True 293 | 294 | -------------------------------------------------------------------------------- /docker/graphite-node/conf/storage-aggregation.conf: -------------------------------------------------------------------------------- 1 | # Aggregation methods for whisper files. Entries are scanned in order, 2 | # and first match wins. This file is scanned for changes every 60 seconds 3 | # 4 | # [name] 5 | # pattern = 6 | # xFilesFactor = 7 | # aggregationMethod = 8 | # 9 | # name: Arbitrary unique name for the rule 10 | # pattern: Regex pattern to match against the metric name 11 | # xFilesFactor: Ratio of valid data points required for aggregation to the next retention to occur 12 | # aggregationMethod: function to apply to data points for aggregation 13 | # 14 | [min] 15 | pattern = \.lower$ 16 | xFilesFactor = 0.1 17 | aggregationMethod = min 18 | 19 | [max] 20 | pattern = \.upper(_\d+)?$ 21 | xFilesFactor = 0.1 22 | aggregationMethod = max 23 | 24 | [sum] 25 | pattern = \.sum$ 26 | xFilesFactor = 0 27 | aggregationMethod = sum 28 | 29 | [avg_count] 30 | pattern = \.avg\.count$ 31 | xFilesFactor = 0 32 | aggregationMethod = average 33 | 34 | [count] 35 | pattern = \.count$ 36 | xFilesFactor = 0 37 | aggregationMethod = sum 38 | 39 | [count_legacy] 40 | pattern = ^stats_counts.* 41 | xFilesFactor = 0 42 | aggregationMethod = sum 43 | 44 | [default_average] 45 | pattern = .* 46 | xFilesFactor = 0 47 | aggregationMethod = average 48 | -------------------------------------------------------------------------------- /docker/graphite-node/conf/storage-schemas.conf: -------------------------------------------------------------------------------- 1 | # Schema definitions for Whisper files. Entries are scanned in order, 2 | # and first match wins. This file is scanned for changes every 60 seconds. 3 | # 4 | # Definition Syntax: 5 | # 6 | # [name] 7 | # pattern = regex 8 | # retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ... 
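#
# Rough sizing sketch (illustrative): with retentions = 5s:1d,1m:7d the first
# archive holds 86400/5 = 17280 points and the second 7*24*60 = 10080 points;
# at 12 bytes per whisper point that is about (17280+10080)*12 = 328320 bytes,
# roughly 320 KiB per metric, plus a small header.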
9 | # 10 | # Remember: To support accurate aggregation from higher to lower resolution 11 | # archives, the precision of a longer retention archive must be 12 | # cleanly divisible by precision of next lower retention archive. 13 | # 14 | # Valid: 60s:7d,300s:30d (300/60 = 5) 15 | # Invalid: 180s:7d,300s:30d (300/180 = 3.333) 16 | # 17 | 18 | # Carbon's internal metrics. This entry should match what is specified in 19 | # CARBON_METRIC_PREFIX and CARBON_METRIC_INTERVAL settings 20 | 21 | [carbon] 22 | pattern = ^carbon\. 23 | retentions = 5s:6h,1m:90d 24 | 25 | [low_retention] 26 | pattern = \.low_ret\. 27 | retentions = 5s:1d,1m:7d 28 | 29 | [default_5seconds_for_1day] 30 | pattern = .* 31 | retentions = 5s:1d,1m:7d,10m:365d 32 | 33 | -------------------------------------------------------------------------------- /docker/graphite-node/curator/cron: -------------------------------------------------------------------------------- 1 | 0 0 * * * /etc/cron.d/curator.sh >> /var/log/curator.log 2>&1 2 | -------------------------------------------------------------------------------- /docker/graphite-node/curator/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "$(date) Starting curator with {{CURATOR_RETENTION}} retention" 4 | 5 | set -x 6 | find /opt/graphite/storage/whisper/ -type f -name "*wsp" -mtime +{{CURATOR_RETENTION}} -delete 7 | find /opt/graphite/storage/whisper/ -type d -empty -delete 8 | set +x 9 | 10 | echo "$(date) Done" 11 | -------------------------------------------------------------------------------- /docker/graphite-node/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | 5 | if [ -n "$CURATOR_RETENTION" ] 6 | then 7 | sed -i "s/{{CURATOR_RETENTION}}/$CURATOR_RETENTION/g" /etc/cron.d/curator.sh 8 | crontab -u root /etc/cron.d/curator.cron 9 | fi 10 | 11 | exec /usr/bin/supervisord 12 | -------------------------------------------------------------------------------- /docker/graphite-node/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | daemon off; 2 | user root; 3 | worker_processes 1; 4 | pid /var/run/nginx.pid; 5 | 6 | events { 7 | worker_connections 1024; 8 | } 9 | 10 | http { 11 | sendfile on; 12 | tcp_nopush on; 13 | tcp_nodelay on; 14 | keepalive_timeout 65; 15 | types_hash_max_size 2048; 16 | server_tokens off; 17 | 18 | server_names_hash_bucket_size 32; 19 | 20 | include /etc/nginx/mime.types; 21 | default_type application/octet-stream; 22 | 23 | access_log /var/log/nginx/access.log; 24 | error_log /var/log/nginx/error.log; 25 | 26 | gzip on; 27 | gzip_disable "msie6"; 28 | 29 | server { 30 | listen 80 default_server; 31 | server_name _; 32 | 33 | open_log_file_cache max=1000 inactive=20s min_uses=2 valid=1m; 34 | 35 | location / { 36 | proxy_pass http://127.0.0.1:8000; 37 | proxy_set_header X-Real-IP $remote_addr; 38 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 39 | proxy_set_header X-Forwarded-Proto $scheme; 40 | proxy_set_header X-Forwarded-Server $host; 41 | proxy_set_header X-Forwarded-Host $http_host; 42 | proxy_set_header Host $http_host; 43 | 44 | client_max_body_size 10m; 45 | client_body_buffer_size 128k; 46 | 47 | proxy_connect_timeout 90; 48 | proxy_send_timeout 90; 49 | proxy_read_timeout 90; 50 | 51 | proxy_buffer_size 4k; 52 | proxy_buffers 4 32k; 53 | proxy_busy_buffers_size 64k; 54 | proxy_temp_file_write_size 64k; 55 | } 56 | 57 |
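# The CORS headers below allow pages served from other origins to issue
# GET requests against this graphite webapp (for example, external
# dashboards querying the render API).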
add_header Access-Control-Allow-Origin "*"; 58 | add_header Access-Control-Allow-Methods "GET, OPTIONS"; 59 | add_header Access-Control-Allow-Headers "origin, authorization, accept"; 60 | 61 | location /content { 62 | alias /opt/graphite/webapp/content; 63 | } 64 | 65 | location /media { 66 | alias /usr/share/pyshared/django/contrib/admin/media; 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /docker/graphite-node/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon = true 3 | environment = GRAPHITE_STORAGE_DIR='/opt/graphite/storage',GRAPHITE_CONF_DIR='/opt/graphite/conf' 4 | 5 | [program:nginx] 6 | command = /usr/sbin/nginx 7 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 8 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 9 | autorestart = true 10 | 11 | [program:carbon-cache] 12 | user = root 13 | command = /opt/graphite/bin/carbon-cache.py --debug start 14 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 15 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 16 | autorestart = true 17 | 18 | [program:graphite-webapp] 19 | user = root 20 | directory = /opt/graphite/webapp 21 | environment = PYTHONPATH='/opt/graphite/webapp' 22 | command = /usr/bin/gunicorn -b127.0.0.1:8000 graphite.wsgi:application 23 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 24 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 25 | autorestart = true 26 | 27 | [program:cron] 28 | user = root 29 | command = /usr/sbin/cron -f -L15 30 | stdout_logfile = /var/log/supervisor/%(program_name)s.log 31 | stderr_logfile = /var/log/supervisor/%(program_name)s.log 32 | autostart=true 33 | autorestart=true 34 | -------------------------------------------------------------------------------- /docker/graphite-node/webapp/initial_data.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pk": 1, 4 | "model": "auth.user", 5 | "fields": { 6 | "username": "admin", 7 | "first_name": "", 8 | "last_name": "", 9 | "is_active": true, 10 | "is_superuser": true, 11 | "is_staff": true, 12 | "last_login": "2011-09-20 17:02:14", 13 | "groups": [], 14 | "user_permissions": [], 15 | "password": "sha1$1b11b$edeb0a67a9622f1f2cfeabf9188a711f5ac7d236", 16 | "email": "root@example.com", 17 | "date_joined": "2011-09-20 17:02:14" 18 | } 19 | } 20 | ] 21 | -------------------------------------------------------------------------------- /docker/graphite-node/webapp/local_settings.py: -------------------------------------------------------------------------------- 1 | ## Graphite local_settings.py 2 | # Edit this file to customize the default Graphite webapp settings 3 | # 4 | # Additional customizations to Django settings can be added to this file as well 5 | 6 | ##################################### 7 | # General Configuration # 8 | ##################################### 9 | # 10 | # Set this to a long, random unique string to use as a secret key for this 11 | # install. This key is used for salting of hashes used in auth tokens, 12 | # CSRF middleware, cookie storage, etc. This should be set identically among 13 | # instances if used behind a load balancer. 14 | #SECRET_KEY = 'UNSAFE_DEFAULT' 15 | 16 | # In Django 1.5+ set this to the list of hosts your graphite instance is 17 | # accessible as.
See: 18 | # https://docs.djangoproject.com/en/dev/ref/settings/#std:setting-ALLOWED_HOSTS 19 | #ALLOWED_HOSTS = [ '*' ] 20 | 21 | # Set your local timezone (Django's default is America/Chicago) 22 | # If your graphs appear to be offset by a couple hours then this probably 23 | # needs to be explicitly set to your local timezone. 24 | #TIME_ZONE = 'America/Los_Angeles' 25 | 26 | # Set the default short date format. See strftime(3) for supported sequences. 27 | #DATE_FORMAT = '%m/%d' 28 | 29 | # Override this to provide documentation specific to your Graphite deployment 30 | #DOCUMENTATION_URL = "http://graphite.readthedocs.io/" 31 | 32 | # Logging 33 | LOG_ROTATION = True 34 | LOG_ROTATION_COUNT = 1 35 | LOG_RENDERING_PERFORMANCE = True 36 | #LOG_CACHE_PERFORMANCE = True 37 | 38 | # Enable full debug page display on exceptions (Internal Server Error pages) 39 | #DEBUG = True 40 | 41 | # If using RRD files and rrdcached, set to the address or socket of the daemon 42 | #FLUSHRRDCACHED = 'unix:/var/run/rrdcached.sock' 43 | 44 | # This lists the memcached servers that will be used by this webapp. 45 | # If you have a cluster of webapps you should ensure all of them 46 | # have the *exact* same value for this setting. That will maximize cache 47 | # efficiency. Setting MEMCACHE_HOSTS to be empty will turn off use of 48 | # memcached entirely. 49 | # 50 | # You should not use the loopback address (127.0.0.1) here if using clustering 51 | # as every webapp in the cluster should use the exact same values to prevent 52 | # unneeded cache misses. Set to [] to disable caching of images and fetched data 53 | #MEMCACHE_HOSTS = ['10.10.10.10:11211', '10.10.10.11:11211', '10.10.10.12:11211'] 54 | 55 | # Metric data and graphs are cached for one minute by default. If defined, 56 | # DEFAULT_CACHE_POLICY is a list of tuples of minimum query time ranges mapped 57 | # to the cache duration for the results. This allows for larger queries to be 58 | # cached for longer periods of time. All times are in seconds. If the policy is 59 | # empty or undefined, all results will be cached for DEFAULT_CACHE_DURATION. 60 | #DEFAULT_CACHE_DURATION = 60 # Cache images and data for 1 minute 61 | #DEFAULT_CACHE_POLICY = [(0, 60), # default is 60 seconds 62 | # (7200, 120), # >= 2 hour queries are cached 2 minutes 63 | # (21600, 180)] # >= 6 hour queries are cached 3 minutes 64 | #MEMCACHE_KEY_PREFIX = 'graphite' 65 | 66 | 67 | # This lists the memcached options. Default is an empty dict. 68 | # Accepted options depend on the Memcached implementation and the Django version. 69 | # Until Django 1.10, options are used only for pylibmc. 70 | # Starting from 1.11, options are used for both python-memcached and pylibmc. 71 | #MEMCACHE_OPTIONS = { 'socket_timeout': 0.5 } 72 | 73 | # Set URL_PREFIX when deploying graphite-web to a non-root location 74 | #URL_PREFIX = '/graphite' 75 | 76 | # Graphite uses Django Tagging to support tags in Events. By default each 77 | # tag is limited to 50 characters in length. 78 | #MAX_TAG_LENGTH = 50 79 | 80 | # Interval for the Auto-Refresh feature in the Composer, measured in seconds.
81 | #AUTO_REFRESH_INTERVAL = 60 82 | 83 | ##################################### 84 | # Filesystem Paths # 85 | ##################################### 86 | # 87 | # Change only GRAPHITE_ROOT if your install is merely shifted from /opt/graphite 88 | # to somewhere else 89 | #GRAPHITE_ROOT = '/opt/graphite' 90 | 91 | # Most installs done outside of a separate tree such as /opt/graphite will 92 | # need to change these settings. Note that the default settings for each 93 | # of these is relative to GRAPHITE_ROOT. 94 | #CONF_DIR = '/opt/graphite/conf' 95 | #STORAGE_DIR = '/opt/graphite/storage' 96 | #STATIC_ROOT = '/opt/graphite/static' 97 | #LOG_DIR = '/opt/graphite/storage/log/webapp' 98 | #INDEX_FILE = '/opt/graphite/storage/index' # Search index file 99 | 100 | # To further or fully customize the paths, modify the following. Note that the 101 | # default settings for each of these are relative to CONF_DIR and STORAGE_DIR 102 | # 103 | ## Webapp config files 104 | #DASHBOARD_CONF = '/opt/graphite/conf/dashboard.conf' 105 | #GRAPHTEMPLATES_CONF = '/opt/graphite/conf/graphTemplates.conf' 106 | 107 | ## Data directories 108 | # 109 | # NOTE: If any directory is unreadable in STANDARD_DIRS it will break metric browsing 110 | # 111 | #CERES_DIR = '/opt/graphite/storage/ceres' 112 | #WHISPER_DIR = '/opt/graphite/storage/whisper' 113 | #RRD_DIR = '/opt/graphite/storage/rrd' 114 | # 115 | # Data directories using the "Standard" metrics finder (i.e. not Ceres) 116 | #STANDARD_DIRS = [WHISPER_DIR, RRD_DIR] # Default: set from the above variables 117 | 118 | 119 | ##################################### 120 | # Email Configuration # 121 | ##################################### 122 | # 123 | # This is used for emailing rendered graphs. The default backend is SMTP. 124 | #EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' 125 | # 126 | # To drop emails on the floor, enable the Dummy backend instead. 127 | #EMAIL_BACKEND = 'django.core.mail.backends.dummy.EmailBackend' 128 | 129 | #EMAIL_HOST = 'localhost' 130 | #EMAIL_PORT = 25 131 | #EMAIL_HOST_USER = '' 132 | #EMAIL_HOST_PASSWORD = '' 133 | #EMAIL_USE_TLS = False 134 | 135 | 136 | ##################################### 137 | # Authentication Configuration # 138 | ##################################### 139 | # 140 | ## LDAP / ActiveDirectory authentication setup 141 | #USE_LDAP_AUTH = True 142 | #LDAP_SERVER = "ldap.mycompany.com" 143 | #LDAP_PORT = 389 144 | #LDAP_USE_TLS = False 145 | 146 | ## Manual URI / query setup 147 | #LDAP_URI = "ldaps://ldap.mycompany.com:636" 148 | #LDAP_SEARCH_BASE = "OU=users,DC=mycompany,DC=com" 149 | #LDAP_BASE_USER = "CN=some_readonly_account,DC=mycompany,DC=com" 150 | #LDAP_BASE_PASS = "readonly_account_password" 151 | #LDAP_USER_QUERY = "(username=%s)" #For Active Directory use "(sAMAccountName=%s)" 152 | 153 | # User DN template to use for binding (and authentication) against the 154 | # LDAP server. %(username) is replaced with the username supplied at 155 | # graphite login. 156 | #LDAP_USER_DN_TEMPLATE = "CN=%(username)s,OU=users,DC=mycompany,DC=com" 157 | 158 | # If you want to further customize the ldap connection options you should 159 | # directly use ldap.set_option to set the ldap module's global options. 
160 | # For example: 161 | # 162 | #import ldap 163 | #ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW) # Use ldap.OPT_X_TLS_DEMAND to force TLS 164 | #ldap.set_option(ldap.OPT_REFERRALS, 0) # Enable for Active Directory 165 | #ldap.set_option(ldap.OPT_X_TLS_CACERTDIR, "/etc/ssl/ca") 166 | #ldap.set_option(ldap.OPT_X_TLS_CERTFILE, "/etc/ssl/mycert.pem") 167 | #ldap.set_option(ldap.OPT_X_TLS_KEYFILE, "/etc/ssl/mykey.pem") 168 | #ldap.set_option(ldap.OPT_DEBUG_LEVEL, 65535) # To enable verbose debugging 169 | # See http://www.python-ldap.org/ for further details on these options. 170 | 171 | ## REMOTE_USER authentication. See: https://docs.djangoproject.com/en/dev/howto/auth-remote-user/ 172 | #USE_REMOTE_USER_AUTHENTICATION = True 173 | 174 | # Override the URL for the login link (e.g. for django_openid_auth) 175 | #LOGIN_URL = '/account/login' 176 | 177 | 178 | ############################### 179 | # Authorization for Dashboard # 180 | ############################### 181 | # By default, there is no security on dashboards - any user can add, change or delete them. 182 | # This section provides 3 different authorization models, of varying strictness. 183 | 184 | # If set to True, users must be logged in to save or delete dashboards. Defaults to False 185 | #DASHBOARD_REQUIRE_AUTHENTICATION = True 186 | 187 | # If set to the name of a user group, dashboards can be saved and deleted by any user in this 188 | # group. Groups can be set in the Django Admin app, or in LDAP. Defaults to None. 189 | # NOTE: Ignored if DASHBOARD_REQUIRE_AUTHENTICATION is not set 190 | #DASHBOARD_REQUIRE_EDIT_GROUP = 'dashboard-editors-group' 191 | 192 | # If set to True, dashboards can be saved or deleted by any user having the appropriate 193 | # (change or delete) permission (as set in the Django Admin app). Defaults to False 194 | # NOTE: Ignored if DASHBOARD_REQUIRE_AUTHENTICATION is not set 195 | #DASHBOARD_REQUIRE_PERMISSIONS = True 196 | 197 | 198 | ########################## 199 | # Database Configuration # 200 | ########################## 201 | # 202 | # By default sqlite is used. If you cluster multiple webapps you will need 203 | # to set up an external database (such as MySQL) and configure all of the webapp 204 | # instances to use the same database. Note that this database is only used to store 205 | # Django models such as saved graphs, dashboards, user preferences, etc. 206 | # Metric data is not stored here. 207 | # 208 | # DO NOT FORGET TO RUN MIGRATIONS AFTER SETTING UP A NEW DATABASE 209 | # http://graphite.readthedocs.io/en/latest/config-database-setup.html 210 | # 211 | # 212 | # The following built-in database engines are available: 213 | # django.db.backends.postgresql_psycopg2 214 | # django.db.backends.mysql 215 | # django.db.backends.sqlite3 216 | # django.db.backends.oracle 217 | # 218 | # The default is 'django.db.backends.sqlite3' with file 'graphite.db' 219 | # located in STORAGE_DIR 220 | # 221 | #DATABASES = { 222 | # 'default': { 223 | # 'NAME': '/opt/graphite/storage/graphite.db', 224 | # 'ENGINE': 'django.db.backends.sqlite3', 225 | # 'USER': '', 226 | # 'PASSWORD': '', 227 | # 'HOST': '', 228 | # 'PORT': '' 229 | # } 230 | #} 231 | # 232 | 233 |
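Since the comments above call for an external database (such as MySQL) when clustering webapps, here is a hedged MySQL variant of the DATABASES block; the host, user, and password are placeholders, not values used by this repo, and migrations must still be run as the note above warns:

    DATABASES = {
        'default': {
            'NAME': 'graphite',
            'ENGINE': 'django.db.backends.mysql',
            'USER': 'graphite',
            'PASSWORD': 'changeme',        # placeholder
            'HOST': 'mysql.example.com',   # placeholder
            'PORT': '3306'
        }
    }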
######################### 235 | # Cluster Configuration # 236 | ######################### 237 | # 238 | # To avoid excessive DNS lookups you want to stick to using IP addresses only 239 | # in this entire section. 240 | # 241 | 242 | # This should list the IP address (and optionally port) of the webapp on each 243 | # remote server in the cluster. These servers must each have local access to 244 | # metric data. Note that the first server to return a match for a query will be 245 | # used. 246 | #CLUSTER_SERVERS = ["node-1:80", "node-2:80", "node-3:80"] 247 | 248 | # This setting controls whether https is used to communicate between cluster members 249 | #INTRACLUSTER_HTTPS = False 250 | 251 | # These are timeout values (in seconds) for requests to remote webapps 252 | #REMOTE_FIND_TIMEOUT = 3.0 # Timeout for metric find requests 253 | #REMOTE_FETCH_TIMEOUT = 3.0 # Timeout to fetch series data 254 | #REMOTE_RETRY_DELAY = 60.0 # Time before retrying a failed remote webapp 255 | 256 | # Try to detect when a cluster server is localhost and don't forward queries 257 | #REMOTE_EXCLUDE_LOCAL = False 258 | 259 | # Number of retries for a specific remote data fetch. 260 | #MAX_FETCH_RETRIES = 2 261 | 262 | #FIND_CACHE_DURATION = 300 # Time to cache remote metric find results 263 | # If the query doesn't fall entirely within the FIND_TOLERANCE window 264 | # we disregard the window. This prevents unnecessary remote fetches 265 | # caused when carbon's cache skews node.intervals, giving the appearance that 266 | # remote systems have data we don't have locally, which we probably do. 267 | #FIND_TOLERANCE = 2 * FIND_CACHE_DURATION 268 | 269 | # During a rebalance of a consistent hash cluster, after a partition event on a replication > 1 cluster, 270 | # or in other cases we might receive multiple TimeSeries data for a metric key. Merge them together rather 271 | # than choosing the "most complete" one (pre-0.9.14 behaviour). 272 | #REMOTE_STORE_MERGE_RESULTS = True 273 | 274 | ## Remote rendering settings 275 | # Set to True to enable rendering of Graphs on a remote webapp 276 | #REMOTE_RENDERING = True 277 | # List of IP (and optionally port) of the webapp on each remote server that 278 | # will be used for rendering. Note that each rendering host should have local 279 | # access to metric data or should have CLUSTER_SERVERS configured 280 | #RENDERING_HOSTS = [] 281 | #REMOTE_RENDER_CONNECT_TIMEOUT = 1.0 282 | 283 | # If you are running multiple carbon-caches on this machine (typically behind 284 | # a relay using consistent hashing), you'll need to list the ip address, cache 285 | # query port, and instance name of each carbon-cache instance on the local 286 | # machine (NOT every carbon-cache in the entire cluster). The default cache 287 | # query port is 7002 and a common scheme is to use 7102 for instance b, 7202 288 | # for instance c, etc. 289 | # If you're using consistent hashing, please keep the order of hosts the same as 290 | # the order of DESTINATIONS in your relay - otherwise you'll get cache misses. 291 | # 292 | # You *should* use 127.0.0.1 here in most cases. 293 | # 294 | #CARBONLINK_HOSTS = ["127.0.0.1:7002:a", "127.0.0.1:7102:b", "127.0.0.1:7202:c"] 295 | #CARBONLINK_TIMEOUT = 1.0 296 | #CARBONLINK_RETRY_DELAY = 15 # Seconds to blacklist a failed remote server 297 | # 298 |
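To make the consistent-hashing note above concrete, here is a simplified Python sketch in the spirit of carbon's carbon_ch ring (md5-based positions with virtual replicas). It is a toy model for intuition, not carbon's or graphite-web's exact implementation:

    import bisect
    from hashlib import md5

    def ring_position(key):
        # Use the first two bytes of the md5 digest as the ring position.
        return int(md5(key.encode('utf-8')).hexdigest()[:4], 16)

    def build_ring(nodes, replicas=100):
        # Insert each node at many positions to smooth the distribution.
        ring = []
        for node in nodes:
            for i in range(replicas):
                bisect.insort(ring, (ring_position('%s:%d' % (node, i)), node))
        return ring

    def pick_node(ring, metric):
        # First ring entry at or after the metric's position, wrapping around.
        idx = bisect.bisect_left(ring, (ring_position(metric), '')) % len(ring)
        return ring[idx][1]

    ring = build_ring(['127.0.0.1:7002:a', '127.0.0.1:7102:b', '127.0.0.1:7202:c'])
    print(pick_node(ring, 'stats.counters.myapp.logins.count'))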
299 | # Type of metric hashing function. 300 | # The default `carbon_ch` is Graphite's traditional consistent-hashing implementation. 301 | # Alternatively, you can use `fnv1a_ch`, which uses the Fowler-Noll-Vo 302 | # (FNV-1a) hash implementation offered by the carbon-c-relay project 303 | # https://github.com/grobian/carbon-c-relay 304 | # 305 | # Supported values: carbon_ch, fnv1a_ch 306 | # 307 | #CARBONLINK_HASHING_TYPE = 'carbon_ch' 308 | 309 | # A "keyfunc" is a user-defined python function that is given a metric name 310 | # and returns a string that should be used when hashing the metric name. 311 | # This is important when your hashing has to respect certain metric groupings. 312 | #CARBONLINK_HASHING_KEYFUNC = "/opt/graphite/bin/keyfuncs.py:my_keyfunc" 313 | 314 | # Prefix for internal carbon statistics. 315 | #CARBON_METRIC_PREFIX='carbon' 316 | 317 | # The replication factor to use with consistent hashing. 318 | # This should usually match the value configured in Carbon. 319 | #REPLICATION_FACTOR = 1 320 | 321 | 322 | ##################################### 323 | # Additional Django Settings # 324 | ##################################### 325 | # Uncomment the following line for direct access to Django settings such as 326 | # MIDDLEWARE_CLASSES or APPS 327 | #from graphite.app_settings import * 328 | 329 | 330 | -------------------------------------------------------------------------------- /docker/statsd-daemon/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:8-alpine 2 | RUN apk add --update git 3 | RUN mkdir -p /app 4 | WORKDIR /app 5 | RUN git clone https://github.com/etsy/statsd.git 6 | WORKDIR /app/statsd 7 | COPY config.js . 8 | 9 | EXPOSE 8125/udp 8126 10 | 11 | CMD ["node", "stats.js", "config.js"] 12 | -------------------------------------------------------------------------------- /docker/statsd-daemon/config.js: -------------------------------------------------------------------------------- 1 | /* 2 | Graphite Required Variable: 3 | 4 | (Leave this unset to avoid sending stats to Graphite. 5 | Set debug flag and leave this unset to run in 'dry' debug mode - 6 | useful for testing statsd clients without a Graphite server.) 7 | 8 | graphiteHost: hostname or IP of Graphite server 9 | 10 | Optional Variables: 11 | 12 | graphitePort: port for the graphite text collector [default: 2003] 13 | graphitePicklePort: port for the graphite pickle collector [default: 2004] 14 | graphiteProtocol: either 'text' or 'pickle' [default: 'text'] 15 | backends: an array of backends to load. Each backend must exist 16 | by name in the directory backends/. If not specified, 17 | the default graphite backend will be loaded. 18 | * example for console and graphite: 19 | [ "./backends/console", "./backends/graphite" ] 20 | 21 | servers: an array of server configurations. 22 | If not specified, the server, address, 23 | address_ipv6, and port top-level configuration 24 | options are used to configure a single server for 25 | backwards-compatibility 26 | Each server configuration supports the following keys: 27 | server: the server to load. The server must exist by name in the directory 28 | servers/. If not specified, the default udp server will be loaded.
29 | * example for tcp server: 30 | "./servers/tcp" 31 | address: address to listen on [default: 0.0.0.0] 32 | address_ipv6: defines if the address is an IPv4 or IPv6 address [true or false, default: false] 33 | port: port to listen for messages on [default: 8125] 34 | socket: (only for tcp servers) path to unix domain socket which will be used to receive 35 | metrics [default: undefined] 36 | socket_mod: (only for tcp servers) file mode which should be applied to unix domain socket, relevant 37 | only if socket option is used [default: undefined] 38 | 39 | debug: debug flag [default: false] 40 | mgmt_address: address to run the management TCP interface on 41 | [default: 0.0.0.0] 42 | mgmt_port: port to run the management TCP interface on [default: 8126] 43 | title: Allows for overriding the process title. [default: statsd] 44 | if set to false, will not override the process title and let the OS set it. 45 | The length of the title has to be less than or equal to the binary name + cli arguments 46 | NOTE: This does not work on Macs with node versions prior to v0.10 47 | 48 | healthStatus: default health status to be returned when the statsd process starts ['up' or 'down', default: 'up'] 49 | dumpMessages: log all incoming messages 50 | flushInterval: interval (in ms) to flush metrics to each backend 51 | percentThreshold: for time information, calculate the Nth percentile(s) 52 | (can be a single value or list of floating-point values) 53 | negative values mean to use "top" Nth percentile(s) values 54 | [%, default: 90] 55 | flush_counts: send stats_counts metrics [default: true] 56 | 57 | keyFlush: log the most frequently sent keys [object, default: undefined] 58 | interval: how often to log frequent keys [ms, default: 0] 59 | percent: percentage of frequent keys to log [%, default: 100] 60 | log: location of log file for frequent keys [default: STDOUT] 61 | deleteIdleStats: don't send values to graphite for inactive counters, sets, gauges, or timers 62 | as opposed to sending 0. For gauges, this unsets the gauge (instead of sending 63 | the previous value). Can be individually overridden. [default: false] 64 | deleteGauges: don't send values to graphite for inactive gauges, as opposed to sending the previous value [default: false] 65 | deleteTimers: don't send values to graphite for inactive timers, as opposed to sending 0 [default: false] 66 | deleteSets: don't send values to graphite for inactive sets, as opposed to sending 0 [default: false] 67 | deleteCounters: don't send values to graphite for inactive counters, as opposed to sending 0 [default: false] 68 | prefixStats: prefix to use for the statsd statistics data for this running instance of statsd [default: statsd] 69 | applies to both legacy and new namespacing 70 | keyNameSanitize: sanitize all stat names on ingress [default: true] 71 | If disabled, it is up to the backends to sanitize keynames 72 | as appropriate per their storage requirements.
73 | 74 | console: 75 | prettyprint: whether to prettyprint the console backend 76 | output [true or false, default: true] 77 | 78 | log: log settings [object, default: undefined] 79 | backend: where to log: stdout or syslog [string, default: stdout] 80 | application: name of the application for syslog [string, default: statsd] 81 | level: log level for [node-]syslog [string, default: LOG_INFO] 82 | 83 | graphite: 84 | legacyNamespace: use the legacy namespace [default: true] 85 | globalPrefix: global prefix to use for sending stats to graphite [default: "stats"] 86 | prefixCounter: graphite prefix for counter metrics [default: "counters"] 87 | prefixTimer: graphite prefix for timer metrics [default: "timers"] 88 | prefixGauge: graphite prefix for gauge metrics [default: "gauges"] 89 | prefixSet: graphite prefix for set metrics [default: "sets"] 90 | globalSuffix: global suffix to use for sending stats to graphite [default: ""] 91 | This is particularly useful for sending per host stats by 92 | setting this value to: require('os').hostname().split('.')[0] 93 | 94 | repeater: an array of hashes of the form host: and port: 95 | that details other statsd servers to which the received 96 | packets should be "repeated" (duplicated to). 97 | e.g. [ { host: '10.10.10.10', port: 8125 }, 98 | { host: 'observer', port: 8125 } ] 99 | 100 | repeaterProtocol: whether to use udp4, udp6, or tcp for repeaters. 101 | ["udp4", "udp6", or "tcp"; default: "udp4"] 102 | 103 | histogram: for timers, an array of mappings of strings (to match metrics) and 104 | corresponding ordered non-inclusive upper limits of bins. 105 | For all matching metrics, histograms are maintained over 106 | time by writing the frequencies for all bins. 107 | 'inf' means infinity. A lower limit of 0 is assumed. 108 | default: [], meaning no histograms for any timer. 109 | First match wins. examples: 110 | * histogram to only track render durations, with unequal 111 | class intervals and catchall for outliers: 112 | [ { metric: 'render', bins: [ 0.01, 0.1, 1, 10, 'inf'] } ] 113 | * histogram for all timers except 'foo' related, 114 | equal class interval and catchall for outliers: 115 | [ { metric: 'foo', bins: [] }, 116 | { metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ] 117 | 118 | automaticConfigReload: whether to watch the config file and reload it when it 119 | changes. The default is true. Set this to false to disable.
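A daemon configured as below only needs clients that speak the plain statsd line protocol over UDP. A minimal Python sketch (the 'statsd' host name matches the proxy Service this repo's Makefile deploys; treat it as a placeholder and adjust host and metric names to your environment):

    import socket

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    addr = ('statsd', 8125)  # in-cluster Service name; placeholder elsewhere

    def emit(line):
        # One metric per datagram: "<name>:<value>|<type>"
        sock.sendto(line.encode('ascii'), addr)

    emit('myapp.logins:1|c')        # counter
    emit('myapp.render_ms:320|ms')  # timer
    emit('myapp.queue_depth:42|g')  # gauge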
120 | */ 121 | { 122 | mgmt_port: 8126, 123 | servers: [ {server: "./servers/udp", port: 8125} ], 124 | backends: [ "./backends/graphite" ], 125 | graphiteHost: "carbon-relay", 126 | graphiteProtocol: "pickle", 127 | graphitePort: 2003, 128 | graphitePicklePort: 2004, 129 | flushInterval: 5000, 130 | deleteIdleStats: true, 131 | graphite: 132 | { 133 | legacyNamespace: false, 134 | prefixCounter: "counters", 135 | prefixTimer: "timers", 136 | prefixGauge: "gauges" 137 | } 138 | } 139 | 140 | -------------------------------------------------------------------------------- /docker/statsd-proxy/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:8-alpine 2 | RUN apk add --update git bash curl jq 3 | RUN mkdir -p /app 4 | WORKDIR /app 5 | RUN git clone https://github.com/etsy/statsd.git 6 | WORKDIR /app/statsd 7 | RUN npm install -g forever 8 | RUN npm install hashring kubernetes-client@5 json-stream --save 9 | COPY * ./ 10 | 11 | EXPOSE 8125/udp 8126 12 | 13 | CMD ["forever", "statsd-proxy.json"] 14 | -------------------------------------------------------------------------------- /docker/statsd-proxy/kube-watch.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const util = require('util'); 3 | const Client = require('kubernetes-client').Client; 4 | const config = require('kubernetes-client').config; 5 | const client = new Client({ config: config.getInCluster() }); 6 | const JSONStream = require('json-stream'); 7 | const jsonStream = new JSONStream(); 8 | const configFilePath = "./proxyConfig.js"; 9 | const namespace = fs.readFileSync('/var/run/secrets/kubernetes.io/serviceaccount/namespace', 'utf8').toString(); 10 |
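// For orientation, the watch events consumed below look roughly like this
// (illustrative shape only; the IPs and ports are made-up values):
//   { type: 'MODIFIED',
//     object: { kind: 'Endpoints',
//               metadata: { name: 'statsd-daemon' },
//               subsets: [ { addresses: [ { ip: '10.2.3.4' }, { ip: '10.2.3.5' } ],
//                            ports: [ { port: 8125, protocol: 'UDP' } ] } ] } }
// getNodes() below flattens the first subset's addresses into the
// { host, port, adminport } entries the proxy's hash ring expects.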
11 | function getNodes(endpoints) { 12 | return endpoints.subsets ? endpoints.subsets[0].addresses.map(e => ({ host: e.ip, port: 8125, adminport: 8126 })) : []; 13 | } 14 | // Rewrite proxyConfig.js in place with the current statsd-daemon endpoints. 15 | function changeConfig(endpoints) { 16 | let currentConfig = fs.readFileSync(configFilePath); 17 | eval("currentConfig = " + currentConfig); 18 | currentConfig.nodes = getNodes(endpoints); 19 | fs.writeFileSync(configFilePath, util.inspect(currentConfig)); 20 | } 21 | 22 | async function main() { 23 | await client.loadSpec(); 24 | const stream = client.apis.v1.ns(namespace).endpoints.getStream({ qs: { watch: true, fieldSelector: 'metadata.name=statsd-daemon' } }); 25 | stream.pipe(jsonStream); 26 | jsonStream.on('data', obj => { 27 | if (!obj) { 28 | return; 29 | } 30 | console.log('Received update:', JSON.stringify(obj)); 31 | changeConfig(obj.object); 32 | }); 33 | } 34 | // A try/catch cannot catch rejections from an async call; handle the returned promise and exit so Kubernetes restarts the pod. 35 | main().catch(error => { 36 | console.error(error); 37 | process.exit(1); 38 | }); 39 | 40 | 41 | -------------------------------------------------------------------------------- /docker/statsd-proxy/proxyConfig.js: -------------------------------------------------------------------------------- 1 | { 2 | nodes: [], 3 | server: './servers/udp', 4 | host: '0.0.0.0', 5 | port: 8125, 6 | mgmt_port: 8126, 7 | forkCount: 0, 8 | checkInterval: 1000, 9 | cacheSize: 10000 10 | } 11 | 12 | -------------------------------------------------------------------------------- /docker/statsd-proxy/statsd-proxy.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "uid": "kube-watch", 4 | "append": true, 5 | "script": "kube-watch.js" 6 | }, 7 | { 8 | "uid": "statsd-proxy", 9 | "append": true, 10 | "watch": true, 11 | "script": "proxy.js", 12 | "args": ["proxyConfig.js"] 13 | } 14 | ] 15 | -------------------------------------------------------------------------------- /kube/carbon-relay/dep.yml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: {{APP_NAME}} 5 | spec: 6 | replicas: {{REPLICAS}} 7 | template: 8 | metadata: 9 | labels: 10 | component: graphite 11 | app: {{APP_NAME}} 12 | spec: 13 | containers: 14 | - name: {{APP_NAME}} 15 | image: {{IMAGE_NAME}} 16 | ports: 17 | - containerPort: 2003 18 | name: plaintext 19 | protocol: TCP 20 | - containerPort: 2004 21 | name: pickle 22 | protocol: TCP 23 | serviceAccountName: graphite-cluster-sa 24 | {{ADDITIONAL_YAML}} 25 | -------------------------------------------------------------------------------- /kube/carbon-relay/svc.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{APP_NAME}} 5 | spec: 6 | selector: 7 | app: {{APP_NAME}} 8 | ports: 9 | - port: 2003 10 | name: plaintext 11 | targetPort: 2003 12 | protocol: TCP 13 | - port: 2004 14 | name: pickle 15 | targetPort: 2004 16 | protocol: TCP 17 | -------------------------------------------------------------------------------- /kube/graphite-master/dep.yml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: {{APP_NAME}} 5 | spec: 6 | replicas: {{REPLICAS}} 7 | template: 8 | metadata: 9 | labels: 10 | component: graphite 11 | app: {{APP_NAME}} 12 | spec: 13 | containers: 14 | - name: {{APP_NAME}} 15 | image: {{IMAGE_NAME}} 16 | resources: 17 | requests: 18 | cpu: 300m 19 | memory: 512Mi 20 | ports: 21 | - containerPort: 80 22 | name: http 23 | protocol: TCP 24 |
serviceAccountName: graphite-cluster-sa 25 | -------------------------------------------------------------------------------- /kube/graphite-master/svc.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{APP_NAME}} 5 | labels: 6 | app: {{APP_NAME}} 7 | spec: 8 | type: ClusterIP 9 | selector: 10 | app: {{APP_NAME}} 11 | ports: 12 | - port: 80 13 | name: http 14 | protocol: TCP 15 | -------------------------------------------------------------------------------- /kube/graphite-node/stateful.set.yml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: StatefulSet 3 | metadata: 4 | name: {{APP_NAME}} 5 | spec: 6 | serviceName: {{APP_NAME}} 7 | podManagementPolicy: Parallel 8 | replicas: {{REPLICAS}} 9 | template: 10 | metadata: 11 | labels: 12 | component: graphite 13 | app: {{APP_NAME}} 14 | spec: 15 | terminationGracePeriodSeconds: 10 16 | containers: 17 | - name: {{APP_NAME}} 18 | image: {{IMAGE_NAME}} 19 | env: 20 | - name: CURATOR_RETENTION 21 | value: "{{CURATOR_RETENTION}}" 22 | resources: 23 | requests: 24 | cpu: 700m 25 | memory: 6Gi 26 | ports: 27 | - containerPort: 80 28 | name: http 29 | protocol: TCP 30 | - containerPort: 2004 31 | name: pickle 32 | protocol: TCP 33 | volumeMounts: 34 | - name: graphite-node-pv 35 | mountPath: /opt/graphite/storage/whisper 36 | {{ADDITIONAL_YAML}} 37 | volumeClaimTemplates: 38 | - metadata: 39 | name: graphite-node-pv 40 | spec: 41 | accessModes: [ "ReadWriteOnce" ] 42 | storageClassName: "{{STORAGE_CLASS}}" 43 | resources: 44 | requests: 45 | storage: "{{DISK_SIZE}}" 46 | -------------------------------------------------------------------------------- /kube/graphite-node/svc.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{APP_NAME}} 5 | labels: 6 | app: {{APP_NAME}} 7 | spec: 8 | clusterIP: None 9 | selector: 10 | app: {{APP_NAME}} 11 | ports: 12 | - port: 80 13 | name: http 14 | protocol: TCP 15 | - port: 2004 16 | name: pickle 17 | protocol: TCP 18 | -------------------------------------------------------------------------------- /kube/rbac/role-binding.yml: -------------------------------------------------------------------------------- 1 | apiVersion: {{RBAC_API_VERSION}} 2 | kind: RoleBinding 3 | metadata: 4 | name: read-endpoints 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: endpoints-reader 9 | subjects: 10 | - kind: ServiceAccount 11 | name: graphite-cluster-sa 12 | -------------------------------------------------------------------------------- /kube/rbac/role.yml: -------------------------------------------------------------------------------- 1 | apiVersion: {{RBAC_API_VERSION}} 2 | kind: Role 3 | metadata: 4 | name: endpoints-reader 5 | rules: 6 | - apiGroups: [""] 7 | resources: ["endpoints"] 8 | verbs: ["get", "watch", "list"] -------------------------------------------------------------------------------- /kube/rbac/serviceaccount.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: graphite-cluster-sa 5 | -------------------------------------------------------------------------------- /kube/statsd-daemon/dep.yml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: 
Deployment 3 | metadata: 4 | name: {{APP_NAME}} 5 | spec: 6 | replicas: {{REPLICAS}} 7 | template: 8 | metadata: 9 | labels: 10 | component: graphite 11 | app: {{APP_NAME}} 12 | spec: 13 | containers: 14 | - name: {{APP_NAME}} 15 | image: {{IMAGE_NAME}} 16 | resources: 17 | requests: 18 | cpu: 900m 19 | memory: 256Mi 20 | ports: 21 | - containerPort: 8125 22 | name: incoming-udp 23 | protocol: UDP 24 | - containerPort: 8126 25 | name: admin 26 | protocol: TCP 27 | {{ADDITIONAL_YAML}} 28 | -------------------------------------------------------------------------------- /kube/statsd-daemon/svc.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{APP_NAME}} 5 | labels: 6 | app: {{APP_NAME}} 7 | spec: 8 | selector: 9 | app: {{APP_NAME}} 10 | clusterIP: None 11 | ports: 12 | - port: 8125 13 | name: incoming-udp 14 | protocol: UDP 15 | - port: 8126 16 | name: admin 17 | protocol: TCP 18 | 19 | -------------------------------------------------------------------------------- /kube/statsd-proxy/dep.yml: -------------------------------------------------------------------------------- 1 | apiVersion: extensions/v1beta1 2 | kind: Deployment 3 | metadata: 4 | name: {{APP_NAME}} 5 | spec: 6 | replicas: {{REPLICAS}} 7 | template: 8 | metadata: 9 | labels: 10 | component: graphite 11 | app: {{APP_NAME}} 12 | spec: 13 | containers: 14 | - name: {{APP_NAME}} 15 | image: {{IMAGE_NAME}} 16 | resources: 17 | requests: 18 | cpu: 600m 19 | memory: 256Mi 20 | ports: 21 | - containerPort: 8125 22 | name: incoming-udp 23 | protocol: UDP 24 | serviceAccountName: graphite-cluster-sa 25 | {{ADDITIONAL_YAML}} 26 | -------------------------------------------------------------------------------- /kube/statsd-proxy/svc.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{APP_NAME}} 5 | spec: 6 | type: ClusterIP 7 | selector: 8 | app: {{APP_NAME}} 9 | ports: 10 | - port: 8125 11 | name: default-udp 12 | targetPort: 8125 13 | protocol: UDP 14 | - port: 8188 15 | name: backward-compatible-udp 16 | targetPort: 8125 17 | protocol: UDP 18 | --------------------------------------------------------------------------------
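Once everything is deployed, the whole pipeline (statsd-proxy to statsd-daemon to carbon-relay to graphite-node) can be smoke-tested from any pod with a few lines of Python. The 'statsd' and 'carbon-relay' Service names follow this repo's Makefile and kube manifests; the render path in the final comment is a guess that assumes graphite-web is reachable through the graphite master Service, so adjust it to your deployment:

    import socket, time

    # 1) statsd counter over UDP to the proxy Service
    udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    udp.sendto(b'smoke.test:1|c', ('statsd', 8125))

    # 2) raw carbon plaintext line ("metric value timestamp") to the relay
    line = 'smoke.plaintext 1 %d\n' % int(time.time())
    with socket.create_connection(('carbon-relay', 2003), timeout=3) as tcp:
        tcp.sendall(line.encode('ascii'))

    # 3) after a flush interval the counter should be queryable via graphite-web,
    #    e.g. /render?target=stats.counters.smoke.test.count&format=json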