├── CODE_OF_CONDUCT.md
├── LICENSE.md
├── README.md
├── example-workloads.yaml
├── example
    ├── kustomization.yaml
    └── workloads
    │   ├── grpc
    │       ├── deployment.yaml
    │       ├── grpc-load.sh
    │       ├── service.yaml
    │       └── slo-grpc.yaml
    │   ├── kustomization.yaml
    │   ├── load-simulations
    │       ├── kustomization.yaml
    │       ├── todo-backend-django-valence.yaml
    │       └── todo-backend-django.yaml
    │   ├── slo-webapps.yaml
    │   ├── todo-backend-django-valence
    │       ├── deployment.yaml
    │       ├── kustomization.yaml
    │       └── service.yaml
    │   └── todo-backend-django
    │       ├── deployment.yaml
    │       ├── kustomization.yaml
    │       └── service.yaml
├── how-it-works.jpg
├── makefile
├── manifests
    ├── kustomization.yaml
    └── valence
    │   ├── grafana
    │       ├── configMap.yaml
    │       ├── dashboard-valence.yaml
    │       ├── deployment.yaml
    │       ├── kustomization.yaml
    │       └── service.yaml
    │   ├── kustomization.yaml
    │   ├── operator
    │       ├── crds.yaml
    │       ├── deployment.yaml
    │       ├── kustomization.yaml
    │       ├── namespace.yaml
    │       ├── rbac.yaml
    │       └── service.yaml
    │   └── prometheus
    │       ├── config-map.yaml
    │       ├── kustomization.yaml
    │       ├── prometheus-service-accounts.yaml
    │       ├── service.yaml
    │       └── stateful-set.yaml
├── tooling.yaml
├── tooling
    ├── kube-state-metrics
    │   ├── deployment.yaml
    │   ├── kustomization.yaml
    │   ├── rbac.yaml
    │   └── service.yaml
    ├── kustomization.base.yaml
    ├── kustomization.yaml
    └── metrics-server
    │   ├── api-service.yaml
    │   ├── auth-delegator.yaml
    │   ├── auth-reader.yaml
    │   ├── deployment.yaml
    │   ├── kustomization.yaml
    │   ├── resource-reader.yaml
    │   └── service.yaml
└── valence.yaml


/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
 6 | 
 7 | ## Our Standards
 8 | 
 9 | Examples of behaviour that contributes to creating a positive environment include:
10 | 
11 | Using welcoming and inclusive language
12 | 
13 | Being respectful of differing viewpoints and experiences
14 | 
15 | Gracefully accepting constructive criticism
16 | 
17 | Focusing on what is best for the community
18 | 
19 | Showing empathy towards other community members
20 | 
21 | Examples of unacceptable behaviour by participants include:
22 | 
23 | The use of sexualized language or imagery and unwelcome sexual attention or advances
24 | 
25 | Trolling, insulting/derogatory comments, and personal or political attacks
26 | 
27 | Public or private harassment
28 | 
29 | Publishing others' private information, such as a physical or electronic address, without explicit permission
30 | 
31 | Other conduct which could reasonably be considered inappropriate in a professional setting
32 | 
33 | ## Our Responsibilities
34 | 
35 | Project maintainers are responsible for clarifying the standards of acceptable behaviour and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour.
36 | 
37 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviours that they deem inappropriate, threatening, offensive, or harmful.
38 | 
39 | ## Scope
40 | 
41 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include 
42 | 
43 | using an official project e-mail address,
44 | 
45 | posting via an official social media account or acting as an appointed representative at an online or offline event
46 | 
47 | Representation of a project may be further defined and clarified by project maintainers.
48 | 
49 | ## Enforcement
50 | 
51 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at [hello@manifold.co](mailto:hello@manifold.co). All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
52 | 
53 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
54 | 
55 | ## Attribution
56 | 
57 | This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at [http://contributor-covenant.org/version/1/4](http://contributor-covenant.org/version/1/4).


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2018, Arigato Machine Inc.
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # [Valence](https://valence.net/?utm_source=github&utm_medium=referral) 🚀🤖
  2 | 
  3 | ![how it works](./how-it-works.jpg)
  4 | 
  5 | ## TOC
  6 | 
  7 | 1. [Introduction](#introduction)
  8 | 2. [How to get started](#how-to-get-started)
  9 | 3. [Installation](#installation)
 10 | 4. [Using Valence](#using-valence)
 11 | 5. [Testing Valence with Example Workloads](#example-workloads)
 12 | 
 13 | ## Introduction
 14 | 
 15 | [Valence](https://valence.net/?utm_source=github&utm_medium=referral) is an autoscaler operator for Kubernetes for right sizing and autoscaling containers intelligently to meet performance objectives. It learns how applications behave and optimizes resources according to defined Service Level Objective manifests. Valence manages bidirectional pod autoscaling in order to ensure maximum utility of your cluster without performance degradation. Valence is **not a replacement of Vertical Pod or Horizontal Pod Autoscalers but an operator that reconciles the two** and will autoconfigure them based on application behaviour.
 16 | 
 17 | ## How it works
 18 | 
 19 | Valence is based on the notion of Declarative Performance. We believe you should be able to declare performance objectives and have an operator (Valence) which figures out how to autoscale, right size, and pack your Kubernetes resources. In contrast, current Kubernetes scaling and performance management tools are largely imperative, requiring overhead to determine right size, autoscaling metrics, and related configuration. Since code, traffic, and node utilization change, we believe this should be managed automatically by an operator, rather than by manual calculation and intervention. We also think the right unit of scaling isn't utilization or metric thresholds but should be based, dynamically, on how an application's behaviour (utilization) responds to its use (such as HTTP or gRPC requests).
 20 | 
 21 | ## Declarative Performance: The Service Level Objective Manifest
 22 | 
 23 | Like Kubernetes goes and figures out how to get a Deployment running with replica sets and pods and has a controller figuring out how to maintain the declared state of the Deployment, Valence goes and figures out how to maintain, and continue to maintain, the declared performance with ServiceLevelObjective manifests.
 24 | 
 25 | Use these [ServiceLevelObjective](./example/workloads/slo-webapps.yaml) objects to manage your application's performance, right sizing, and autoscaling instead of setting all that up manually. This is the main interface for Operators to use Valence.
 26 | 
 27 | **What if I don't have SLOs for my application!!**
 28 | Most people don't have formal SLOs ([read more here](https://landing.google.com/sre/sre-book/chapters/service-level-objectives/)) that they have built tooling around for monitoring let alone management by them. We see this as a chance to start using them. They are a great abstraction for declaring the core performance requirements of your application which you will have even if you don't have formal SLOs.
 29 | 
 30 | ```
 31 | apiVersion: optimizer.valence.io/v1alpha1
 32 | kind: ServiceLevelObjective
 33 | metadata:
 34 |   name: slo-webapps
 35 | spec:
 36 |   # First we define a selector.
 37 |   # We use this to label deployments to tell Valence to meet the following objectives for those [deployments.](https://github.com/valencenet/valence-manifests/blob/master/example/workloads/todo-backend-django-valence/deployment.yaml#L7)
 38 |   selector:
 39 |     slo: slo-webapps
 40 | 
 41 |   # Now we declare our objectives. So far we only have HTTP objectives. We are working on a bunch more, let us know if you have ideas.
 42 |   objectives:
 43 |     # The http objective consists of ideal latency for a percentile at a throughput.
 44 |     # Omit throughput if you want to maintain that latency no matter the throughput (ie. autoscaling v. rightsizing)
 45 |     - type: HTTP
 46 |       http:
 47 |         latency:
 48 |           # Valid values are 99, 95, 90, 75, 50.
 49 |           percentile: 99
 50 |           # The ideal response time for that percentile.
 51 |           responseTime: 100ms
 52 |         # This is throughput of queries per minute.
 53 |         # Omit this for autoscaling (ie. latency objective valid for all throughputs).
 54 |         throughput: 500
 55 | ```
 56 | 
 57 | **See example deployment set up in example/workloads**
 58 | 
 59 | ## Want to get started quickly with example workloads?
 60 | 
 61 | - start on a fresh cluster such as docker-for-desktop or a testing instance of GKE
 62 | - Clone the Valence repo: `git clone https://github.com/valencenet/valence-manifests`
 63 | - _if your cluster already has metrics-server (GKE does by default)_ run `make tooling-no-ms`
 64 | - Apply the Tooling (Metrics server (if you don't have it) and Kube-state-metrics): `kubectl apply -f tooling.yaml`
 65 | - Apply the Valence system: `kubectl apply -f valence.yaml`
 66 | - Apply the Example workloads and tooling: `kubectl apply -f example-workloads.yaml`
 67 | - View results!
 68 | - - `kubectl proxy svc/grafana -n valence-system &`
 69 | - - `open http://localhost:8001/api/v1/namespaces/valence-system/services/grafana/proxy`
 70 |     Authentication is Grafana Default: username: admin, password: admin
 71 |     Recommendations for Replicas, Requests and Limits, and live changes to those should start coming in 5-20 minutes.
 72 | 
 73 | ## How to get started
 74 | 
 75 | In order to get the most out of Valence, we recommend starting with Valence in recommendation mode. This will help you understand the configuration options of Valence, before going into Live mode where Valence takes control of your deployments' resourcing and scaling on your behalf.
 76 | 
 77 | Once Valence is installed, it will be sending metrics data remotely to the Valence server for our analysis and improvement. If you'd like to opt out of data collection, follow the instructions during installation.
 78 | 
 79 | **Step 1 - Installation:**
 80 | Follow the installation instructions below (full support from the Valence team will be available @ info@valence.net)
 81 | 
 82 | **Step 2 - Recommendation Mode:**
 83 | Pick a few deployments you’d like to see recommendations being made on and write SLO manifests for them.
 84 | We recommend you observe Valence recommendations for a couple days at this point. Please discuss any concerns you may have or feedback with the Valence team as you are observing recommendations. During this period you should manually use those recommendations as you please.
 85 | **Note: our prometheus only retains data for 6 hours so you will have to make your observations accordingly**
 86 | 
 87 | **Step 3 - Live Mode, limited deployments:**
 88 | Now we recommend you let Valence take full control of those deployments by [using Valence Annotations](#using-valence-annotations). Again take a couple days to observe how Valence is operating those deployments and direct any feedback to the Valence team.
 89 | 
 90 | **Step 4 - Full roll out:**
 91 | Add more deployments for recommendations or management by Valence.
 92 | 
 93 | ## Installation
 94 | 
 95 | Installing Valence:
 96 | 
 97 | 1. [Installing Valence Operator](#installing-valence-operator)
 98 | 2. [Preparing Deployments and Services for Operation by Valence](#preparing-deployments-and-services-for-operation-by-valence)
 99 | 3. [Setting SLOs](#setting-slos)
100 | 
101 | ### Installing Valence Operator
102 | 
103 | Valence is an operator that lives in its own namespace with all the tools it needs.
104 | 
105 | You will need to have the following components installed to use Valence.
106 | If you don't have these, you can take a look at the tooling manifests for examples.
107 | 
108 | **Prerequisites:**
109 | 
110 | - [metrics-server](https://github.com/kubernetes-incubator/metrics-server)
111 | - Scrapable [kube-state-metrics](https://github.com/kubernetes/kube-state-metrics) with the following service label: `app: kube-state-metrics` **Note:** This component is only necessary for supplementing our dashboard; if you don't need existing deployment information in the dashboard, then it's optional.
112 | - **Note:** If you have limited your already existing kube-state-metrics, ensure the following metrics from kube-state-metrics are available:
113 | 
114 | ```
115 | kube_pod_container_resource_requests_memory_bytes,
116 | kube_pod_container_resource_limits_memory_bytes,
117 | kube_pod_container_resource_requests_cpu_cores,
118 | kube_pod_container_resource_limits_cpu_cores,
119 | kube_deployment_status_replicas_available
120 | ```
121 | 
122 | **Note** These tools are available in tooling and `./tooling.yaml` if you want to install them from here.
123 | 
124 | **Valence can be installed:**
125 | 
126 | - **Opt out of data collection** You can opt out of data collection (ie. for on-prem) by turning the `DATA_OPT_OUT` flag on the valence operator from "false" to be "true" here: https://github.com/valencenet/valence-manifests/blob/master/manifests/valence/operator/deployment.yaml#L18
127 | 
128 | - **Free** by adding an email as your license and applying valence.
129 | 
130 | ```
131 | make valence LICENSE=<YOUR.EMAIL>
132 | kubectl apply -f valence.yaml
133 | ```
134 | 
135 | - **License** by adding the license key you provisioned during sign up on Manifold and applying valence.
136 | 
137 | ```
138 | make valence LICENSE=<YOUR.LICENSE.KEY>
139 | kubectl apply -f valence.yaml
140 | ```
141 | 
142 | Valence can be removed by deleting valence.yaml
143 | 
144 | ```
145 | kubectl delete -f valence.yaml
146 | ```
147 | 
148 | Components installed in valence-system namespace:
149 | 
150 | - Prometheus (Valence’s own managed Prometheus)
151 | - Grafana with Valence Dashboards (Valence’s own managed Grafana)
152 | - Valence Operator
153 | 
154 | If you need to **modify** these files you can use the make commands to recompile the manifests. (ie. `make valence`)
155 | 
156 | ### Preparing Deployments and Services for Operation by Valence
157 | 
158 | There are three steps to operating a deployment with Valence.
159 | 
160 | **1) Write a SLO for a deployment or group of deployments**
161 | 
162 | Example: [slo-webapps.yaml](./example/workloads/slo-webapps.yaml)
163 | 
164 | ```
165 | apiVersion: optimizer.valence.io/v1alpha1
166 | kind: ServiceLevelObjective
167 | metadata:
168 |   name: slo-webapps
169 | spec:
170 |   # First we define a selector.
171 |   # We use this to label deployments to tell Valence to meet the following objectives for those [deployments.](https://github.com/valencenet/valence-manifests/blob/master/example/workloads/todo-backend-django-valence/deployment.yaml#L7)
172 |   selector:
173 |     slo: slo-webapps
174 |   objectives:
175 |     - type: HTTP
176 |       http:
177 |         latency:
178 |           # Valid values are 99, 95, 90, 75, 50.
179 |           percentile: 99
180 |           responseTime: 100ms
181 |         # Omit this for autoscaling (ie. latency objective valid for all throughputs).
182 |         # This is throughput of queries per minute.
183 |         throughput: 500
184 | ```
185 | 
186 | **2) Label the deployment with that SLO and add Envoy:**
187 | 
188 | #### Selecting SLO
189 | 
190 | Choose the Deployment(s) you'd like to be operated by that Service Level Objective and Label them accordingly.
191 | 
192 | ```
193 | apiVersion: extensions/v1beta1
194 | kind: Deployment
195 | metadata:
196 |   name: todo-backend-django
197 |   labels:
198 |     app: todo-backend-django
199 |     # Add this as a label to your Deployment to match the selector you defined above.
200 |     slo: slo-webapps
201 | ...
202 |   template:
203 |     metadata:
204 |       labels:
205 |         app: todo-backend-django
206 |         # Add this as a template label to your Deployment to match the selector you defined above.
207 |         slo: slo-webapps
208 | ...
209 | ```
210 | 
211 | #### Adding Sidecar
212 | 
213 | Valence collects application metrics through a sidecar, [envoy](https://www.envoyproxy.io/). If you’d prefer to collect metrics based on your ingress, load-balancer, custom envoy containers, linkerd, istio or otherwise, let the Valence team know, we are currently working on custom app metrics. This will eventually be automated, all feedback is appreciated!
214 | 
215 | Add the envoy proxy container to your deployment and set the target address to where your application is normally serving.
216 | 
217 | Example: [todo-backend-django/deployment.yaml](./example/workloads/todo-backend-django-valence/deployment.yaml)
218 | 
219 | ```
220 | apiVersion: extensions/v1beta1
221 | kind: Deployment
222 | metadata:
223 |   name: todo-backend-django
224 |   labels:
225 |     app: todo-backend-django
226 |     slo: slo-webapps
227 | ...
228 |   template:
229 |     metadata:
230 |       labels:
231 |         app: todo-backend-django
232 |         slo: slo-webapps
233 | ...
234 |     spec:
235 |       containers:
236 |         - name: envoy
237 |           image: valencenet/envoyproxy:latest
238 |           imagePullPolicy: IfNotPresent
239 |           env:
240 |           - name: SERVICE_PORT_VALUE
241 |             value: "8000" # this should be the port your app is serving on.
242 |           # if you are using HTTP2 with something like grpc then you should include the following:
243 |           # - name: PROTOCAL
244 |           # value: http2
245 |           ports:
246 |             - containerPort: 8081
247 |               name: envoy-sidecar
248 |             - containerPort: 8181
249 |               name: envoy-metrics
250 | ...
251 | ```
252 | 
253 | **Note: Valence will make relatively frequent changes so we recommend you ensure at least the following availability configuration for your deployments:**
254 | 
255 | ```
256 | spec:
257 |   # Revision history limit should be low but greater than 1.
258 |   revisionHistoryLimit: 3
259 |   strategy:
260 |     # Ensure we use rolling updates with:
261 |     rollingUpdate:
262 |       maxSurge: 2
263 |       maxUnavailable: 10%
264 | ```
265 | 
266 | It is also helpful if you are using readiness and liveness probes to ensure availability.
267 | 
268 | **3) Label your Kubernetes Service for that Deployment with the envoy proxy collection and replace your existing service with a Valence compatible service.**
269 | 
270 | Example [todo-backend-django/service.yaml](./example/workloads/todo-backend-django-valence/service.yaml)
271 | Change:
272 | 
273 | ```
274 | apiVersion: v1
275 | kind: Service
276 | metadata:
277 |   labels:
278 |     service: todo-backend-django
279 |   name: todo-backend-django
280 | spec:
281 |   # Works with any service type, NodePort just an example.
282 |   type: NodePort
283 |   ports:
284 |   - name: headless # example port name
285 |     port: 80
286 |     targetPort: 8080
287 |   selector:
288 |     app: todo-backend-django
289 | ```
290 | 
291 | To:
292 | 
293 | ```
294 | apiVersion: v1
295 | kind: Service
296 | metadata:
297 |   name: todo-backend-django
298 |   labels:
299 |     service: todo-backend-django
300 |     # Scrape prometheus metrics by valence.
301 |     valence.net/prometheus: "true"
302 | spec:
303 |   type: NodePort
304 |   ports:
305 |   # This would be your port you were exposing your application on.
306 |   - name: headless # this name is arbitrary and can be changed to anything you want.
307 |     port: 80
308 |     targetPort: 8081 # this is the port envoy is serving on
309 |   # These three lines allow us to scrape application metrics.
310 |   - name: prometheus
311 |     port: 8181
312 |     targetPort: 8181
313 |   selector:
314 |     app: todo-backend-django
315 | ```
316 | 
317 | ## Using Valence
318 | 
319 | Using Valence:
320 | 
321 | 1. [Using Valence Annotations](#using-valence-annotations)
322 | 2. [Viewing Valence Recommendations and Changes](#viewing-valence-recommendations-and-changes)
323 | 
324 | ### Setting SLOs
325 | 
326 | Setting a SLO is done via writing the manifest, applying it, and registering a deployment using the label defined in the slo selector.
327 | 
328 | Example:
329 | 
330 | ```
331 | apiVersion: optimizer.valence.io/v1alpha1
332 | kind: ServiceLevelObjective
333 | metadata:
334 |   name: slo-webapps
335 | spec:
336 |   selector:
337 |    # The label you want to select on deployments.
338 |     slo: slo-webapps
339 |   objectives:
340 |     - type: HTTP
341 |       http:
342 |         latency:
343 |           # Percentile you'd like your response times to fall under.
344 |           # Valid values are 99, 95, 90, 75, 50.
345 |           percentile: 99
346 |           # Response time you want your application to meet.
347 |           responseTime: 100ms
348 |         # The throughput objective you want the latency objective to be valid for.
349 |         # Omit this for throughput scaling (ie. latency objective valid for all throughputs).
350 |         # This is throughput of queries per minute.
351 |         throughput: 500
352 | ```
353 | 
354 | ## Using Valence Annotations
355 | 
356 | You can use these optional [annotations](https://github.com/valencenet/valence-manifests/blob/master/example/workloads/todo-backend-django-valence/deployment.yaml#L8) on the deployments managed by Valence:
357 | 
358 | ```
359 |   annotations:
360 |     # Whether to make changes automatically with recommendations.
361 |     valence.io/optimizer.configure: "true"
362 |     # Minimum amount of replicas to recommend.
363 |     valence.io/optimizer.min-replicas: "2"
364 |     # Minimum cpu requests to recommend.
365 |     valence.io/optimizer.min-cpu-requests: "100m"
366 |     # Minimum memory requests to recommend.
367 |     # For example: set this to your max heap size if you are using JVM.
368 |     valence.io/optimizer.min-memory-requests: "500M"
369 | ```
370 | 
371 | ## Viewing Valence Recommendations and Changes
372 | 
373 | ### Recommendations
374 | 
375 | #### Prometheus
376 | 
377 | The recommendations are available in [prometheus exposition format](https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format). Valence exposes its metrics on `/metrics` endpoint on port 8080 of the `optimization-operator.valence-system` service and can be scraped by prometheus and other similar tools for metrics collection in a standard way. The metrics can be accessed like:
378 | 
379 | ```
380 | kubectl port-forward svc/optimization-operator -n valence-system 8080 &
381 | open http://localhost:8080/metrics
382 | ```
383 | 
384 | We expose the following metrics:
385 | 
386 | - valence_recommendations_cpu_limits
387 | - valence_recommendations_cpu_requests
388 | - valence_recommendations_memory_limits
389 | - valence_recommendations_memory_requests
390 | - valence_recommendations_replicas
391 | 
392 | For an example of how we scrape these recommendations for our own local prometheus see [config-map](manifests/valence/prometheus/config-map.yaml#L255). Here we scrape on the following label: `app.kubernetes.io/component: operator`
393 | 
394 | #### StatsD / Datadog
395 | 
396 | The recommendations are available to statsd (or any statsd compatible system) through adding a statsd url to the valence optimization-operator deployment.
397 | 
398 | Add this
399 | 
400 | ```
401 | - name: STATSD_URL
402 |   value: datadog.default:8125 # replace with your statsd url and port
403 | ```
404 | 
405 | [here](manifests/valence/operator/deployment.yaml#L17)
406 | 
407 | ### Grafana
408 | 
409 | Open Grafana
410 | 
411 | ```
412 | kubectl proxy svc/grafana -n valence-system
413 | open http://localhost:8001/api/v1/namespaces/valence-system/services/grafana/proxy
414 | ```
415 | 
416 | Authentication is Grafana Default:
417 | 
418 | - username: admin
419 | - password: admin
420 | 
421 | Once you are in Grafana look at the Valence Recommendations dashboard.
422 | You will see:
423 | 
424 | - Memory recommendations and resources
425 | - CPU recommendations and resources
426 | - HTTP Request Count in Queries per Second
427 | - HTTP Latency at selected percentile
428 | - Replica recommendations and current replicas
429 | 
430 | ## Example Workloads
431 | 
432 | If you want to test out valence on example workloads we have provided example manifests that you can use. We generate synthetic workloads using our realistic workload generation tool Majin (see the workload.yaml files). See the `example/workloads` dir for more details. There are also additional gRPC workloads in `example/workloads/grpc`.
433 | 
434 | The workloads for testing are:
435 | 
436 | - todo-backend-django (this is a control workload not using valence)
437 | - todo-backend-django-valence
438 | - grpc (fortune-telling-app)
439 | 
440 | They will use the following SLO manifests:
441 | 
442 | - slo-webapps
443 | - slo-grpc
444 | 
445 | Want to get started quickly with example workloads?
446 | 
447 | - start on a fresh cluster such as docker-for-desktop or a testing instance of GKE
448 | - Clone the Valence repo: `git clone https://github.com/valencenet/valence-manifests`
449 | - _if your cluster already has metrics-server (GKE does by default)_ run `make tooling-no-ms`
450 | - Apply the Tooling (Metrics server (if you don't have it) and Kube-state-metrics): `kubectl apply -f tooling.yaml`
451 | - Apply the Valence system: `kubectl apply -f valence.yaml`
452 | - Apply the Example workloads and tooling: `kubectl apply -f example-workloads.yaml`
453 | - View results!
454 | - - `kubectl proxy svc/grafana -n valence-system &`
455 | - - `open http://localhost:8001/api/v1/namespaces/valence-system/services/grafana/proxy`
456 |     Authentication is Grafana Default: username: admin, password: admin
457 |     Recommendations for Replicas, Requests and Limits, and live changes to those should start coming in 5-20 minutes.
458 | 


--------------------------------------------------------------------------------
/example-workloads.yaml:
--------------------------------------------------------------------------------
  1 | apiVersion: v1  # Service fronting the todo-backend-django-valence pods.
  2 | kind: Service
  3 | metadata:
  4 |   labels:
  5 |     app: todo-backend-django-valence
  6 |     valence.net/prometheus: "true"  # Marks this Service for Prometheus scraping by Valence.
  7 |   name: todo-backend-django-valence
  8 | spec:
  9 |   ports:
 10 |   - name: headless  # Application traffic port; the name is arbitrary.
 11 |     port: 80
 12 |     targetPort: 8081  # Port the envoy sidecar container exposes (envoy-sidecar).
 13 |   - name: prometheus  # Port used to scrape application metrics.
 14 |     port: 8181
 15 |     targetPort: 8181  # Port the envoy sidecar exposes for metrics (envoy-metrics).
 16 |   selector:
 17 |     app: todo-backend-django-valence  # Selects pods carrying this app label.
 18 |   type: NodePort
 19 | ---
 20 | apiVersion: v1  # Service fronting the todo-backend-django pods.
 21 | kind: Service
 22 | metadata:
 23 |   labels:
 24 |     app: todo-backend-django
 25 |     valence.net/prometheus: "true"  # NOTE(review): README calls todo-backend-django a control workload not using valence, yet it is labelled for scraping - confirm intent.
 26 |   name: todo-backend-django
 27 | spec:
 28 |   ports:
 29 |   - name: headless  # Application traffic port; the name is arbitrary.
 30 |     port: 80
 31 |     targetPort: 8081  # Port the envoy sidecar container exposes (envoy-sidecar).
 32 |   - name: prometheus  # Port used to scrape application metrics.
 33 |     port: 8181
 34 |     targetPort: 8181  # Port the envoy sidecar exposes for metrics (envoy-metrics).
 35 |   selector:
 36 |     app: todo-backend-django  # Selects pods carrying this app label.
 37 |   type: NodePort
 38 | ---
 39 | apiVersion: extensions/v1beta1  # Example Deployment: envoy sidecar plus the todo-backend-django app.
 40 | kind: Deployment
 41 | metadata:
 42 |   annotations:
 43 |     valence.io/optimizer.configure: "true"  # Let Valence apply its recommendations automatically.
 44 |   labels:
 45 |     app: todo-backend-django-valence
 46 |     slo: slo-webapps  # Matches the selector of the slo-webapps ServiceLevelObjective.
 47 |   name: todo-backend-django-valence
 48 | spec:
 49 |   replicas: 2
 50 |   revisionHistoryLimit: 3  # Kept low but greater than 1, per the README's availability guidance.
 51 |   selector:
 52 |     matchLabels:
 53 |       app: todo-backend-django-valence
 54 |   strategy:
 55 |     rollingUpdate:
 56 |       maxSurge: 2
 57 |       maxUnavailable: 10%
 58 |   template:
 59 |     metadata:
 60 |       labels:  # NOTE(review): README also shows `slo: slo-webapps` as a template label - confirm whether it is needed here.
 61 |         app: todo-backend-django-valence
 62 |     spec:
 63 |       containers:
 64 |       - env:  # Envoy sidecar container.
 65 |         - name: SERVICE_PORT_VALUE
 66 |           value: "8000"  # Port the application container serves on (see containerPort 8000 below).
 67 |         image: valencenet/envoyproxy:0.3.0
 68 |         imagePullPolicy: IfNotPresent
 69 |         name: envoy
 70 |         ports:
 71 |         - containerPort: 8081  # Targeted by the Service's port 80.
 72 |           name: envoy-sidecar
 73 |         - containerPort: 8181  # Targeted by the Service's prometheus port.
 74 |           name: envoy-metrics
 75 |         resources:
 76 |           limits:
 77 |             cpu: 500m
 78 |             memory: 500M
 79 |           requests:
 80 |             cpu: 250m
 81 |             memory: 250M
 82 |       - env:  # Application container.
 83 |         - name: PORT
 84 |           value: "8000"
 85 |         image: manifoldco/todo-backend-django:latest
 86 |         imagePullPolicy: IfNotPresent
 87 |         name: todo-backend-django-valence
 88 |         ports:
 89 |         - containerPort: 8000
 90 |           name: http
 91 |           protocol: TCP
 92 |         readinessProbe:  # HTTP GET /todos on the application port.
 93 |           failureThreshold: 30
 94 |           httpGet:
 95 |             path: /todos
 96 |             port: 8000
 97 |           initialDelaySeconds: 5
 98 |           periodSeconds: 60
 99 |           timeoutSeconds: 30
100 |         resources:
101 |           limits:
102 |             cpu: 500m
103 |             memory: 500M
104 |           requests:
105 |             cpu: 250m
106 |             memory: 250M
107 |       restartPolicy: Always
108 | ---
109 | apiVersion: apps/v1  # extensions/v1beta1 stopped serving Deployments in Kubernetes 1.16
110 | kind: Deployment  # todo-backend-django variant without the Valence annotation or slo label
111 | metadata:
112 |   labels:
113 |     app: todo-backend-django
114 |   name: todo-backend-django
115 | spec:
116 |   replicas: 2
117 |   revisionHistoryLimit: 3
118 |   selector:  # mandatory under apps/v1; equals the pod template labels
119 |     matchLabels:
120 |       app: todo-backend-django
121 |   strategy:
122 |     rollingUpdate:
123 |       maxSurge: 2
124 |       maxUnavailable: 10%
125 |   template:
126 |     metadata:
127 |       labels:
128 |         app: todo-backend-django
129 |     spec:
130 |       containers:
131 |       - env:
132 |         - name: SERVICE_PORT_VALUE
133 |           value: "8000"  # same value as the app container's PORT
134 |         image: valencenet/envoyproxy:0.3.2  # same tag as the source deployment.yaml
135 |         imagePullPolicy: IfNotPresent
136 |         name: envoy
137 |         ports:
138 |         - containerPort: 8081  # targetPort of the Service's port 80
139 |           name: envoy-sidecar
140 |         - containerPort: 8181  # targetPort of the Service's "prometheus" port
141 |           name: envoy-metrics
142 |         resources:
143 |           limits:
144 |             cpu: 500m
145 |             memory: 500M
146 |           requests:
147 |             cpu: 250m
148 |             memory: 250M
149 |       - env:
150 |         - name: PORT
151 |           value: "8000"
152 |         image: manifoldco/todo-backend-django:latest
153 |         imagePullPolicy: IfNotPresent
154 |         name: todo-backend-django
155 |         ports:
156 |         - containerPort: 8000
157 |           name: http
158 |           protocol: TCP
159 |         readinessProbe:  # probes the app container directly, not through Envoy
160 |           failureThreshold: 30
161 |           httpGet:
162 |             path: /todos
163 |             port: 8000
164 |           initialDelaySeconds: 5
165 |           periodSeconds: 60
166 |           timeoutSeconds: 30
167 |         resources:
168 |           limits:
169 |             cpu: 500m
170 |             memory: 500M
171 |           requests:
172 |             cpu: 250m
173 |             memory: 250M
174 |       restartPolicy: Always
175 | ---
176 | apiVersion: batch/v1
177 | kind: Job  # load generator aimed at the todo-backend-django-valence Service
178 | metadata:
179 |   name: majin-todo-backend-django-valence
180 |   labels:
181 |     app: workload-simulation
182 | spec:
183 |   template:
184 |     metadata:
185 |       labels:
186 |         app: workload-simulation
187 |     spec:
188 |       restartPolicy: OnFailure
189 |       containers:
190 |       - name: majin
191 |         image: valencenet/majin:0.3.2
192 |         env:
193 |         - name: TARGET  # URL the load is sent to
194 |           value: http://todo-backend-django-valence.default/todos
195 |         args:
196 |         - attack
197 |         - --base-load
198 |         - '300'
199 |         - --period
200 |         - '600'
201 | ---
202 | apiVersion: batch/v1
203 | kind: Job  # load generator aimed at the todo-backend-django Service
204 | metadata:
205 |   name: majin-todo-backend-django
206 |   labels:
207 |     app: workload-simulation
208 | spec:
209 |   template:
210 |     metadata:
211 |       labels:
212 |         app: workload-simulation
213 |     spec:
214 |       restartPolicy: OnFailure
215 |       containers:
216 |       - name: majin
217 |         image: valencenet/majin:0.3.2
218 |         env:
219 |         - name: TARGET  # URL the load is sent to
220 |           value: http://todo-backend-django.default/todos
221 |         args:
222 |         - attack
223 |         - --base-load
224 |         - '300'
225 |         - --period
226 |         - '600'
227 | ---
228 | apiVersion: optimizer.valence.io/v1alpha1
229 | kind: ServiceLevelObjective  # HTTP latency objective for slo=slo-webapps workloads
230 | metadata:
231 |   name: slo-webapps
232 | spec:
233 |   selector:
234 |     slo: slo-webapps  # label carried by the todo-backend-django-valence Deployment
235 |   objectives:
236 |   - type: HTTP
237 |     http:
238 |       latency:
239 |         responseTime: 500ms
240 |         percentile: 95
241 | 


--------------------------------------------------------------------------------
/example/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Root of the example build (`kustomize build ./example`); pulls in ./workloads.
2 | bases:
3 | - ./workloads
4 | 


--------------------------------------------------------------------------------
/example/workloads/grpc/deployment.yaml:
--------------------------------------------------------------------------------
 1 | # gRPC example workload: the fortune-teller app with an Envoy sidecar.
 2 | # apps/v1 replaces extensions/v1beta1, which stopped serving Deployments in
 3 | # Kubernetes 1.16; apps/v1 requires the explicit spec.selector below.
 4 | apiVersion: apps/v1
 5 | kind: Deployment
 6 | metadata:
 7 |   name: fortune-teller-app
 8 |   namespace: default
 9 |   labels:
10 |     k8s-app: fortune-teller-app
11 |     # Matches the selector of the slo-grpc ServiceLevelObjective.
12 |     slo: slo-grpc
13 |   annotations:
14 |     valence.io/optimizer.configure: "true"
15 | spec:
16 |   replicas: 1
17 |   # Mirrors the selector extensions/v1beta1 defaulted from the pod template
18 |   # labels, so the selector of an already-deployed object stays unchanged.
19 |   selector:
20 |     matchLabels:
21 |       k8s-app: fortune-teller-app
22 |       slo: slo-grpc
23 |   template:
24 |     metadata:
25 |       labels:
26 |         k8s-app: fortune-teller-app
27 |         slo: slo-grpc
28 |     spec:
29 |       containers:
30 |         - name: envoy
31 |           image: valencenet/envoyproxy:0.3.2
32 |           imagePullPolicy: IfNotPresent
33 |           env:
34 |             # Same value as the app container's gRPC port below.
35 |             - name: SERVICE_PORT_VALUE
36 |               value: "50051"
37 |             # NOTE(review): the "PROTOCAL" spelling is presumably what the
38 |             # image reads - confirm before renaming.
39 |             - name: PROTOCAL
40 |               value: http2
41 |           ports:
42 |             # targetPort of the fortune-teller-app Service's port 80.
43 |             - containerPort: 8081
44 |               name: envoy-sidecar
45 |             # targetPort of the Service's "prometheus" port.
46 |             - containerPort: 8181
47 |               name: envoy-metrics
48 |         - name: fortune-teller-app
49 |           image: quay.io/kubernetes-ingress-controller/grpc-fortune-teller:0.1
50 |           ports:
51 |             - containerPort: 50051
52 |               name: grpc


--------------------------------------------------------------------------------
/example/workloads/grpc/grpc-load.sh:
--------------------------------------------------------------------------------
1 | # basic bash script for load testing the grpc service.
2 | kubectl port-forward svc/fortune-teller-app 8080:80 &
3 | for i in {1..1200}; do grpcurl -v -plaintext localhost:8080 build.stack.fortune.FortuneTeller/Predict; done 
4 | 


--------------------------------------------------------------------------------
/example/workloads/grpc/service.yaml:
--------------------------------------------------------------------------------
 1 | # Exposes the fortune-teller pods through their Envoy sidecar ports.
 2 | apiVersion: v1
 3 | kind: Service
 4 | metadata:
 5 |   namespace: default
 6 |   name: fortune-teller-app
 7 |   labels:
 8 |     # NOTE(review): presumably used for Prometheus scrape discovery - confirm.
 9 |     valence.net/prometheus: 'true'
10 | spec:
11 |   ports:
12 |     # Port 80 -> the pod's envoy-sidecar container port.
13 |     - name: http2
14 |       targetPort: 8081
15 |       port: 80
16 |     # Port 8181 -> the pod's envoy-metrics container port.
17 |     - name: prometheus
18 |       targetPort: 8181
19 |       port: 8181
20 |   selector:
21 |     k8s-app: fortune-teller-app


--------------------------------------------------------------------------------
/example/workloads/grpc/slo-grpc.yaml:
--------------------------------------------------------------------------------
 1 | # HTTP latency objective (percentile 95, responseTime 100ms) for workloads
 2 | # labelled slo=slo-grpc, i.e. the fortune-teller-app Deployment.
 3 | apiVersion: optimizer.valence.io/v1alpha1
 4 | kind: ServiceLevelObjective
 5 | metadata:
 6 |   name: slo-grpc
 7 | spec:
 8 |   objectives:
 9 |   - http:
10 |       latency:
11 |         responseTime: 100ms
12 |         percentile: 95
13 |     type: HTTP
14 |   selector:
15 |     slo: slo-grpc
16 | 


--------------------------------------------------------------------------------
/example/workloads/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Example workloads: the shared SLO, both todo-backend variants and the
2 | # load-simulation Jobs.
3 | resources:
4 | - ./slo-webapps.yaml
5 | bases:
6 | - ./todo-backend-django
7 | - ./todo-backend-django-valence
8 | - ./load-simulations
9 | 


--------------------------------------------------------------------------------
/example/workloads/load-simulations/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Groups both load-generation Jobs under the shared workload-simulation label.
2 | resources:
3 | - todo-backend-django-valence.yaml
4 | - todo-backend-django.yaml
5 | commonLabels:
6 |   app: workload-simulation
7 | 


--------------------------------------------------------------------------------
/example/workloads/load-simulations/todo-backend-django-valence.yaml:
--------------------------------------------------------------------------------
 1 | # Load-generation Job: runs majin's "attack" command against the /todos
 2 | # endpoint of the todo-backend-django-valence Service.
 3 | apiVersion: batch/v1
 4 | kind: Job
 5 | metadata:
 6 |   name: majin-todo-backend-django-valence
 7 | spec:
 8 |   template:
 9 |     spec:
10 |       restartPolicy: OnFailure
11 |       containers:
12 |       - name: majin
13 |         image: valencenet/majin:0.3.2
14 |         env:
15 |         - name: TARGET
16 |           value: http://todo-backend-django-valence.default/todos
17 |         # NOTE(review): units of --base-load / --period are not visible here.
18 |         args: [attack, --base-load, '300', --period, '600']
19 | 


--------------------------------------------------------------------------------
/example/workloads/load-simulations/todo-backend-django.yaml:
--------------------------------------------------------------------------------
 1 | # Load-generation Job: runs majin's "attack" command against the /todos
 2 | # endpoint of the todo-backend-django Service.
 3 | apiVersion: batch/v1
 4 | kind: Job
 5 | metadata:
 6 |   name: majin-todo-backend-django
 7 | spec:
 8 |   template:
 9 |     spec:
10 |       restartPolicy: OnFailure
11 |       containers:
12 |       - name: majin
13 |         image: valencenet/majin:0.3.2
14 |         env:
15 |         - name: TARGET
16 |           value: http://todo-backend-django.default/todos
17 |         # NOTE(review): units of --base-load / --period are not visible here.
18 |         args: [attack, --base-load, '300', --period, '600']
19 | 


--------------------------------------------------------------------------------
/example/workloads/slo-webapps.yaml:
--------------------------------------------------------------------------------
 1 | # HTTP latency objective (percentile 95, responseTime 500ms) for workloads
 2 | # labelled slo=slo-webapps.
 3 | apiVersion: optimizer.valence.io/v1alpha1
 4 | kind: ServiceLevelObjective
 5 | metadata:
 6 |   name: slo-webapps
 7 | spec:
 8 |   objectives:
 9 |   - http:
10 |       latency:
11 |         responseTime: 500ms
12 |         percentile: 95
13 |     type: HTTP
14 |   selector:
15 |     slo: slo-webapps
16 | 


--------------------------------------------------------------------------------
/example/workloads/todo-backend-django-valence/deployment.yaml:
--------------------------------------------------------------------------------
 1 | # todo-backend-django variant carrying the Valence optimizer annotation and
 2 | # the slo-webapps label, fronted by an Envoy sidecar.
 3 | # apps/v1 replaces extensions/v1beta1, which stopped serving Deployments in
 4 | # Kubernetes 1.16.
 5 | apiVersion: apps/v1
 6 | kind: Deployment
 7 | metadata:
 8 |   name: todo-backend-django-valence
 9 |   labels:
10 |     app: todo-backend-django-valence
11 |     # Matches the selector of the slo-webapps ServiceLevelObjective.
12 |     slo: slo-webapps
13 |   annotations:
14 |     valence.io/optimizer.configure: "true"
15 | spec:
16 |   revisionHistoryLimit: 3
17 |   replicas: 2
18 |   # apps/v1 requires an explicit selector. It uses the same label the sibling
19 |   # kustomization.yaml applies through commonLabels.
20 |   selector:
21 |     matchLabels:
22 |       app: todo-backend-django-valence
23 |   strategy:
24 |     rollingUpdate:
25 |       maxSurge: 2
26 |       maxUnavailable: 10%
27 |   template:
28 |     metadata:
29 |       labels:
30 |         app: todo-backend-django-valence
31 |     spec:
32 |       restartPolicy: Always
33 |       containers:
34 |         - name: envoy
35 |           image: valencenet/envoyproxy:0.3.2
36 |           imagePullPolicy: IfNotPresent
37 |           env:
38 |             # Same value as the app container's PORT below.
39 |             - name: SERVICE_PORT_VALUE
40 |               value: "8000"
41 |           ports:
42 |             # targetPort of the Service's port 80.
43 |             - containerPort: 8081
44 |               name: envoy-sidecar
45 |             # targetPort of the Service's "prometheus" port.
46 |             - containerPort: 8181
47 |               name: envoy-metrics
48 |         - name: todo-backend-django-valence
49 |           image: manifoldco/todo-backend-django:latest
50 |           imagePullPolicy: IfNotPresent
51 |           resources:
52 |             limits:
53 |               cpu: 500m
54 |               memory: 500M
55 |             requests:
56 |               cpu: 250m
57 |               memory: 250M
58 |           env:
59 |             - name: PORT
60 |               value: "8000"
61 |           ports:
62 |             - containerPort: 8000
63 |               name: http
64 |               protocol: TCP
65 |           # Probes the app container directly rather than through Envoy.
66 |           readinessProbe:
67 |             httpGet:
68 |               path: /todos
69 |               port: 8000
70 |             initialDelaySeconds: 5
71 |             periodSeconds: 60
72 |             timeoutSeconds: 30
73 |             failureThreshold: 30
74 | 


--------------------------------------------------------------------------------
/example/workloads/todo-backend-django-valence/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Bundles this directory's Deployment and Service under one shared app label.
2 | resources:
3 | - deployment.yaml
4 | - service.yaml
5 | commonLabels:
6 |   app: todo-backend-django-valence
7 | 


--------------------------------------------------------------------------------
/example/workloads/todo-backend-django-valence/service.yaml:
--------------------------------------------------------------------------------
 1 | # NodePort Service routing to the Envoy sidecar of the
 2 | # todo-backend-django-valence pods.
 3 | apiVersion: v1
 4 | kind: Service
 5 | metadata:
 6 |   name: todo-backend-django-valence
 7 |   labels:
 8 |     # NOTE(review): presumably used for Prometheus scrape discovery - confirm.
 9 |     valence.net/prometheus: 'true'
10 | spec:
11 |   selector:
12 |     app: todo-backend-django-valence
13 |   type: NodePort
14 |   ports:
15 |     # Port 80 -> the pod's envoy-sidecar container port.
16 |     - name: headless
17 |       targetPort: 8081
18 |       port: 80
19 |     # Port 8181 -> the pod's envoy-metrics container port.
20 |     - name: prometheus
21 |       targetPort: 8181
22 |       port: 8181


--------------------------------------------------------------------------------
/example/workloads/todo-backend-django/deployment.yaml:
--------------------------------------------------------------------------------
 1 | # todo-backend-django fronted by an Envoy sidecar; unlike the -valence
 2 | # variant it carries neither the Valence annotation nor the slo label.
 3 | # apps/v1 replaces extensions/v1beta1, which stopped serving Deployments in
 4 | # Kubernetes 1.16.
 5 | apiVersion: apps/v1
 6 | kind: Deployment
 7 | metadata:
 8 |   name: todo-backend-django
 9 |   labels:
10 |     app: todo-backend-django
11 | spec:
12 |   revisionHistoryLimit: 3
13 |   replicas: 2
14 |   # apps/v1 requires an explicit selector; it equals the pod template labels.
15 |   selector:
16 |     matchLabels:
17 |       app: todo-backend-django
18 |   strategy:
19 |     rollingUpdate:
20 |       maxSurge: 2
21 |       maxUnavailable: 10%
22 |   template:
23 |     metadata:
24 |       labels:
25 |         app: todo-backend-django
26 |     spec:
27 |       restartPolicy: Always
28 |       containers:
29 |         - name: envoy
30 |           image: valencenet/envoyproxy:0.3.2
31 |           imagePullPolicy: IfNotPresent
32 |           env:
33 |             # Same value as the app container's PORT below.
34 |             - name: SERVICE_PORT_VALUE
35 |               value: "8000"
36 |           ports:
37 |             # targetPort of the Service's port 80.
38 |             - containerPort: 8081
39 |               name: envoy-sidecar
40 |             # targetPort of the Service's "prometheus" port.
41 |             - containerPort: 8181
42 |               name: envoy-metrics
43 |         - name: todo-backend-django
44 |           image: manifoldco/todo-backend-django:latest
45 |           imagePullPolicy: IfNotPresent
46 |           resources:
47 |             limits:
48 |               cpu: 500m
49 |               memory: 500M
50 |             requests:
51 |               cpu: 250m
52 |               memory: 250M
53 |           env:
54 |             - name: PORT
55 |               value: "8000"
56 |           ports:
57 |             - containerPort: 8000
58 |               name: http
59 |               protocol: TCP
60 |           # Probes the app container directly rather than through Envoy.
61 |           readinessProbe:
62 |             httpGet:
63 |               path: /todos
64 |               port: 8000
65 |             initialDelaySeconds: 5
66 |             periodSeconds: 60
67 |             timeoutSeconds: 30
68 |             failureThreshold: 30
69 | 


--------------------------------------------------------------------------------
/example/workloads/todo-backend-django/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Bundles this directory's Deployment and Service under one shared app label.
2 | resources:
3 | - deployment.yaml
4 | - service.yaml
5 | commonLabels:
6 |   app: todo-backend-django
7 | 


--------------------------------------------------------------------------------
/example/workloads/todo-backend-django/service.yaml:
--------------------------------------------------------------------------------
 1 | # NodePort Service routing to the Envoy sidecar of the todo-backend-django
 2 | # pods.
 3 | apiVersion: v1
 4 | kind: Service
 5 | metadata:
 6 |   name: todo-backend-django
 7 |   labels:
 8 |     # NOTE(review): presumably used for Prometheus scrape discovery - confirm.
 9 |     valence.net/prometheus: 'true'
10 | spec:
11 |   selector:
12 |     app: todo-backend-django
13 |   type: NodePort
14 |   ports:
15 |     # Port 80 -> the pod's envoy-sidecar container port.
16 |     - name: headless
17 |       targetPort: 8081
18 |       port: 80
19 |     # Port 8181 -> the pod's envoy-metrics container port.
20 |     - name: prometheus
21 |       targetPort: 8181
22 |       port: 8181


--------------------------------------------------------------------------------
/how-it-works.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/valencenet/valence-manifests/77ecedc7ee0d7d57986c68157759d7b4fd5ebae9/how-it-works.jpg


--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
 1 | # Renders the checked-in manifests (valence.yaml, example-workloads.yaml,
 2 | # tooling.yaml) from their kustomize sources.
 3 | # Usage: make valence LICENSE=<your key>
 4 | LICENSE?="license key"
 5 | 
 6 | # No target names a file it produces, and "tooling" is also a directory, so
 7 | # declare them all phony instead of relying on timestamps.
 8 | .PHONY: install-kustomize build-valence valence example-workloads tooling tooling-no-ms
 9 | 
10 | install-kustomize:
11 | 	go get sigs.k8s.io/kustomize
12 | 
13 | # Render ./manifests into valence.yaml. `rm -f` keeps a fresh checkout or a
14 | # previously cleaned tree from aborting the build.
15 | build-valence: install-kustomize
16 | 	rm -f valence.yaml
17 | 	kustomize build ./manifests > valence.yaml
18 | 
19 | # Replace the "license key" placeholder in valence.yaml with LICENSE.
20 | valence: build-valence
21 | 	sed -i 's/"license key"/"$(LICENSE)"/g' ./valence.yaml
22 | 
23 | # Render ./example into example-workloads.yaml.
24 | example-workloads: install-kustomize
25 | 	rm -f example-workloads.yaml
26 | 	kustomize build ./example > example-workloads.yaml
27 | 
28 | # Render ./tooling using kustomization.base.yaml unchanged.
29 | tooling: install-kustomize
30 | 	rm -f tooling.yaml
31 | 	cp ./tooling/kustomization.base.yaml ./tooling/kustomization.yaml
32 | 	kustomize build ./tooling > tooling.yaml
33 | 
34 | # Render ./tooling with the "- ./metrics-server" entry stripped from the base.
35 | tooling-no-ms: install-kustomize
36 | 	rm -f tooling.yaml
37 | 	sed 's/- .\/metrics-server//g' ./tooling/kustomization.base.yaml > ./tooling/kustomization.yaml
38 | 	kustomize build ./tooling > tooling.yaml
39 | 


--------------------------------------------------------------------------------
/manifests/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Root of the Valence build (`kustomize build ./manifests`); pulls in ./valence.
2 | bases:
3 | - ./valence
4 | 


--------------------------------------------------------------------------------
/manifests/valence/grafana/configMap.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ConfigMap  # carries a datasource definition file for Grafana
 3 | metadata:
 4 |   name: grafana-datasources
 5 | data:  # prometheus.yaml defines DS_PROM_VALENCE, the datasource name the Valence dashboard targets use
 6 |   prometheus.yaml: |
 7 |     apiVersion: 1
 8 |     datasources:
 9 |       - name: DS_PROM_VALENCE
10 |         type: prometheus
11 |         access: proxy
12 |         url: http://prometheus-valence.valence-system:9090
13 |         editable: false
14 |         version: 1
15 | 
16 | ---
17 | 
18 | apiVersion: v1
19 | kind: ConfigMap  # carries a dashboard provider definition file for Grafana
20 | metadata:
21 |   name: grafana-providers
22 | data:  # default.yaml declares one file-type provider reading /var/lib/grafana/dashboards (mount not visible here - confirm in the Grafana deployment)
23 |   default.yaml: |
24 |     apiVersion: 1
25 |     providers:
26 |       - name: 'default'
27 |         org_id: 1
28 |         folder: ''
29 |         type: 'file'
30 |         options:
31 |           path: '/var/lib/grafana/dashboards'
32 | 


--------------------------------------------------------------------------------
/manifests/valence/grafana/dashboard-valence.yaml:
--------------------------------------------------------------------------------
  1 | apiVersion: v1
  2 | kind: ConfigMap
  3 | metadata:
  4 |   name: grafana-dashboards-valence
  5 | data:
  6 |   valence.json: |
  7 |     {
  8 |       "annotations": {
  9 |         "list": [
 10 |           {
 11 |             "builtIn": 1,
 12 |             "datasource": "-- Grafana --",
 13 |             "enable": true,
 14 |             "hide": true,
 15 |             "iconColor": "rgba(0, 211, 255, 1)",
 16 |             "name": "Annotations & Alerts",
 17 |             "type": "dashboard"
 18 |           }
 19 |         ]
 20 |       },
 21 |       "editable": true,
 22 |       "gnetId": null,
 23 |       "graphTooltip": 0,
 24 |       "id": 1,
 25 |       "iteration": 1559313049824,
 26 |       "links": [],
 27 |       "panels": [
 28 |         {
 29 |           "aliasColors": {},
 30 |           "bars": false,
 31 |           "dashLength": 10,
 32 |           "dashes": false,
 33 |           "datasource": "-- Mixed --",
 34 |           "description": "Recommendations of Memory requests and limits to set for $deployment",
 35 |           "fill": 1,
 36 |           "gridPos": {
 37 |             "h": 8,
 38 |             "w": 24,
 39 |             "x": 0,
 40 |             "y": 0
 41 |           },
 42 |           "id": 2,
 43 |           "legend": {
 44 |             "avg": false,
 45 |             "current": true,
 46 |             "max": false,
 47 |             "min": false,
 48 |             "show": true,
 49 |             "total": false,
 50 |             "values": true
 51 |           },
 52 |           "lines": true,
 53 |           "linewidth": 1,
 54 |           "links": [],
 55 |           "nullPointMode": "null",
 56 |           "percentage": false,
 57 |           "pointradius": 5,
 58 |           "points": false,
 59 |           "renderer": "flot",
 60 |           "seriesOverrides": [],
 61 |           "spaceLength": 10,
 62 |           "stack": false,
 63 |           "steppedLine": false,
 64 |           "targets": [
 65 |             {
 66 |               "datasource": "DS_PROM_VALENCE",
 67 |               "expr": "max(container_memory_working_set_bytes{container_name=\"$deployment\"})",
 68 |               "format": "time_series",
 69 |               "instant": false,
 70 |               "interval": "5s",
 71 |               "intervalFactor": 1,
 72 |               "legendFormat": "Observed Memory Value",
 73 |               "refId": "A"
 74 |             },
 75 |             {
 76 |               "datasource": "DS_PROM_VALENCE",
 77 |               "expr": "max(valence_recommendations_memory_requests{container_name=\"$deployment\"})",
 78 |               "format": "time_series",
 79 |               "hide": false,
 80 |               "instant": false,
 81 |               "interval": "60s",
 82 |               "intervalFactor": 1,
 83 |               "legendFormat": "Recommended Memory Request",
 84 |               "refId": "B"
 85 |             },
 86 |             {
 87 |               "datasource": "DS_PROM_VALENCE",
 88 |               "expr": "max(valence_recommendations_memory_limits{container_name=\"$deployment\"})",
 89 |               "format": "time_series",
 90 |               "hide": false,
 91 |               "interval": "60s",
 92 |               "intervalFactor": 1,
 93 |               "legendFormat": "Recommended Memory Limit",
 94 |               "refId": "C"
 95 |             },
 96 |             {
 97 |               "datasource": "DS_PROM_VALENCE",
 98 |               "expr": "max(kube_pod_container_resource_requests_memory_bytes{container=\"$deployment\"})",
 99 |               "format": "time_series",
100 |               "intervalFactor": 1,
101 |               "legendFormat": "Requests",
102 |               "refId": "D"
103 |             },
104 |             {
105 |               "datasource": "DS_PROM_VALENCE",
106 |               "expr": "max(kube_pod_container_resource_limits_memory_bytes{container=\"$deployment\"})",
107 |               "format": "time_series",
108 |               "intervalFactor": 1,
109 |               "legendFormat": "Limits",
110 |               "refId": "E"
111 |             }
112 |           ],
113 |           "thresholds": [],
114 |           "timeFrom": null,
115 |           "timeShift": null,
116 |           "title": "Memory recommendations: $deployment",
117 |           "tooltip": {
118 |             "shared": true,
119 |             "sort": 0,
120 |             "value_type": "individual"
121 |           },
122 |           "transparent": false,
123 |           "type": "graph",
124 |           "xaxis": {
125 |             "buckets": null,
126 |             "mode": "time",
127 |             "name": null,
128 |             "show": true,
129 |             "values": []
130 |           },
131 |           "yaxes": [
132 |             {
133 |               "format": "bytes",
134 |               "label": "Memory",
135 |               "logBase": 1,
136 |               "max": null,
137 |               "min": null,
138 |               "show": true
139 |             },
140 |             {
141 |               "format": "short",
142 |               "label": null,
143 |               "logBase": 1,
144 |               "max": null,
145 |               "min": null,
146 |               "show": true
147 |             }
148 |           ],
149 |           "yaxis": {
150 |             "align": false,
151 |             "alignLevel": null
152 |           }
153 |         },
154 |         {
155 |           "aliasColors": {},
156 |           "bars": false,
157 |           "dashLength": 10,
158 |           "dashes": false,
159 |           "datasource": "-- Mixed --",
160 |           "description": "Recommendations of CPU requests and limits to set for $deployment",
161 |           "fill": 1,
162 |           "gridPos": {
163 |             "h": 8,
164 |             "w": 24,
165 |             "x": 0,
166 |             "y": 8
167 |           },
168 |           "id": 4,
169 |           "legend": {
170 |             "avg": false,
171 |             "current": true,
172 |             "max": false,
173 |             "min": false,
174 |             "show": true,
175 |             "total": false,
176 |             "values": true
177 |           },
178 |           "lines": true,
179 |           "linewidth": 1,
180 |           "links": [],
181 |           "nullPointMode": "null",
182 |           "percentage": false,
183 |           "pointradius": 5,
184 |           "points": false,
185 |           "renderer": "flot",
186 |           "seriesOverrides": [],
187 |           "spaceLength": 10,
188 |           "stack": false,
189 |           "steppedLine": false,
190 |           "targets": [
191 |             {
192 |               "datasource": "DS_PROM_VALENCE",
193 |               "expr": "avg(rate(container_cpu_usage_seconds_total{container_name=\"$deployment\"}[2m]))",
194 |               "format": "time_series",
195 |               "instant": false,
196 |               "interval": "5s",
197 |               "intervalFactor": 1,
198 |               "legendFormat": "Observed CPU Value",
199 |               "refId": "A"
200 |             },
201 |             {
202 |               "datasource": "DS_PROM_VALENCE",
203 |               "expr": "max(valence_recommendations_cpu_requests{container_name=\"$deployment\"} / 1000)",
204 |               "format": "time_series",
205 |               "hide": false,
206 |               "instant": false,
207 |               "interval": "60s",
208 |               "intervalFactor": 1,
209 |               "legendFormat": "Recommended CPU Request",
210 |               "refId": "B"
211 |             },
212 |             {
213 |               "datasource": "DS_PROM_VALENCE",
214 |               "expr": "max(valence_recommendations_cpu_limits{container_name=\"$deployment\"} / 1000)",
215 |               "format": "time_series",
216 |               "hide": false,
217 |               "interval": "60s",
218 |               "intervalFactor": 1,
219 |               "legendFormat": "Recommended CPU Limit",
220 |               "refId": "C"
221 |             },
222 |             {
223 |               "datasource": "DS_PROM_VALENCE",
224 |               "expr": "max(kube_pod_container_resource_requests_cpu_cores{container=\"$deployment\"})",
225 |               "format": "time_series",
226 |               "intervalFactor": 1,
227 |               "legendFormat": "Requests",
228 |               "refId": "D"
229 |             },
230 |             {
231 |               "datasource": "DS_PROM_VALENCE",
232 |               "expr": "max(kube_pod_container_resource_limits_cpu_cores{container=\"$deployment\"})",
233 |               "format": "time_series",
234 |               "intervalFactor": 1,
235 |               "legendFormat": "Limits",
236 |               "refId": "E"
237 |             }
238 |           ],
239 |           "thresholds": [],
240 |           "timeFrom": null,
241 |           "timeShift": null,
242 |           "title": "Cpu recommendations: $deployment",
243 |           "tooltip": {
244 |             "shared": true,
245 |             "sort": 0,
246 |             "value_type": "individual"
247 |           },
248 |           "transparent": false,
249 |           "type": "graph",
250 |           "xaxis": {
251 |             "buckets": null,
252 |             "mode": "time",
253 |             "name": null,
254 |             "show": true,
255 |             "values": []
256 |           },
257 |           "yaxes": [
258 |             {
259 |               "format": "short",
260 |               "label": "CPU",
261 |               "logBase": 1,
262 |               "max": null,
263 |               "min": null,
264 |               "show": true
265 |             },
266 |             {
267 |               "format": "short",
268 |               "label": null,
269 |               "logBase": 1,
270 |               "max": null,
271 |               "min": null,
272 |               "show": true
273 |             }
274 |           ],
275 |           "yaxis": {
276 |             "align": false,
277 |             "alignLevel": null
278 |           }
279 |         },
280 |         {
281 |           "aliasColors": {},
282 |           "bars": false,
283 |           "dashLength": 10,
284 |           "dashes": false,
285 |           "datasource": "DS_PROM_VALENCE",
286 |           "fill": 1,
287 |           "gridPos": {
288 |             "h": 8,
289 |             "w": 24,
290 |             "x": 0,
291 |             "y": 16
292 |           },
293 |           "id": 6,
294 |           "legend": {
295 |             "avg": false,
296 |             "current": true,
297 |             "max": false,
298 |             "min": false,
299 |             "show": true,
300 |             "total": false,
301 |             "values": true
302 |           },
303 |           "lines": true,
304 |           "linewidth": 1,
305 |           "links": [],
306 |           "nullPointMode": "null",
307 |           "percentage": false,
308 |           "pointradius": 5,
309 |           "points": false,
310 |           "renderer": "flot",
311 |           "seriesOverrides": [],
312 |           "spaceLength": 10,
313 |           "stack": false,
314 |           "steppedLine": false,
315 |           "targets": [
316 |             {
317 |               "expr": "sum(rate(envoy_http_downstream_rq_total{service=\"$deployment\"}[1m]))",
318 |               "format": "time_series",
319 |               "instant": false,
320 |               "interval": "",
321 |               "intervalFactor": 1,
322 |               "legendFormat": "HTTP Queries Per Second",
323 |               "refId": "A"
324 |             },
325 |             {
326 |               "expr": "",
327 |               "format": "time_series",
328 |               "intervalFactor": 1,
329 |               "refId": "B"
330 |             }
331 |           ],
332 |           "thresholds": [],
333 |           "timeFrom": null,
334 |           "timeShift": null,
335 |           "title": "HTTP Request Count: $deployment",
336 |           "tooltip": {
337 |             "shared": true,
338 |             "sort": 0,
339 |             "value_type": "individual"
340 |           },
341 |           "type": "graph",
342 |           "xaxis": {
343 |             "buckets": null,
344 |             "mode": "time",
345 |             "name": null,
346 |             "show": true,
347 |             "values": []
348 |           },
349 |           "yaxes": [
350 |             {
351 |               "format": "short",
352 |               "label": "QpS",
353 |               "logBase": 1,
354 |               "max": null,
355 |               "min": null,
356 |               "show": true
357 |             },
358 |             {
359 |               "format": "short",
360 |               "label": null,
361 |               "logBase": 1,
362 |               "max": null,
363 |               "min": null,
364 |               "show": true
365 |             }
366 |           ],
367 |           "yaxis": {
368 |             "align": false,
369 |             "alignLevel": null
370 |           }
371 |         },
372 |         {
373 |           "aliasColors": {},
374 |           "bars": false,
375 |           "dashLength": 10,
376 |           "dashes": false,
377 |           "datasource": "DS_PROM_VALENCE",
378 |           "fill": 1,
379 |           "gridPos": {
380 |             "h": 8,
381 |             "w": 24,
382 |             "x": 0,
383 |             "y": 24
384 |           },
385 |           "id": 7,
386 |           "legend": {
387 |             "avg": false,
388 |             "current": true,
389 |             "max": false,
390 |             "min": false,
391 |             "show": true,
392 |             "total": false,
393 |             "values": true
394 |           },
395 |           "lines": true,
396 |           "linewidth": 1,
397 |           "links": [],
398 |           "nullPointMode": "null",
399 |           "percentage": false,
400 |           "pointradius": 5,
401 |           "points": false,
402 |           "renderer": "flot",
403 |           "seriesOverrides": [],
404 |           "spaceLength": 10,
405 |           "stack": false,
406 |           "steppedLine": false,
407 |           "targets": [
408 |             {
409 |               "expr": "avg(histogram_quantile($LatencyPercentile, sum(rate(envoy_http_downstream_rq_time_bucket{service=\"$deployment\"}[1m])) by(le, pod)))",
410 |               "format": "time_series",
411 |               "instant": false,
412 |               "interval": "",
413 |               "intervalFactor": 1,
414 |               "legendFormat": "HTTP Request Latency",
415 |               "refId": "A"
416 |             },
417 |             {
418 |               "expr": "valence_slo_http_latency{name=\"$deployment\"}",
419 |               "format": "time_series",
420 |               "intervalFactor": 1,
421 |               "refId": "B"
422 |             }
423 |           ],
424 |           "thresholds": [],
425 |           "timeFrom": null,
426 |           "timeShift": null,
427 |           "title": "HTTP Request Latency: $deployment",
428 |           "tooltip": {
429 |             "shared": true,
430 |             "sort": 0,
431 |             "value_type": "individual"
432 |           },
433 |           "type": "graph",
434 |           "xaxis": {
435 |             "buckets": null,
436 |             "mode": "time",
437 |             "name": null,
438 |             "show": true,
439 |             "values": []
440 |           },
441 |           "yaxes": [
442 |             {
443 |               "format": "ms",
444 |               "label": "",
445 |               "logBase": 1,
446 |               "max": null,
447 |               "min": "0",
448 |               "show": true
449 |             },
450 |             {
451 |               "format": "short",
452 |               "label": null,
453 |               "logBase": 1,
454 |               "max": null,
455 |               "min": null,
456 |               "show": true
457 |             }
458 |           ],
459 |           "yaxis": {
460 |             "align": false,
461 |             "alignLevel": null
462 |           }
463 |         },
464 |         {
465 |           "aliasColors": {},
466 |           "bars": false,
467 |           "dashLength": 10,
468 |           "dashes": false,
469 |           "datasource": "-- Mixed --",
470 |           "description": "Number of replicas observed and recommended for: $deployment",
471 |           "fill": 1,
472 |           "gridPos": {
473 |             "h": 8,
474 |             "w": 24,
475 |             "x": 0,
476 |             "y": 32
477 |           },
478 |           "id": 9,
479 |           "legend": {
480 |             "avg": false,
481 |             "current": true,
482 |             "max": false,
483 |             "min": false,
484 |             "show": true,
485 |             "total": false,
486 |             "values": true
487 |           },
488 |           "lines": true,
489 |           "linewidth": 1,
490 |           "links": [],
491 |           "nullPointMode": "null",
492 |           "percentage": false,
493 |           "pointradius": 5,
494 |           "points": false,
495 |           "renderer": "flot",
496 |           "seriesOverrides": [],
497 |           "spaceLength": 10,
498 |           "stack": false,
499 |           "steppedLine": false,
500 |           "targets": [
501 |             {
502 |               "datasource": "DS_PROM_VALENCE",
503 |               "expr": "valence_recommendations_replicas{name=\"$deployment\"}",
504 |               "format": "time_series",
505 |               "hide": false,
506 |               "intervalFactor": 1,
507 |               "legendFormat": "Recommended Replicas",
508 |               "refId": "A"
509 |             },
510 |             {
511 |               "datasource": "DS_PROM_VALENCE",
512 |               "expr": "kube_deployment_status_replicas_available{deployment=\"$deployment\"}",
513 |               "format": "time_series",
514 |               "intervalFactor": 1,
515 |               "legendFormat": "Current Replicas",
516 |               "refId": "B"
517 |             }
518 |           ],
519 |           "thresholds": [],
520 |           "timeFrom": null,
521 |           "timeShift": null,
522 |           "title": "Replicas: $deployment",
523 |           "tooltip": {
524 |             "shared": true,
525 |             "sort": 0,
526 |             "value_type": "individual"
527 |           },
528 |           "type": "graph",
529 |           "xaxis": {
530 |             "buckets": null,
531 |             "mode": "time",
532 |             "name": null,
533 |             "show": true,
534 |             "values": []
535 |           },
536 |           "yaxes": [
537 |             {
538 |               "decimals": null,
539 |               "format": "short",
540 |               "label": null,
541 |               "logBase": 1,
542 |               "max": null,
543 |               "min": null,
544 |               "show": true
545 |             },
546 |             {
547 |               "format": "short",
548 |               "label": null,
549 |               "logBase": 1,
550 |               "max": null,
551 |               "min": null,
552 |               "show": true
553 |             }
554 |           ],
555 |           "yaxis": {
556 |             "align": false,
557 |             "alignLevel": null
558 |           }
559 |         }
560 |       ],
561 |       "refresh": "5s",
562 |       "schemaVersion": 16,
563 |       "style": "dark",
564 |       "tags": [],
565 |       "templating": {
566 |         "list": [
567 |           {
568 |             "allValue": null,
569 |             "current": {
570 |               "selected": false,
571 |               "tags": [],
572 |               "text": "todo-backend-django-valence",
573 |               "value": "todo-backend-django-valence"
574 |             },
575 |             "datasource": "DS_PROM_VALENCE",
576 |             "hide": 0,
577 |             "includeAll": false,
578 |             "label": "Deployment",
579 |             "multi": false,
580 |             "name": "deployment",
581 |             "options": [],
582 |             "query": "label_values(envoy_http_downstream_rq_total, service)",
583 |             "refresh": 1,
584 |             "regex": "",
585 |             "sort": 0,
586 |             "tagValuesQuery": "",
587 |             "tags": [],
588 |             "tagsQuery": "",
589 |             "type": "query",
590 |             "useTags": false
591 |           },
592 |           {
593 |             "allValue": null,
594 |             "current": {
595 |               "selected": true,
596 |               "tags": [],
597 |               "text": "0.95",
598 |               "value": "0.95"
599 |             },
600 |             "hide": 0,
601 |             "includeAll": false,
602 |             "label": "Latency Percentile",
603 |             "multi": false,
604 |             "name": "LatencyPercentile",
605 |             "options": [
606 |               {
607 |                 "selected": false,
608 |                 "text": "0.5",
609 |                 "value": "0.5"
610 |               },
611 |               {
612 |                 "selected": false,
613 |                 "text": "0.75",
614 |                 "value": "0.75"
615 |               },
616 |               {
617 |                 "selected": false,
618 |                 "text": "0.9",
619 |                 "value": "0.9"
620 |               },
621 |               {
622 |                 "selected": true,
623 |                 "text": "0.95",
624 |                 "value": "0.95"
625 |               },
626 |               {
627 |                 "selected": false,
628 |                 "text": "0.99",
629 |                 "value": "0.99"
630 |               }
631 |             ],
632 |             "query": "0.5, 0.75, 0.9, 0.95, 0.99",
633 |             "type": "custom"
634 |           }
635 |         ]
636 |       },
637 |       "time": {
638 |         "from": "now-1h",
639 |         "to": "now"
640 |       },
641 |       "timepicker": {
642 |         "refresh_intervals": [
643 |           "1s",
644 |           "5s",
645 |           "30s",
646 |           "1m"
647 |         ],
648 |         "time_options": [
649 |           "5m",
650 |           "15m",
651 |           "1h",
652 |           "6h",
653 |           "12h",
654 |           "24h",
655 |           "2d",
656 |           "7d",
657 |           "30d"
658 |         ]
659 |       },
660 |       "timezone": "",
661 |       "title": "Valence",
662 |       "uid": "9ri9X0Qiz",
663 |       "version": 1
664 |     }


--------------------------------------------------------------------------------
/manifests/valence/grafana/deployment.yaml:
--------------------------------------------------------------------------------
 1 | # Single-replica Grafana that serves the provisioned Valence dashboard.
 2 | # apps/v1 is used because extensions/v1beta1 Deployments are no longer served
 3 | # from Kubernetes v1.16 onwards.
 4 | apiVersion: apps/v1
 5 | kind: Deployment
 6 | metadata:
 7 |   name: grafana
 8 | spec:
 9 |   replicas: 1
10 |   # apps/v1 requires an explicit selector that matches the pod template labels.
11 |   selector:
12 |     matchLabels:
13 |       app.kubernetes.io/name: grafana
14 |   strategy:
15 |     rollingUpdate:
16 |       maxSurge: 1
17 |       maxUnavailable: 0
18 |     type: RollingUpdate
19 |   template:
20 |     metadata:
21 |       labels:
22 |         app.kubernetes.io/name: grafana
23 |     spec:
24 |       containers:
25 |       # The 0.0.0 tag is a placeholder; the kustomization's imageTags entry
26 |       # for grafana/grafana supplies the real tag.
27 |       - image: grafana/grafana:0.0.0
28 |         name: grafana
29 |         ports:
30 |         - containerPort: 3000
31 |           protocol: TCP
32 |         env:
33 |           - name: GF_SERVER_ROOT_URL
34 |             value: /api/v1/namespaces/valence-system/services/grafana/proxy/
35 |         resources:
36 |           limits:
37 |             cpu: 500m
38 |             memory: 2500Mi
39 |           requests:
40 |             cpu: 100m
41 |             memory: 100Mi
42 |         volumeMounts:
43 |         - mountPath: /var/lib/grafana
44 |           name: data
45 |         - mountPath: /etc/grafana/provisioning/dashboards
46 |           name: providers
47 |         - mountPath: /etc/grafana/provisioning/datasources
48 |           name: datasources
49 |         # Mounts only the valence.json key of the dashboard ConfigMap as a file.
50 |         - mountPath: /var/lib/grafana/dashboards/capacity-planning.json
51 |           name: dashboards-valence
52 |           subPath: valence.json
53 |       restartPolicy: Always
54 |       volumes:
55 |       # emptyDir: Grafana state does not survive the pod being rescheduled.
56 |       - emptyDir: {}
57 |         name: data
58 |       - name: providers
59 |         configMap:
60 |           name: grafana-providers
61 |       - name: datasources
62 |         configMap:
63 |           name: grafana-datasources
64 |       - name: dashboards-valence
65 |         configMap:
66 |           name: grafana-dashboards-valence
67 | 


--------------------------------------------------------------------------------
/manifests/valence/grafana/kustomization.yaml:
--------------------------------------------------------------------------------
 1 | # Grafana component: manifests, shared label, and the pinned image tag.
 2 | resources:
 3 | - service.yaml
 4 | - deployment.yaml
 5 | - configMap.yaml
 6 | - dashboard-valence.yaml
 7 | commonLabels:
 8 |   app.kubernetes.io/name: grafana
 9 | imageTags:
10 | - newTag: 5.2.4
11 |   name: grafana/grafana
12 | 


--------------------------------------------------------------------------------
/manifests/valence/grafana/service.yaml:
--------------------------------------------------------------------------------
 1 | # Exposes Grafana's HTTP port (3000) through a NodePort Service.
 2 | apiVersion: v1
 3 | kind: Service
 4 | metadata:
 5 |   name: grafana
 6 | spec:
 7 |   type: NodePort
 8 |   ports:
 9 |   - port: 3000
10 |     protocol: TCP
11 |     targetPort: 3000
12 |   # Explicit selector, like the operator and Prometheus Services, so the
13 |   # manifest states which pods it targets instead of relying solely on the
14 |   # label kustomize injects through commonLabels.
15 |   selector:
16 |     app.kubernetes.io/name: grafana
17 | 


--------------------------------------------------------------------------------
/manifests/valence/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Top-level Valence bundle: every base is placed in valence-system.
2 | namespace: valence-system
3 | commonLabels:
4 |   app.kubernetes.io/part-of: valence
5 | bases:
6 | - ./grafana
7 | - ./prometheus
8 | - ./operator
9 | 


--------------------------------------------------------------------------------
/manifests/valence/operator/crds.yaml:
--------------------------------------------------------------------------------
 1 | # Registers the namespaced ServiceLevelObjective custom resource.
 2 | apiVersion: apiextensions.k8s.io/v1beta1
 3 | kind: CustomResourceDefinition
 4 | metadata:
 5 |   name: servicelevelobjectives.optimizer.valence.io
 6 | spec:
 7 |   scope: Namespaced
 8 |   group: optimizer.valence.io
 9 |   version: v1alpha1
10 |   names:
11 |     kind: ServiceLevelObjective
12 |     plural: servicelevelobjectives
13 | 


--------------------------------------------------------------------------------
/manifests/valence/operator/deployment.yaml:
--------------------------------------------------------------------------------
 1 | # Valence optimization operator, run as a single replica.
 2 | apiVersion: extensions/v1beta1
 3 | kind: Deployment
 4 | metadata:
 5 |   name: optimization-operator
 6 | spec:
 7 |   replicas: 1
 8 |   template:
 9 |     spec:
10 |       serviceAccountName: valence-operator
11 |       containers:
12 |       - name: optimization-operator
13 |         # Placeholder tag; the kustomization's imageTags entry sets the real one.
14 |         image: valencenet/valence:0.0.0
15 |         imagePullPolicy: Always
16 |         args: [operator]
17 |         resources:
18 |           requests:
19 |             cpu: 250m
20 |             memory: 250M
21 |           limits:
22 |             cpu: 500m
23 |             memory: 500M
24 |         env:
25 |         - name: DATA_OPT_OUT
26 |           value: "false"
27 |         - name: LICENSE_KEY
28 |           value: "license key"
29 |         - name: MIN_SAMPLE_SIZE
30 |           value: "20"
31 |         - name: PROMETHEUS_URL
32 |           value: http://prometheus-valence.valence-system.svc:9090
33 | 


--------------------------------------------------------------------------------
/manifests/valence/operator/kustomization.yaml:
--------------------------------------------------------------------------------
 1 | # Operator component: CRD, RBAC, workload, Service and namespace.
 2 | resources:
 3 | - crds.yaml
 4 | - rbac.yaml
 5 | - deployment.yaml
 6 | - service.yaml
 7 | - namespace.yaml
 8 | commonLabels:
 9 |   app.kubernetes.io/name: valence
10 |   app.kubernetes.io/component: operator
11 |   app.kubernetes.io/version: 0.3.2
12 | imageTags:
13 | - newTag: 0.3.2
14 |   name: valencenet/valence
15 | 


--------------------------------------------------------------------------------
/manifests/valence/operator/namespace.yaml:
--------------------------------------------------------------------------------
1 | # Namespace the Valence kustomization deploys into.
2 | apiVersion: v1
3 | kind: Namespace
4 | metadata: {name: valence-system}
5 | 


--------------------------------------------------------------------------------
/manifests/valence/operator/rbac.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: rbac.authorization.k8s.io/v1  # GA RBAC API, matching the v1 objects in tooling.yaml
 2 | kind: ClusterRole
 3 | metadata:
 4 |   name: valence:optimization-operator
 5 | rules:
 6 |   - apiGroups: ["extensions"]
 7 |     resources: ["deployments"]
 8 |     verbs: ["get", "list", "watch", "update", "patch"]
 9 |   - apiGroups: ["metrics.k8s.io"]
10 |     resources: ["pods"]
11 |     verbs: ["get", "list"]
12 |   - apiGroups: [""]
13 |     resources:
14 |     - pods
15 |     - events
16 |     - nodes
17 |     verbs: ["get", "list", "watch"]
18 |   - apiGroups: ["optimizer.valence.io"]
19 |     resources: ["servicelevelobjectives"]
20 |     verbs: ["get", "list", "watch", "update", "patch"]
21 |   - apiGroups: ["apiextensions.k8s.io"]
22 |     resources: ["customresourcedefinitions"]
23 |     verbs: ["*"]  # NOTE(review): full access to every CRD; narrow once the operator's real needs are confirmed
24 | 
25 | ---
26 | # Grants the ClusterRole above to the operator's ServiceAccount.
27 | apiVersion: rbac.authorization.k8s.io/v1
28 | kind: ClusterRoleBinding
29 | metadata:
30 |   name: valence:optimization-operator
31 | roleRef:
32 |   apiGroup: rbac.authorization.k8s.io
33 |   kind: ClusterRole
34 |   name: valence:optimization-operator
35 | subjects:
36 |   - name: valence-operator
37 |     namespace: valence-system
38 |     kind: ServiceAccount
39 | 
40 | ---
41 | # Identity referenced by the operator Deployment's serviceAccountName.
42 | apiVersion: v1
43 | kind: ServiceAccount
44 | metadata:
45 |   name: valence-operator
46 |   namespace: valence-system
47 | 


--------------------------------------------------------------------------------
/manifests/valence/operator/service.yaml:
--------------------------------------------------------------------------------
 1 | # NodePort Service in front of the operator's port 8080.
 2 | apiVersion: v1
 3 | kind: Service
 4 | metadata:
 5 |   name: optimization-operator
 6 | spec:
 7 |   selector:
 8 |     app.kubernetes.io/name: valence
 9 |   ports:
10 |     - targetPort: 8080
11 |       port: 8080
12 |       name: prometheus  # port name the "valence" scrape job's relabel rule keeps
13 |   type: NodePort
14 | 


--------------------------------------------------------------------------------
/manifests/valence/prometheus/config-map.yaml:
--------------------------------------------------------------------------------
  1 | apiVersion: v1
  2 | kind: ConfigMap
  3 | metadata:
  4 |   name: prometheus-valence  # mounted by the prometheus-valence StatefulSet at /etc/prometheus
  5 | data:
  6 |   prometheus.yaml: |  # loaded through --config.file=/etc/prometheus/prometheus.yaml
  7 |     global:
  8 |       scrape_interval: 10s
  9 |       scrape_timeout: 10s
 10 |       evaluation_interval: 10s
 11 |     rule_files:
 12 |       - "/etc/prometheus-rules/*.rules"
 13 |     remote_write:
 14 |     - url: http://optimization-operator.valence-system:8080/write
 15 |     scrape_configs:
 16 |     - job_name: kube-state-metrics  # container requests/limits and available-replica counts
 17 |       honor_labels: true
 18 |       scrape_interval: 1m
 19 |       scrape_timeout: 10s
 20 |       metrics_path: /metrics
 21 |       scheme: http
 22 |       kubernetes_sd_configs:
 23 |       - role: endpoints
 24 |       bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
 25 |       tls_config:
 26 |         insecure_skip_verify: true
 27 |       metric_relabel_configs:  # every series not named in the regex below is dropped
 28 |       - source_labels: [__name__]
 29 |         regex: (?i)(kube_pod_container_resource_requests_memory_bytes|kube_pod_container_resource_limits_memory_bytes|kube_pod_container_resource_requests_cpu_cores|kube_pod_container_resource_limits_cpu_cores|kube_deployment_status_replicas_available)
 30 |         action: keep
 31 |       relabel_configs:  # rules run in order: two keep filters, then label shaping
 32 |       - source_labels: [__meta_kubernetes_service_label_app]
 33 |         separator: ;
 34 |         regex: kube-state-metrics
 35 |         replacement: $1
 36 |         action: keep
 37 |       - source_labels: [__meta_kubernetes_endpoint_port_name]
 38 |         separator: ;
 39 |         regex: metrics
 40 |         replacement: $1
 41 |         action: keep
 42 |       - source_labels: [__meta_kubernetes_namespace]
 43 |         separator: ;
 44 |         regex: (.*)
 45 |         target_label: namespace
 46 |         replacement: $1
 47 |         action: replace
 48 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
 49 |         separator: ;
 50 |         regex: Node;(.*)
 51 |         target_label: node
 52 |         replacement: ${1}
 53 |         action: replace
 54 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
 55 |         separator: ;
 56 |         regex: Pod;(.*)
 57 |         target_label: pod
 58 |         replacement: ${1}
 59 |         action: replace
 60 |       - source_labels: [__meta_kubernetes_service_name]
 61 |         separator: ;
 62 |         regex: (.*)
 63 |         target_label: service
 64 |         replacement: $1
 65 |         action: replace
 66 |       - source_labels: [__meta_kubernetes_service_name]
 67 |         separator: ;
 68 |         regex: (.*)
 69 |         target_label: job
 70 |         replacement: ${1}
 71 |         action: replace
 72 |       - source_labels: [__meta_kubernetes_service_label_app]
 73 |         separator: ;
 74 |         regex: (.+)
 75 |         target_label: job
 76 |         replacement: ${1}
 77 |         action: replace  # a non-empty app label overrides the job set from the service name above
 78 |       - separator: ;
 79 |         regex: (.*)
 80 |         target_label: endpoint
 81 |         replacement: metrics
 82 |         action: replace
 83 |     - job_name: kubernetes-nodes-cadvisor  # per-node cAdvisor metrics, fetched through the API-server node proxy
 84 |       scrape_interval: 1m
 85 |       scrape_timeout: 10s
 86 |       metrics_path: /metrics
 87 |       scheme: https
 88 |       kubernetes_sd_configs:
 89 |       - api_server: null
 90 |         role: node
 91 |         namespaces:
 92 |           names: []
 93 |       bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
 94 |       tls_config:
 95 |         ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
 96 |         insecure_skip_verify: false  # verify the API server against ca_file before sending the bearer token
 97 |       metric_relabel_configs:  # only the two container usage series below are kept
 98 |       - source_labels: [__name__]
 99 |         regex: (?i)(container_memory_working_set_bytes|container_cpu_usage_seconds_total)
100 |         action: keep
101 |       relabel_configs:
102 |       - separator: ;
103 |         regex: __meta_kubernetes_node_label_(.+)
104 |         replacement: $1
105 |         action: labelmap
106 |       - separator: ;
107 |         regex: (.*)
108 |         target_label: __address__
109 |         replacement: kubernetes.default.svc:443
110 |         action: replace  # every node target is scraped via the in-cluster API server address
111 |       - source_labels: [__meta_kubernetes_node_name]
112 |         separator: ;
113 |         regex: (.+)
114 |         target_label: __metrics_path__
115 |         replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
116 |         action: replace
117 |     - job_name: prometheus-valence  # Envoy HTTP request count and latency histogram from opted-in Services
118 |       scrape_interval: 5s
119 |       scrape_timeout: 5s
120 |       metrics_path: /stats/prometheus
121 |       scheme: http
122 |       kubernetes_sd_configs:
123 |       - role: endpoints
124 |       metric_relabel_configs:  # every series not named in the regex below is dropped
125 |       - source_labels: [__name__]
126 |         regex: (?i)(envoy_http_downstream_rq_time_bucket|envoy_http_downstream_rq_total)
127 |         action: keep
128 |       relabel_configs:  # rules run in order: two keep filters, then label shaping
129 |       - source_labels: [__meta_kubernetes_service_label_valence_net_prometheus]
130 |         separator: ;
131 |         regex: "true"
132 |         replacement: $1
133 |         action: keep  # only Services whose valence_net_prometheus label (sanitised name) equals "true"
134 |       - source_labels: [__meta_kubernetes_endpoint_port_name]
135 |         separator: ;
136 |         regex: prometheus
137 |         replacement: $1
138 |         action: keep  # only the endpoint port named "prometheus"
139 |       - source_labels: [__meta_kubernetes_namespace]
140 |         separator: ;
141 |         regex: (.*)
142 |         target_label: namespace
143 |         replacement: $1
144 |         action: replace
145 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
146 |         separator: ;
147 |         regex: Node;(.*)
148 |         target_label: node
149 |         replacement: ${1}
150 |         action: replace
151 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
152 |         separator: ;
153 |         regex: Pod;(.*)
154 |         target_label: pod
155 |         replacement: ${1}
156 |         action: replace
157 |       - source_labels: [__meta_kubernetes_service_name]
158 |         separator: ;
159 |         regex: (.*)
160 |         target_label: service
161 |         replacement: $1
162 |         action: replace  # "service" is the label the Grafana dashboard's $deployment variable is built from
163 |       - source_labels: [__meta_kubernetes_service_name]
164 |         separator: ;
165 |         regex: (.*)
166 |         target_label: job
167 |         replacement: ${1}
168 |         action: replace
169 |       - separator: ;
170 |         regex: (.*)
171 |         target_label: endpoint
172 |         replacement: prometheus
173 |         action: replace
174 |     - job_name: valence  # the operator's own recommendation, original-value and SLO series
175 |       scrape_interval: 1m
176 |       scrape_timeout: 10s
177 |       metrics_path: /metrics
178 |       scheme: http
179 |       kubernetes_sd_configs:
180 |       - role: endpoints
181 |         namespaces:  # discovery is limited to the valence-system namespace
182 |           names:
183 |           - valence-system
184 |       metric_relabel_configs:  # every series not named in the regex below is dropped
185 |       - source_labels: [__name__]
186 |         regex: (?i)(valence_recommendations_memory_requests|valence_recommendations_memory_limits|valence_recommendations_cpu_requests|valence_recommendations_cpu_limits|valence_recommendations_replicas|valence_original_cpu_limits|valence_original_cpu_requests|valence_original_memory_limits|valence_original_memory_requests|valence_original_replicas|valence_slo_http_latency|valence_slo_http_percentile|valence_slo_http_throughput)
187 |         action: keep
188 |       relabel_configs:  # rules run in order: two keep filters, then label shaping
189 |       - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_component]
190 |         separator: ;
191 |         regex: operator
192 |         replacement: $1
193 |         action: keep  # only Services labelled app.kubernetes.io/component=operator
194 |       - source_labels: [__meta_kubernetes_endpoint_port_name]
195 |         separator: ;
196 |         regex: prometheus
197 |         replacement: $1
198 |         action: keep  # only the endpoint port named "prometheus"
199 |       - source_labels: [__meta_kubernetes_namespace]
200 |         separator: ;
201 |         regex: (.*)
202 |         target_label: namespace
203 |         replacement: $1
204 |         action: replace
205 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
206 |         separator: ;
207 |         regex: Node;(.*)
208 |         target_label: node
209 |         replacement: ${1}
210 |         action: replace
211 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
212 |         separator: ;
213 |         regex: Pod;(.*)
214 |         target_label: pod
215 |         replacement: ${1}
216 |         action: replace
217 |       - source_labels: [__meta_kubernetes_service_name]
218 |         separator: ;
219 |         regex: (.*)
220 |         target_label: service
221 |         replacement: $1
222 |         action: replace
223 |       - source_labels: [__meta_kubernetes_service_name]
224 |         separator: ;
225 |         regex: (.*)
226 |         target_label: job
227 |         replacement: ${1}
228 |         action: replace
229 |       - separator: ;
230 |         regex: (.*)
231 |         target_label: endpoint
232 |         replacement: prometheus
233 |         action: replace
234 | 


--------------------------------------------------------------------------------
/manifests/valence/prometheus/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Prometheus component of the Valence stack.
2 | resources:
3 | - config-map.yaml
4 | - prometheus-service-accounts.yaml
5 | - service.yaml
6 | - stateful-set.yaml
7 | commonLabels:
8 |   app.kubernetes.io/name: prometheus-valence
9 | 


--------------------------------------------------------------------------------
/manifests/valence/prometheus/prometheus-service-accounts.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1  # identity referenced by the Prometheus StatefulSet's serviceAccountName
 2 | kind: ServiceAccount
 3 | metadata:
 4 |   name: prometheus-valence
 5 | 
 6 | ---
 7 | # Read-only access used for service discovery and scraping; RBAC v1 is the GA API.
 8 | apiVersion: rbac.authorization.k8s.io/v1
 9 | kind: ClusterRole
10 | metadata:
11 |   name: prometheus-valence
12 | rules:
13 | - apiGroups: [""]
14 |   resources:
15 |   - nodes
16 |   - nodes/proxy
17 |   - services
18 |   - endpoints
19 |   - pods
20 |   verbs: ["get", "list", "watch"]
21 | - apiGroups: [""]
22 |   resources:
23 |   - configmaps
24 |   verbs: ["get"]
25 | - nonResourceURLs: ["/metrics"]
26 |   verbs: ["get"]
27 | 
28 | ---
29 | # Grants the ClusterRole above to the prometheus-valence ServiceAccount.
30 | apiVersion: rbac.authorization.k8s.io/v1
31 | kind: ClusterRoleBinding
32 | metadata:
33 |   name: prometheus-valence
34 | roleRef:
35 |   apiGroup: rbac.authorization.k8s.io
36 |   kind: ClusterRole
37 |   name: prometheus-valence
38 | subjects:
39 | - kind: ServiceAccount
40 |   name: prometheus-valence
41 |   namespace: valence-system
42 | 
43 | 


--------------------------------------------------------------------------------
/manifests/valence/prometheus/service.yaml:
--------------------------------------------------------------------------------
 1 | # NodePort Service for the Prometheus web port.
 2 | apiVersion: v1
 3 | kind: Service
 4 | metadata:
 5 |   name: prometheus-valence
 6 | spec:
 7 |   selector:
 8 |     app.kubernetes.io/name: prometheus-valence
 9 |   type: NodePort
10 |   ports:
11 |     - port: 9090
12 |       name: web
13 |       targetPort: web  # refers to the container port named "web" in the StatefulSet
14 |       protocol: TCP
15 | 


--------------------------------------------------------------------------------
/manifests/valence/prometheus/stateful-set.yaml:
--------------------------------------------------------------------------------
 1 | # Single-replica Prometheus for Valence. All volumes are emptyDir, so stored
 2 | # metrics do not outlive the pod; retention is capped at 6h.
 3 | apiVersion: apps/v1
 4 | kind: StatefulSet
 5 | metadata:
 6 |   name: prometheus-valence
 7 |   namespace: valence-system
 8 | spec:
 9 |   volumeClaimTemplates: []
10 |   replicas: 1
11 |   serviceName: prometheus-valence
12 |   # apps/v1 requires spec.selector and matching template labels; declared here
13 |   # so the manifest does not depend solely on kustomize injecting commonLabels.
14 |   selector:
15 |     matchLabels:
16 |       app.kubernetes.io/name: prometheus-valence
17 |   template:
18 |     metadata:
19 |       labels:
20 |         app.kubernetes.io/name: prometheus-valence
21 |     spec:
22 |       serviceAccountName: prometheus-valence
23 |       containers:
24 |       - name: prometheus
25 |         args:
26 |         - --web.console.templates=/etc/prometheus/consoles
27 |         - --web.console.libraries=/etc/prometheus/console_libraries
28 |         - --config.file=/etc/prometheus/prometheus.yaml
29 |         - --storage.tsdb.path=/prometheus
30 |         - --web.enable-lifecycle
31 |         - --storage.tsdb.no-lockfile
32 |         - --web.route-prefix=/
33 |         - --storage.tsdb.retention.time=6h
34 |         image: quay.io/prometheus/prometheus:v2.7.1
35 |         ports:
36 |         - name: web
37 |           containerPort: 9090
38 |         resources:
39 |           limits:
40 |             cpu: 250m
41 |             memory: 2.5Gi
42 |           requests:
43 |             cpu: 100m
44 |             memory: 2Gi
45 |         volumeMounts:
46 |         # Provides /etc/prometheus/prometheus.yaml from the ConfigMap below.
47 |         - name: config-volume
48 |           mountPath: /etc/prometheus
49 |         - name: rules-volume
50 |           mountPath: /etc/prometheus-rules
51 |         - mountPath: /prometheus
52 |           name: prometheus-valence-db
53 |       volumes:
54 |       - name: config-volume
55 |         configMap:
56 |           name: prometheus-valence
57 |       - name: rules-volume
58 |         emptyDir: {}
59 |       - emptyDir: {}
60 |         name: prometheus-valence-db
61 | 
62 | 


--------------------------------------------------------------------------------
/tooling.yaml:
--------------------------------------------------------------------------------
  1 | apiVersion: v1
  2 | kind: ServiceAccount
  3 | metadata:
  4 |   labels:
  5 |     app: kube-state-metrics
  6 |   name: kube-state-metrics
  7 |   namespace: kube-system
  8 | ---
  9 | apiVersion: v1
 10 | kind: ServiceAccount
 11 | metadata:
 12 |   name: metrics-server
 13 |   namespace: kube-system
 14 | ---
 15 | apiVersion: rbac.authorization.k8s.io/v1
 16 | kind: Role
 17 | metadata:
 18 |   labels:
 19 |     app: kube-state-metrics
 20 |   name: kube-state-metrics-resizer
 21 |   namespace: kube-system
 22 | rules:
 23 | - apiGroups:
 24 |   - ""
 25 |   resources:
 26 |   - pods
 27 |   verbs:
 28 |   - get
 29 | - apiGroups:
 30 |   - extensions
 31 |   resourceNames:
 32 |   - kube-state-metrics
 33 |   resources:
 34 |   - deployments
 35 |   verbs:
 36 |   - get
 37 |   - update
 38 | ---
 39 | apiVersion: rbac.authorization.k8s.io/v1
 40 | kind: ClusterRole  # cluster-wide permissions; every rule below grants only list/watch
 41 | metadata:
 42 |   labels:
 43 |     app: kube-state-metrics
 44 |   name: kube-state-metrics  # referenced by the ClusterRoleBinding of the same name
 45 | rules:
 46 | - apiGroups:  # core ("") API group objects
 47 |   - ""
 48 |   resources:
 49 |   - nodes
 50 |   - pods
 51 |   - services
 52 |   - resourcequotas
 53 |   - replicationcontrollers
 54 |   - limitranges
 55 |   - persistentvolumeclaims
 56 |   - persistentvolumes
 57 |   - namespaces
 58 |   - endpoints
 59 |   - secrets  # NOTE(review): list/watch on secrets exposes secret contents to this identity - confirm it is required
 60 |   - configmaps
 61 |   verbs:
 62 |   - list
 63 |   - watch
 64 | - apiGroups:  # workload controllers served from the `extensions` group
 65 |   - extensions
 66 |   resources:
 67 |   - daemonsets
 68 |   - deployments
 69 |   - replicasets
 70 |   verbs:
 71 |   - list
 72 |   - watch
 73 | - apiGroups:
 74 |   - apps
 75 |   resources:
 76 |   - statefulsets
 77 |   verbs:
 78 |   - list
 79 |   - watch
 80 | - apiGroups:
 81 |   - batch
 82 |   resources:
 83 |   - cronjobs
 84 |   - jobs
 85 |   verbs:
 86 |   - list
 87 |   - watch
 88 | - apiGroups:
 89 |   - autoscaling
 90 |   resources:
 91 |   - horizontalpodautoscalers
 92 |   verbs:
 93 |   - list
 94 |   - watch
 95 | ---
 96 | apiVersion: rbac.authorization.k8s.io/v1
 97 | kind: ClusterRole  # read-only (get/list/watch) permissions for metrics-server
 98 | metadata:
 99 |   name: system:metrics-server  # referenced by the ClusterRoleBinding of the same name
100 | rules:
101 | - apiGroups:  # core ("") API group
102 |   - ""
103 |   resources:
104 |   - pods
105 |   - nodes
106 |   - nodes/stats  # subresource of nodes
107 |   - namespaces
108 |   verbs:
109 |   - get
110 |   - list
111 |   - watch
112 | - apiGroups:
113 |   - extensions
114 |   resources:
115 |   - deployments
116 |   verbs:
117 |   - get
118 |   - list
119 |   - watch
120 | ---
121 | apiVersion: rbac.authorization.k8s.io/v1
122 | kind: RoleBinding  # grants the namespaced resizer Role to the kube-state-metrics ServiceAccount
123 | metadata:
124 |   labels:
125 |     app: kube-state-metrics
126 |   name: kube-state-metrics
127 |   namespace: kube-system
128 | roleRef:
129 |   apiGroup: rbac.authorization.k8s.io
130 |   kind: Role
131 |   name: kube-state-metrics-resizer  # Role defined earlier in this file
132 | subjects:
133 | - kind: ServiceAccount
134 |   name: kube-state-metrics
135 |   namespace: kube-system
136 | ---
137 | apiVersion: rbac.authorization.k8s.io/v1  # was v1beta1; aligned with the other RBAC objects in this file
138 | kind: RoleBinding  # grants a Role in kube-system to the metrics-server ServiceAccount
139 | metadata:
140 |   name: metrics-server-auth-reader
141 |   namespace: kube-system
142 | roleRef:
143 |   apiGroup: rbac.authorization.k8s.io
144 |   kind: Role
145 |   name: extension-apiserver-authentication-reader  # not defined in this file; expected to already exist in kube-system
146 | subjects:
147 | - kind: ServiceAccount
148 |   name: metrics-server
149 |   namespace: kube-system
150 | ---
151 | apiVersion: rbac.authorization.k8s.io/v1
152 | kind: ClusterRoleBinding  # grants the kube-state-metrics ClusterRole cluster-wide
153 | metadata:
154 |   labels:
155 |     app: kube-state-metrics
156 |   name: kube-state-metrics
157 | roleRef:
158 |   apiGroup: rbac.authorization.k8s.io
159 |   kind: ClusterRole
160 |   name: kube-state-metrics  # ClusterRole defined earlier in this file
161 | subjects:
162 | - kind: ServiceAccount
163 |   name: kube-state-metrics
164 |   namespace: kube-system
165 | ---
166 | apiVersion: rbac.authorization.k8s.io/v1
167 | kind: ClusterRoleBinding  # grants the system:metrics-server ClusterRole cluster-wide
168 | metadata:
169 |   name: system:metrics-server
170 | roleRef:
171 |   apiGroup: rbac.authorization.k8s.io
172 |   kind: ClusterRole
173 |   name: system:metrics-server  # ClusterRole defined earlier in this file
174 | subjects:
175 | - kind: ServiceAccount
176 |   name: metrics-server
177 |   namespace: kube-system
178 | ---
179 | apiVersion: rbac.authorization.k8s.io/v1  # was v1beta1; aligned with the other RBAC objects in this file
180 | kind: ClusterRoleBinding  # binds the metrics-server ServiceAccount to system:auth-delegator
181 | metadata:
182 |   name: metrics-server:system:auth-delegator
183 | roleRef:
184 |   apiGroup: rbac.authorization.k8s.io
185 |   kind: ClusterRole
186 |   name: system:auth-delegator  # not defined in this file; expected to already exist in the cluster
187 | subjects:
188 | - kind: ServiceAccount
189 |   name: metrics-server
190 |   namespace: kube-system
191 | ---
192 | apiVersion: v1
193 | kind: Service  # exposes pods labelled app=kube-state-metrics on two named ports
194 | metadata:
195 |   annotations:
196 |     prometheus.io/scrape: "true"  # quoted: annotation values must be strings
197 |   labels:
198 |     app: kube-state-metrics
199 |   name: kube-state-metrics
200 |   namespace: kube-system
201 | spec:
202 |   ports:
203 |   - name: metrics
204 |     port: 8080
205 |     protocol: TCP
206 |     targetPort: metrics  # a port *name*, resolved against the selected pods' named container ports
207 |   - name: telemetry
208 |     port: 8081
209 |     protocol: TCP
210 |     targetPort: telemetry  # also a port name; requires a container port named `telemetry` on the pods
211 |   selector:
212 |     app: kube-state-metrics
213 | ---
214 | apiVersion: v1
215 | kind: Service  # backend referenced by the v1beta1.metrics.k8s.io APIService at the end of this file
216 | metadata:
217 |   labels:
218 |     kubernetes.io/name: metrics-server
219 |   name: metrics-server
220 |   namespace: kube-system
221 | spec:
222 |   ports:
223 |   - port: 443
224 |     protocol: TCP
225 |     targetPort: 443  # numeric target; the Deployment declares no named container ports
226 |   selector:
227 |     k8s-app: metrics-server  # matches the pod template labels of the metrics-server Deployment
228 | ---
229 | apiVersion: extensions/v1beta1
230 | kind: Deployment  # runs a single kube-state-metrics pod under the kube-state-metrics ServiceAccount
231 | metadata:
232 |   labels:
233 |     app: kube-state-metrics
234 |   name: kube-state-metrics
235 |   namespace: kube-system
236 | spec:
237 |   replicas: 1
238 |   selector:
239 |     matchLabels:
240 |       app: kube-state-metrics
241 |   template:
242 |     metadata:
243 |       labels:
244 |         app: kube-state-metrics  # must match spec.selector and the Service selector
245 |     spec:
246 |       containers:
247 |       - image: gcr.io/google_containers/kube-state-metrics:v1.3.1
248 |         name: kube-state-metrics
249 |         ports:  # names must match the Service's named targetPorts; `telemetry` was previously missing
250 |         - {containerPort: 8080, name: metrics}
251 |         - {containerPort: 8081, name: telemetry}
252 |         resources:
253 |           limits:
254 |             cpu: 200m
255 |             memory: 500Mi
256 |           requests:
257 |             cpu: 100m
258 |             memory: 300Mi
259 |       serviceAccountName: kube-state-metrics
260 | ---
261 | apiVersion: extensions/v1beta1
262 | kind: Deployment  # runs metrics-server under the metrics-server ServiceAccount
263 | metadata:
264 |   labels:
265 |     k8s-app: metrics-server
266 |   name: metrics-server
267 |   namespace: kube-system
268 | spec:
269 |   selector:
270 |     matchLabels:
271 |       k8s-app: metrics-server
272 |   template:
273 |     metadata:
274 |       labels:
275 |         k8s-app: metrics-server  # must match spec.selector and the metrics-server Service selector
276 |       name: metrics-server
277 |     spec:
278 |       containers:
279 |       - command:  # overrides the image entrypoint; flags are passed verbatim to /metrics-server
280 |         - /metrics-server
281 |         - --source=kubernetes.summary_api:https://kubernetes.default?kubeletHttps=true&kubeletPort=10250&insecure=true
282 |         - --metric-resolution=5s
283 |         image: gcr.io/google_containers/metrics-server-amd64:v0.2.1
284 |         imagePullPolicy: Always  # re-pulls the image on every pod start even though the tag is fixed
285 |         name: metrics-server
286 |         resources:
287 |           limits:
288 |             cpu: 80m
289 |             memory: 200Mi
290 |           requests:
291 |             cpu: 40m
292 |             memory: 32Mi
293 |       serviceAccountName: metrics-server
294 | ---
295 | apiVersion: apiregistration.k8s.io/v1beta1
296 | kind: APIService  # registers metrics.k8s.io/v1beta1 and points it at the metrics-server Service
297 | metadata:
298 |   name: v1beta1.metrics.k8s.io  # APIService names take the form <version>.<group>
299 | spec:
300 |   group: metrics.k8s.io
301 |   groupPriorityMinimum: 100
302 |   insecureSkipTLSVerify: true  # NOTE(review): the backing service's TLS certificate is not verified
303 |   service:
304 |     name: metrics-server
305 |     namespace: kube-system
306 |   version: v1beta1
307 |   versionPriority: 100
308 | 


--------------------------------------------------------------------------------
/tooling/kube-state-metrics/deployment.yaml:
--------------------------------------------------------------------------------
 1 | # Base Deployment for kube-state-metrics. Labels and namespace are not set
 2 | # here; the sibling kustomization.yaml declares commonLabels and namespace.
 3 | apiVersion: extensions/v1beta1
 4 | kind: Deployment
 5 | metadata:
 6 |   name: kube-state-metrics
 7 | spec:
 8 |   replicas: 1
 9 |   template:
10 |     spec:
11 |       serviceAccountName: kube-state-metrics
12 |       containers:
13 |       - name: kube-state-metrics
14 |         image: gcr.io/google_containers/kube-state-metrics:v1.3.1
15 |         ports:
16 |         # Port names must match the named targetPorts in service.yaml.
17 |         - name: metrics
18 |           containerPort: 8080
19 |         # Declared so the Service's `telemetry` targetPort (8081) resolves;
20 |         # previously no container port carried that name.
21 |         - name: telemetry
22 |           containerPort: 8081
23 |         resources:
24 |           requests:
25 |             memory: 300Mi
26 |             cpu: 100m
27 |           limits:
28 |             memory: 500Mi
29 |             cpu: 200m
30 | 


--------------------------------------------------------------------------------
/tooling/kube-state-metrics/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Places every resource below in kube-system and labels it app=kube-state-metrics.
2 | namespace: kube-system
3 | commonLabels:
4 |   app: kube-state-metrics
5 | resources:
6 | - rbac.yaml
7 | - deployment.yaml
8 | - service.yaml
9 | 


--------------------------------------------------------------------------------
/tooling/kube-state-metrics/rbac.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ServiceAccount  # identity used by deployment.yaml (serviceAccountName) and bound by the objects below
 3 | metadata:
 4 |   name: kube-state-metrics  # no namespace here; kustomization.yaml in this directory sets namespace: kube-system
 5 | 
 6 | ---
 7 | 
 8 | apiVersion: rbac.authorization.k8s.io/v1
 9 | kind: Role  # namespaced permissions; granted by the RoleBinding at the end of this file
10 | metadata:
11 |   name: kube-state-metrics-resizer
12 | rules:
13 | - apiGroups: [""]  # core API group
14 |   resources:
15 |   - pods
16 |   verbs: ["get"]
17 | - apiGroups: ["extensions"]
18 |   resources:
19 |   - deployments
20 |   resourceNames: ["kube-state-metrics"]  # limits get/update to this one Deployment
21 |   verbs: ["get", "update"]
22 | 
23 | ---
24 | 
25 | apiVersion: rbac.authorization.k8s.io/v1
26 | kind: ClusterRole  # cluster-wide permissions; every rule grants only list/watch
27 | metadata:
28 |   name: kube-state-metrics
29 | rules:
30 | - apiGroups: [""]  # core API group
31 |   resources:
32 |   - nodes
33 |   - pods
34 |   - services
35 |   - resourcequotas
36 |   - replicationcontrollers
37 |   - limitranges
38 |   - persistentvolumeclaims
39 |   - persistentvolumes
40 |   - namespaces
41 |   - endpoints
42 |   - secrets  # NOTE(review): list/watch on secrets exposes secret contents to this identity - confirm it is required
43 |   - configmaps
44 |   verbs: ["list", "watch"]
45 | - apiGroups: ["extensions"]
46 |   resources:
47 |   - daemonsets
48 |   - deployments
49 |   - replicasets
50 |   verbs: ["list", "watch"]
51 | - apiGroups: ["apps"]
52 |   resources:
53 |   - statefulsets
54 |   verbs: ["list", "watch"]
55 | - apiGroups: ["batch"]
56 |   resources:
57 |   - cronjobs
58 |   - jobs
59 |   verbs: ["list", "watch"]
60 | - apiGroups: ["autoscaling"]
61 |   resources:
62 |   - horizontalpodautoscalers
63 |   verbs: ["list", "watch"]
64 | 
65 | ---
66 | 
67 | apiVersion: rbac.authorization.k8s.io/v1
68 | kind: ClusterRoleBinding  # grants the ClusterRole above to the kube-state-metrics ServiceAccount
69 | metadata:
70 |   name: kube-state-metrics
71 | roleRef:
72 |   apiGroup: rbac.authorization.k8s.io
73 |   kind: ClusterRole
74 |   name: kube-state-metrics
75 | subjects:
76 | - kind: ServiceAccount  # subject namespace is omitted here; the built tooling.yaml carries namespace: kube-system
77 |   name: kube-state-metrics
78 | 
79 | ---
80 | 
81 | apiVersion: rbac.authorization.k8s.io/v1
82 | kind: RoleBinding  # grants the resizer Role to the kube-state-metrics ServiceAccount
83 | metadata:
84 |   name: kube-state-metrics
85 | roleRef:
86 |   apiGroup: rbac.authorization.k8s.io
87 |   kind: Role
88 |   name: kube-state-metrics-resizer
89 | subjects:
90 | - kind: ServiceAccount  # subject namespace is omitted here; the built tooling.yaml carries namespace: kube-system
91 |   name: kube-state-metrics


--------------------------------------------------------------------------------
/tooling/kube-state-metrics/service.yaml:
--------------------------------------------------------------------------------
 1 | # Service in front of the kube-state-metrics pods, carrying the
 2 | # prometheus.io/scrape annotation.
 3 | apiVersion: v1
 4 | kind: Service
 5 | metadata:
 6 |   name: kube-state-metrics
 7 |   annotations:
 8 |     prometheus.io/scrape: "true"
 9 | spec:
10 |   selector:
11 |     app: kube-state-metrics
12 |   ports:
13 |   # targetPort values are container port names, not numbers.
14 |   - name: metrics
15 |     protocol: TCP
16 |     port: 8080
17 |     targetPort: metrics
18 |   - name: telemetry
19 |     protocol: TCP
20 |     port: 8081
21 |     targetPort: telemetry
22 | 


--------------------------------------------------------------------------------
/tooling/kustomization.base.yaml:
--------------------------------------------------------------------------------
1 | # Aggregates the two tooling bases that live next to this file.
2 | bases:
3 | - ./kube-state-metrics
4 | - ./metrics-server
5 | 


--------------------------------------------------------------------------------
/tooling/kustomization.yaml:
--------------------------------------------------------------------------------
1 | # Aggregates the two tooling bases that live next to this file.
2 | bases:
3 | - ./kube-state-metrics
4 | - ./metrics-server
5 | 


--------------------------------------------------------------------------------
/tooling/metrics-server/api-service.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | # Registers the metrics.k8s.io/v1beta1 API and points it at the
 3 | # metrics-server Service in kube-system.
 4 | apiVersion: apiregistration.k8s.io/v1beta1
 5 | kind: APIService
 6 | metadata:
 7 |   name: v1beta1.metrics.k8s.io
 8 | spec:
 9 |   group: metrics.k8s.io
10 |   version: v1beta1
11 |   groupPriorityMinimum: 100
12 |   versionPriority: 100
13 |   service:
14 |     namespace: kube-system
15 |     name: metrics-server
16 |   # NOTE(review): the backing service's TLS certificate is not verified.
17 |   insecureSkipTLSVerify: true
18 | 


--------------------------------------------------------------------------------
/tooling/metrics-server/auth-delegator.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | # Binds the metrics-server ServiceAccount to the system:auth-delegator
 3 | # ClusterRole (not defined in this directory; expected to exist already).
 4 | # Uses the GA rbac.authorization.k8s.io/v1 API, like resource-reader.yaml,
 5 | # instead of the deprecated v1beta1.
 6 | apiVersion: rbac.authorization.k8s.io/v1
 7 | kind: ClusterRoleBinding
 8 | metadata:
 9 |   name: metrics-server:system:auth-delegator
10 | roleRef:
11 |   apiGroup: rbac.authorization.k8s.io
12 |   kind: ClusterRole
13 |   name: system:auth-delegator
14 | subjects:
15 | - kind: ServiceAccount
16 |   name: metrics-server
17 |   namespace: kube-system
18 | 


--------------------------------------------------------------------------------
/tooling/metrics-server/auth-reader.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | # Binds the metrics-server ServiceAccount to the
 3 | # extension-apiserver-authentication-reader Role in kube-system (the Role is
 4 | # not defined in this directory; expected to exist already).
 5 | # Uses the GA rbac.authorization.k8s.io/v1 API, like resource-reader.yaml,
 6 | # instead of the deprecated v1beta1.
 7 | apiVersion: rbac.authorization.k8s.io/v1
 8 | kind: RoleBinding
 9 | metadata:
10 |   name: metrics-server-auth-reader
11 |   namespace: kube-system
12 | roleRef:
13 |   apiGroup: rbac.authorization.k8s.io
14 |   kind: Role
15 |   name: extension-apiserver-authentication-reader
16 | subjects:
17 | - kind: ServiceAccount
18 |   name: metrics-server
19 |   namespace: kube-system
20 | 


--------------------------------------------------------------------------------
/tooling/metrics-server/deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ServiceAccount  # identity the metrics-server Deployment below runs as
 3 | metadata:
 4 |   name: metrics-server  # subject of the bindings in auth-delegator.yaml, auth-reader.yaml and resource-reader.yaml
 5 |   namespace: kube-system
 6 | 
 7 | ---
 8 | apiVersion: extensions/v1beta1
 9 | kind: Deployment  # runs metrics-server under the ServiceAccount declared above
10 | metadata:
11 |   name: metrics-server
12 |   namespace: kube-system
13 |   labels:
14 |     k8s-app: metrics-server
15 | spec:
16 |   selector:
17 |     matchLabels:
18 |       k8s-app: metrics-server
19 |   template:
20 |     metadata:
21 |       name: metrics-server
22 |       labels:
23 |         k8s-app: metrics-server  # must match spec.selector and the selector in service.yaml
24 |     spec:
25 |       serviceAccountName: metrics-server
26 |       containers:
27 |       - name: metrics-server
28 |         image: gcr.io/google_containers/metrics-server-amd64:v0.0.0  # placeholder tag; kustomization.yaml's imageTags entry sets newTag v0.2.1
29 |         imagePullPolicy: Always
30 |         command:  # overrides the image entrypoint; flags are passed verbatim to /metrics-server
31 |         - /metrics-server
32 |         - --source=kubernetes.summary_api:''
33 |         - --metric-resolution=5s
34 |         resources:
35 |           requests:
36 |             cpu: 40m
37 |             memory: 32Mi
38 |           limits:
39 |             cpu: 80m
40 |             memory: 200Mi
41 | 


--------------------------------------------------------------------------------
/tooling/metrics-server/kustomization.yaml:
--------------------------------------------------------------------------------
 1 | # metrics-server base: lists its manifests and pins the image tag.
 2 | # deployment.yaml carries a placeholder tag for this image; the imageTags
 3 | # entry below replaces it.
 4 | imageTags:
 5 | - name: gcr.io/google_containers/metrics-server-amd64
 6 |   newTag: v0.2.1
 7 | resources:
 8 | - api-service.yaml
 9 | - auth-delegator.yaml
10 | - auth-reader.yaml
11 | - deployment.yaml
12 | - resource-reader.yaml
13 | - service.yaml
14 | 


--------------------------------------------------------------------------------
/tooling/metrics-server/resource-reader.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | apiVersion: rbac.authorization.k8s.io/v1
 3 | kind: ClusterRole  # read-only (get/list/watch) permissions for metrics-server
 4 | metadata:
 5 |   name: system:metrics-server
 6 | rules:
 7 | - apiGroups:  # core ("") API group
 8 |   - ""
 9 |   resources:
10 |   - pods
11 |   - nodes
12 |   - nodes/stats  # subresource of nodes
13 |   - namespaces
14 |   verbs:
15 |   - get
16 |   - list
17 |   - watch
18 | - apiGroups:
19 |   - "extensions"
20 |   resources:
21 |   - deployments
22 |   verbs:
23 |   - get
24 |   - list
25 |   - watch
26 | ---
27 | apiVersion: rbac.authorization.k8s.io/v1
28 | kind: ClusterRoleBinding  # grants the ClusterRole above to the metrics-server ServiceAccount
29 | metadata:
30 |   name: system:metrics-server
31 | roleRef:
32 |   apiGroup: rbac.authorization.k8s.io
33 |   kind: ClusterRole
34 |   name: system:metrics-server
35 | subjects:
36 | - kind: ServiceAccount
37 |   name: metrics-server
38 |   namespace: kube-system
39 | 


--------------------------------------------------------------------------------
/tooling/metrics-server/service.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | # Service in front of the metrics-server pods; api-service.yaml names it as
 3 | # the backend for metrics.k8s.io.
 4 | apiVersion: v1
 5 | kind: Service
 6 | metadata:
 7 |   labels:
 8 |     kubernetes.io/name: metrics-server
 9 |   name: metrics-server
10 |   namespace: kube-system
11 | spec:
12 |   ports:
13 |   - protocol: TCP
14 |     port: 443
15 |     targetPort: 443
16 |   selector:
17 |     k8s-app: metrics-server
18 | 


--------------------------------------------------------------------------------
/valence.yaml:
--------------------------------------------------------------------------------
   1 | apiVersion: v1
   2 | kind: Namespace  # namespace used by the namespaced Valence objects later in this file
   3 | metadata:
   4 |   labels:
   5 |     app.kubernetes.io/component: operator
   6 |     app.kubernetes.io/name: valence
   7 |     app.kubernetes.io/part-of: valence
   8 |     app.kubernetes.io/version: 0.3.2  # two dots, so YAML reads this as a string, not a number
   9 |   name: valence-system
  10 | ---
  11 | apiVersion: apiextensions.k8s.io/v1beta1
  12 | kind: CustomResourceDefinition  # declares the ServiceLevelObjective custom resource
  13 | metadata:
  14 |   labels:
  15 |     app.kubernetes.io/component: operator
  16 |     app.kubernetes.io/name: valence
  17 |     app.kubernetes.io/part-of: valence
  18 |     app.kubernetes.io/version: 0.3.2
  19 |   name: servicelevelobjectives.optimizer.valence.io  # CRD names take the form <plural>.<group>
  20 | spec:
  21 |   group: optimizer.valence.io
  22 |   names:
  23 |     kind: ServiceLevelObjective
  24 |     plural: servicelevelobjectives
  25 |   scope: Namespaced  # instances live inside a namespace rather than cluster-wide
  26 |   version: v1alpha1
  27 | ---
  28 | apiVersion: v1
  29 | kind: ServiceAccount  # subject of the prometheus-valence ClusterRoleBinding later in this file
  30 | metadata:
  31 |   labels:
  32 |     app.kubernetes.io/name: prometheus-valence
  33 |     app.kubernetes.io/part-of: valence
  34 |   name: prometheus-valence
  35 |   namespace: valence-system
  36 | ---
  37 | apiVersion: v1
  38 | kind: ServiceAccount  # subject of the valence:optimization-operator ClusterRoleBinding later in this file
  39 | metadata:
  40 |   labels:
  41 |     app.kubernetes.io/component: operator
  42 |     app.kubernetes.io/name: valence
  43 |     app.kubernetes.io/part-of: valence
  44 |     app.kubernetes.io/version: 0.3.2
  45 |   name: valence-operator
  46 |   namespace: valence-system
  47 | ---
  48 | apiVersion: rbac.authorization.k8s.io/v1  # was the deprecated v1beta1; v1 has the same schema
  49 | kind: ClusterRole  # read-only permissions granted to the prometheus-valence ServiceAccount
  50 | metadata:
  51 |   labels:
  52 |     app.kubernetes.io/name: prometheus-valence
  53 |     app.kubernetes.io/part-of: valence
  54 |   name: prometheus-valence
  55 | rules:
  56 | - apiGroups:  # core ("") API group: get/list/watch
  57 |   - ""
  58 |   resources:
  59 |   - nodes
  60 |   - nodes/proxy  # subresource of nodes
  61 |   - services
  62 |   - endpoints
  63 |   - pods
  64 |   verbs:
  65 |   - get
  66 |   - list
  67 |   - watch
  68 | - apiGroups:
  69 |   - ""
  70 |   resources:
  71 |   - configmaps
  72 |   verbs:
  73 |   - get
  74 | - nonResourceURLs:  # a URL path on the API server rather than a resource
  75 |   - /metrics
  76 |   verbs:
  77 |   - get
  78 | ---
  79 | apiVersion: rbac.authorization.k8s.io/v1  # was the deprecated v1beta1; v1 has the same schema
  80 | kind: ClusterRole  # permissions granted to the valence-operator ServiceAccount
  81 | metadata:
  82 |   labels:
  83 |     app.kubernetes.io/component: operator
  84 |     app.kubernetes.io/name: valence
  85 |     app.kubernetes.io/part-of: valence
  86 |     app.kubernetes.io/version: 0.3.2
  87 |   name: valence:optimization-operator
  88 | rules:
  89 | - apiGroups:  # read and modify Deployments (extensions group)
  90 |   - extensions
  91 |   resources:
  92 |   - deployments
  93 |   verbs:
  94 |   - get
  95 |   - list
  96 |   - watch
  97 |   - update
  98 |   - patch
  99 | - apiGroups:  # pod metrics from the metrics.k8s.io API
 100 |   - metrics.k8s.io
 101 |   resources:
 102 |   - pods
 103 |   verbs:
 104 |   - get
 105 |   - list
 106 | - apiGroups:  # core ("") API group: read-only
 107 |   - ""
 108 |   resources:
 109 |   - pods
 110 |   - events
 111 |   - nodes
 112 |   verbs:
 113 |   - get
 114 |   - list
 115 |   - watch
 116 | - apiGroups:  # the ServiceLevelObjective custom resources declared by the CRD in this file
 117 |   - optimizer.valence.io
 118 |   resources:
 119 |   - servicelevelobjectives
 120 |   verbs:
 121 |   - get
 122 |   - list
 123 |   - watch
 124 |   - update
 125 |   - patch
 126 | - apiGroups:
 127 |   - apiextensions.k8s.io
 128 |   resources:
 129 |   - customresourcedefinitions
 130 |   verbs:
 131 |   - '*'  # NOTE(review): every verb on every CRD in the cluster; narrow if only a subset is needed
 132 | ---
 133 | apiVersion: rbac.authorization.k8s.io/v1  # was the deprecated v1beta1; v1 has the same schema
 134 | kind: ClusterRoleBinding  # grants the prometheus-valence ClusterRole to the ServiceAccount of the same name
 135 | metadata:
 136 |   labels:
 137 |     app.kubernetes.io/name: prometheus-valence
 138 |     app.kubernetes.io/part-of: valence
 139 |   name: prometheus-valence
 140 | roleRef:
 141 |   apiGroup: rbac.authorization.k8s.io
 142 |   kind: ClusterRole
 143 |   name: prometheus-valence
 144 | subjects:
 145 | - kind: ServiceAccount
 146 |   name: prometheus-valence
 147 |   namespace: valence-system
 148 | ---
 149 | apiVersion: rbac.authorization.k8s.io/v1  # was the deprecated v1beta1; v1 has the same schema
 150 | kind: ClusterRoleBinding  # grants the operator ClusterRole to the valence-operator ServiceAccount
 151 | metadata:
 152 |   labels:
 153 |     app.kubernetes.io/component: operator
 154 |     app.kubernetes.io/name: valence
 155 |     app.kubernetes.io/part-of: valence
 156 |     app.kubernetes.io/version: 0.3.2
 157 |   name: valence:optimization-operator
 158 | roleRef:
 159 |   apiGroup: rbac.authorization.k8s.io
 160 |   kind: ClusterRole
 161 |   name: valence:optimization-operator
 162 | subjects:
 163 | - kind: ServiceAccount
 164 |   name: valence-operator
 165 |   namespace: valence-system
 166 | ---
 167 | apiVersion: v1
 168 | data:
 169 |   valence.json: |
 170 |     {
 171 |       "annotations": {
 172 |         "list": [
 173 |           {
 174 |             "builtIn": 1,
 175 |             "datasource": "-- Grafana --",
 176 |             "enable": true,
 177 |             "hide": true,
 178 |             "iconColor": "rgba(0, 211, 255, 1)",
 179 |             "name": "Annotations & Alerts",
 180 |             "type": "dashboard"
 181 |           }
 182 |         ]
 183 |       },
 184 |       "editable": true,
 185 |       "gnetId": null,
 186 |       "graphTooltip": 0,
 187 |       "id": 1,
 188 |       "iteration": 1559313049824,
 189 |       "links": [],
 190 |       "panels": [
 191 |         {
 192 |           "aliasColors": {},
 193 |           "bars": false,
 194 |           "dashLength": 10,
 195 |           "dashes": false,
 196 |           "datasource": "-- Mixed --",
 197 |           "description": "Recommendations of Memory requests and limits to set for $deployment",
 198 |           "fill": 1,
 199 |           "gridPos": {
 200 |             "h": 8,
 201 |             "w": 24,
 202 |             "x": 0,
 203 |             "y": 0
 204 |           },
 205 |           "id": 2,
 206 |           "legend": {
 207 |             "avg": false,
 208 |             "current": true,
 209 |             "max": false,
 210 |             "min": false,
 211 |             "show": true,
 212 |             "total": false,
 213 |             "values": true
 214 |           },
 215 |           "lines": true,
 216 |           "linewidth": 1,
 217 |           "links": [],
 218 |           "nullPointMode": "null",
 219 |           "percentage": false,
 220 |           "pointradius": 5,
 221 |           "points": false,
 222 |           "renderer": "flot",
 223 |           "seriesOverrides": [],
 224 |           "spaceLength": 10,
 225 |           "stack": false,
 226 |           "steppedLine": false,
 227 |           "targets": [
 228 |             {
 229 |               "datasource": "DS_PROM_VALENCE",
 230 |               "expr": "max(container_memory_working_set_bytes{container_name=\"$deployment\"})",
 231 |               "format": "time_series",
 232 |               "instant": false,
 233 |               "interval": "5s",
 234 |               "intervalFactor": 1,
 235 |               "legendFormat": "Observed Memory Value",
 236 |               "refId": "A"
 237 |             },
 238 |             {
 239 |               "datasource": "DS_PROM_VALENCE",
 240 |               "expr": "max(valence_recommendations_memory_requests{container_name=\"$deployment\"})",
 241 |               "format": "time_series",
 242 |               "hide": false,
 243 |               "instant": false,
 244 |               "interval": "60s",
 245 |               "intervalFactor": 1,
 246 |               "legendFormat": "Recommended Memory Request",
 247 |               "refId": "B"
 248 |             },
 249 |             {
 250 |               "datasource": "DS_PROM_VALENCE",
 251 |               "expr": "max(valence_recommendations_memory_limits{container_name=\"$deployment\"})",
 252 |               "format": "time_series",
 253 |               "hide": false,
 254 |               "interval": "60s",
 255 |               "intervalFactor": 1,
 256 |               "legendFormat": "Recommended Memory Limit",
 257 |               "refId": "C"
 258 |             },
 259 |             {
 260 |               "datasource": "DS_PROM_VALENCE",
 261 |               "expr": "max(kube_pod_container_resource_requests_memory_bytes{container=\"$deployment\"})",
 262 |               "format": "time_series",
 263 |               "intervalFactor": 1,
 264 |               "legendFormat": "Requests",
 265 |               "refId": "D"
 266 |             },
 267 |             {
 268 |               "datasource": "DS_PROM_VALENCE",
 269 |               "expr": "max(kube_pod_container_resource_limits_memory_bytes{container=\"$deployment\"})",
 270 |               "format": "time_series",
 271 |               "intervalFactor": 1,
 272 |               "legendFormat": "Limits",
 273 |               "refId": "E"
 274 |             }
 275 |           ],
 276 |           "thresholds": [],
 277 |           "timeFrom": null,
 278 |           "timeShift": null,
 279 |           "title": "Memory recommendations: $deployment",
 280 |           "tooltip": {
 281 |             "shared": true,
 282 |             "sort": 0,
 283 |             "value_type": "individual"
 284 |           },
 285 |           "transparent": false,
 286 |           "type": "graph",
 287 |           "xaxis": {
 288 |             "buckets": null,
 289 |             "mode": "time",
 290 |             "name": null,
 291 |             "show": true,
 292 |             "values": []
 293 |           },
 294 |           "yaxes": [
 295 |             {
 296 |               "format": "bytes",
 297 |               "label": "Memory",
 298 |               "logBase": 1,
 299 |               "max": null,
 300 |               "min": null,
 301 |               "show": true
 302 |             },
 303 |             {
 304 |               "format": "short",
 305 |               "label": null,
 306 |               "logBase": 1,
 307 |               "max": null,
 308 |               "min": null,
 309 |               "show": true
 310 |             }
 311 |           ],
 312 |           "yaxis": {
 313 |             "align": false,
 314 |             "alignLevel": null
 315 |           }
 316 |         },
 317 |         {
 318 |           "aliasColors": {},
 319 |           "bars": false,
 320 |           "dashLength": 10,
 321 |           "dashes": false,
 322 |           "datasource": "-- Mixed --",
 323 |           "description": "Recommendations of CPU requests and limits to set for $deployment",
 324 |           "fill": 1,
 325 |           "gridPos": {
 326 |             "h": 8,
 327 |             "w": 24,
 328 |             "x": 0,
 329 |             "y": 8
 330 |           },
 331 |           "id": 4,
 332 |           "legend": {
 333 |             "avg": false,
 334 |             "current": true,
 335 |             "max": false,
 336 |             "min": false,
 337 |             "show": true,
 338 |             "total": false,
 339 |             "values": true
 340 |           },
 341 |           "lines": true,
 342 |           "linewidth": 1,
 343 |           "links": [],
 344 |           "nullPointMode": "null",
 345 |           "percentage": false,
 346 |           "pointradius": 5,
 347 |           "points": false,
 348 |           "renderer": "flot",
 349 |           "seriesOverrides": [],
 350 |           "spaceLength": 10,
 351 |           "stack": false,
 352 |           "steppedLine": false,
 353 |           "targets": [
 354 |             {
 355 |               "datasource": "DS_PROM_VALENCE",
 356 |               "expr": "avg(rate(container_cpu_usage_seconds_total{container_name=\"$deployment\"}[2m]))",
 357 |               "format": "time_series",
 358 |               "instant": false,
 359 |               "interval": "5s",
 360 |               "intervalFactor": 1,
 361 |               "legendFormat": "Observed CPU Value",
 362 |               "refId": "A"
 363 |             },
 364 |             {
 365 |               "datasource": "DS_PROM_VALENCE",
 366 |               "expr": "max(valence_recommendations_cpu_requests{container_name=\"$deployment\"} / 1000)",
 367 |               "format": "time_series",
 368 |               "hide": false,
 369 |               "instant": false,
 370 |               "interval": "60s",
 371 |               "intervalFactor": 1,
 372 |               "legendFormat": "Recommended CPU Request",
 373 |               "refId": "B"
 374 |             },
 375 |             {
 376 |               "datasource": "DS_PROM_VALENCE",
 377 |               "expr": "max(valence_recommendations_cpu_limits{container_name=\"$deployment\"} / 1000)",
 378 |               "format": "time_series",
 379 |               "hide": false,
 380 |               "interval": "60s",
 381 |               "intervalFactor": 1,
 382 |               "legendFormat": "Recommended CPU Limit",
 383 |               "refId": "C"
 384 |             },
 385 |             {
 386 |               "datasource": "DS_PROM_VALENCE",
 387 |               "expr": "max(kube_pod_container_resource_requests_cpu_cores{container=\"$deployment\"})",
 388 |               "format": "time_series",
 389 |               "intervalFactor": 1,
 390 |               "legendFormat": "Requests",
 391 |               "refId": "D"
 392 |             },
 393 |             {
 394 |               "datasource": "DS_PROM_VALENCE",
 395 |               "expr": "max(kube_pod_container_resource_limits_cpu_cores{container=\"$deployment\"})",
 396 |               "format": "time_series",
 397 |               "intervalFactor": 1,
 398 |               "legendFormat": "Limits",
 399 |               "refId": "E"
 400 |             }
 401 |           ],
 402 |           "thresholds": [],
 403 |           "timeFrom": null,
 404 |           "timeShift": null,
 405 |           "title": "Cpu recommendations: $deployment",
 406 |           "tooltip": {
 407 |             "shared": true,
 408 |             "sort": 0,
 409 |             "value_type": "individual"
 410 |           },
 411 |           "transparent": false,
 412 |           "type": "graph",
 413 |           "xaxis": {
 414 |             "buckets": null,
 415 |             "mode": "time",
 416 |             "name": null,
 417 |             "show": true,
 418 |             "values": []
 419 |           },
 420 |           "yaxes": [
 421 |             {
 422 |               "format": "short",
 423 |               "label": "CPU",
 424 |               "logBase": 1,
 425 |               "max": null,
 426 |               "min": null,
 427 |               "show": true
 428 |             },
 429 |             {
 430 |               "format": "short",
 431 |               "label": null,
 432 |               "logBase": 1,
 433 |               "max": null,
 434 |               "min": null,
 435 |               "show": true
 436 |             }
 437 |           ],
 438 |           "yaxis": {
 439 |             "align": false,
 440 |             "alignLevel": null
 441 |           }
 442 |         },
 443 |         {
 444 |           "aliasColors": {},
 445 |           "bars": false,
 446 |           "dashLength": 10,
 447 |           "dashes": false,
 448 |           "datasource": "DS_PROM_VALENCE",
 449 |           "fill": 1,
 450 |           "gridPos": {
 451 |             "h": 8,
 452 |             "w": 24,
 453 |             "x": 0,
 454 |             "y": 16
 455 |           },
 456 |           "id": 6,
 457 |           "legend": {
 458 |             "avg": false,
 459 |             "current": true,
 460 |             "max": false,
 461 |             "min": false,
 462 |             "show": true,
 463 |             "total": false,
 464 |             "values": true
 465 |           },
 466 |           "lines": true,
 467 |           "linewidth": 1,
 468 |           "links": [],
 469 |           "nullPointMode": "null",
 470 |           "percentage": false,
 471 |           "pointradius": 5,
 472 |           "points": false,
 473 |           "renderer": "flot",
 474 |           "seriesOverrides": [],
 475 |           "spaceLength": 10,
 476 |           "stack": false,
 477 |           "steppedLine": false,
 478 |           "targets": [
 479 |             {
 480 |               "expr": "sum(rate(envoy_http_downstream_rq_total{service=\"$deployment\"}[1m]))",
 481 |               "format": "time_series",
 482 |               "instant": false,
 483 |               "interval": "",
 484 |               "intervalFactor": 1,
 485 |               "legendFormat": "HTTP Queries Per Second",
 486 |               "refId": "A"
 487 |             },
 488 |             {
 489 |               "expr": "",
 490 |               "format": "time_series",
 491 |               "intervalFactor": 1,
 492 |               "refId": "B"
 493 |             }
 494 |           ],
 495 |           "thresholds": [],
 496 |           "timeFrom": null,
 497 |           "timeShift": null,
 498 |           "title": "HTTP Request Count: $deployment",
 499 |           "tooltip": {
 500 |             "shared": true,
 501 |             "sort": 0,
 502 |             "value_type": "individual"
 503 |           },
 504 |           "type": "graph",
 505 |           "xaxis": {
 506 |             "buckets": null,
 507 |             "mode": "time",
 508 |             "name": null,
 509 |             "show": true,
 510 |             "values": []
 511 |           },
 512 |           "yaxes": [
 513 |             {
 514 |               "format": "short",
 515 |               "label": "QpS",
 516 |               "logBase": 1,
 517 |               "max": null,
 518 |               "min": null,
 519 |               "show": true
 520 |             },
 521 |             {
 522 |               "format": "short",
 523 |               "label": null,
 524 |               "logBase": 1,
 525 |               "max": null,
 526 |               "min": null,
 527 |               "show": true
 528 |             }
 529 |           ],
 530 |           "yaxis": {
 531 |             "align": false,
 532 |             "alignLevel": null
 533 |           }
 534 |         },
 535 |         {
 536 |           "aliasColors": {},
 537 |           "bars": false,
 538 |           "dashLength": 10,
 539 |           "dashes": false,
 540 |           "datasource": "DS_PROM_VALENCE",
 541 |           "fill": 1,
 542 |           "gridPos": {
 543 |             "h": 8,
 544 |             "w": 24,
 545 |             "x": 0,
 546 |             "y": 24
 547 |           },
 548 |           "id": 7,
 549 |           "legend": {
 550 |             "avg": false,
 551 |             "current": true,
 552 |             "max": false,
 553 |             "min": false,
 554 |             "show": true,
 555 |             "total": false,
 556 |             "values": true
 557 |           },
 558 |           "lines": true,
 559 |           "linewidth": 1,
 560 |           "links": [],
 561 |           "nullPointMode": "null",
 562 |           "percentage": false,
 563 |           "pointradius": 5,
 564 |           "points": false,
 565 |           "renderer": "flot",
 566 |           "seriesOverrides": [],
 567 |           "spaceLength": 10,
 568 |           "stack": false,
 569 |           "steppedLine": false,
 570 |           "targets": [
 571 |             {
 572 |               "expr": "avg(histogram_quantile($LatencyPercentile, sum(rate(envoy_http_downstream_rq_time_bucket{service=\"$deployment\"}[1m])) by(le, pod)))",
 573 |               "format": "time_series",
 574 |               "instant": false,
 575 |               "interval": "",
 576 |               "intervalFactor": 1,
 577 |               "legendFormat": "HTTP Request Latency",
 578 |               "refId": "A"
 579 |             },
 580 |             {
 581 |               "expr": "valence_slo_http_latency{name=\"$deployment\"}",
 582 |               "format": "time_series",
 583 |               "intervalFactor": 1,
 584 |               "refId": "B"
 585 |             }
 586 |           ],
 587 |           "thresholds": [],
 588 |           "timeFrom": null,
 589 |           "timeShift": null,
 590 |           "title": "HTTP Request Latency: $deployment",
 591 |           "tooltip": {
 592 |             "shared": true,
 593 |             "sort": 0,
 594 |             "value_type": "individual"
 595 |           },
 596 |           "type": "graph",
 597 |           "xaxis": {
 598 |             "buckets": null,
 599 |             "mode": "time",
 600 |             "name": null,
 601 |             "show": true,
 602 |             "values": []
 603 |           },
 604 |           "yaxes": [
 605 |             {
 606 |               "format": "ms",
 607 |               "label": "",
 608 |               "logBase": 1,
 609 |               "max": null,
 610 |               "min": "0",
 611 |               "show": true
 612 |             },
 613 |             {
 614 |               "format": "short",
 615 |               "label": null,
 616 |               "logBase": 1,
 617 |               "max": null,
 618 |               "min": null,
 619 |               "show": true
 620 |             }
 621 |           ],
 622 |           "yaxis": {
 623 |             "align": false,
 624 |             "alignLevel": null
 625 |           }
 626 |         },
 627 |         {
 628 |           "aliasColors": {},
 629 |           "bars": false,
 630 |           "dashLength": 10,
 631 |           "dashes": false,
 632 |           "datasource": "-- Mixed --",
 633 |           "description": "Number of replicas observed and recommended for: $deployment",
 634 |           "fill": 1,
 635 |           "gridPos": {
 636 |             "h": 8,
 637 |             "w": 24,
 638 |             "x": 0,
 639 |             "y": 32
 640 |           },
 641 |           "id": 9,
 642 |           "legend": {
 643 |             "avg": false,
 644 |             "current": true,
 645 |             "max": false,
 646 |             "min": false,
 647 |             "show": true,
 648 |             "total": false,
 649 |             "values": true
 650 |           },
 651 |           "lines": true,
 652 |           "linewidth": 1,
 653 |           "links": [],
 654 |           "nullPointMode": "null",
 655 |           "percentage": false,
 656 |           "pointradius": 5,
 657 |           "points": false,
 658 |           "renderer": "flot",
 659 |           "seriesOverrides": [],
 660 |           "spaceLength": 10,
 661 |           "stack": false,
 662 |           "steppedLine": false,
 663 |           "targets": [
 664 |             {
 665 |               "datasource": "DS_PROM_VALENCE",
 666 |               "expr": "valence_recommendations_replicas{name=\"$deployment\"}",
 667 |               "format": "time_series",
 668 |               "hide": false,
 669 |               "intervalFactor": 1,
 670 |               "legendFormat": "Recommended Replicas",
 671 |               "refId": "A"
 672 |             },
 673 |             {
 674 |               "datasource": "DS_PROM_VALENCE",
 675 |               "expr": "kube_deployment_status_replicas_available{deployment=\"$deployment\"}",
 676 |               "format": "time_series",
 677 |               "intervalFactor": 1,
 678 |               "legendFormat": "Current Replicas",
 679 |               "refId": "B"
 680 |             }
 681 |           ],
 682 |           "thresholds": [],
 683 |           "timeFrom": null,
 684 |           "timeShift": null,
 685 |           "title": "Replicas: $deployment",
 686 |           "tooltip": {
 687 |             "shared": true,
 688 |             "sort": 0,
 689 |             "value_type": "individual"
 690 |           },
 691 |           "type": "graph",
 692 |           "xaxis": {
 693 |             "buckets": null,
 694 |             "mode": "time",
 695 |             "name": null,
 696 |             "show": true,
 697 |             "values": []
 698 |           },
 699 |           "yaxes": [
 700 |             {
 701 |               "decimals": null,
 702 |               "format": "short",
 703 |               "label": null,
 704 |               "logBase": 1,
 705 |               "max": null,
 706 |               "min": null,
 707 |               "show": true
 708 |             },
 709 |             {
 710 |               "format": "short",
 711 |               "label": null,
 712 |               "logBase": 1,
 713 |               "max": null,
 714 |               "min": null,
 715 |               "show": true
 716 |             }
 717 |           ],
 718 |           "yaxis": {
 719 |             "align": false,
 720 |             "alignLevel": null
 721 |           }
 722 |         }
 723 |       ],
 724 |       "refresh": "5s",
 725 |       "schemaVersion": 16,
 726 |       "style": "dark",
 727 |       "tags": [],
 728 |       "templating": {
 729 |         "list": [
 730 |           {
 731 |             "allValue": null,
 732 |             "current": {
 733 |               "selected": false,
 734 |               "tags": [],
 735 |               "text": "todo-backend-django-valence",
 736 |               "value": "todo-backend-django-valence"
 737 |             },
 738 |             "datasource": "DS_PROM_VALENCE",
 739 |             "hide": 0,
 740 |             "includeAll": false,
 741 |             "label": "Deployment",
 742 |             "multi": false,
 743 |             "name": "deployment",
 744 |             "options": [],
 745 |             "query": "label_values(envoy_http_downstream_rq_total, service)",
 746 |             "refresh": 1,
 747 |             "regex": "",
 748 |             "sort": 0,
 749 |             "tagValuesQuery": "",
 750 |             "tags": [],
 751 |             "tagsQuery": "",
 752 |             "type": "query",
 753 |             "useTags": false
 754 |           },
 755 |           {
 756 |             "allValue": null,
 757 |             "current": {
 758 |               "selected": true,
 759 |               "tags": [],
 760 |               "text": "0.95",
 761 |               "value": "0.95"
 762 |             },
 763 |             "hide": 0,
 764 |             "includeAll": false,
 765 |             "label": "Latency Percentile",
 766 |             "multi": false,
 767 |             "name": "LatencyPercentile",
 768 |             "options": [
 769 |               {
 770 |                 "selected": false,
 771 |                 "text": "0.5",
 772 |                 "value": "0.5"
 773 |               },
 774 |               {
 775 |                 "selected": false,
 776 |                 "text": "0.75",
 777 |                 "value": "0.75"
 778 |               },
 779 |               {
 780 |                 "selected": false,
 781 |                 "text": "0.9",
 782 |                 "value": "0.9"
 783 |               },
 784 |               {
 785 |                 "selected": true,
 786 |                 "text": "0.95",
 787 |                 "value": "0.95"
 788 |               },
 789 |               {
 790 |                 "selected": false,
 791 |                 "text": "0.99",
 792 |                 "value": "0.99"
 793 |               }
 794 |             ],
 795 |             "query": "0.5, 0.75, 0.9, 0.95, 0.99",
 796 |             "type": "custom"
 797 |           }
 798 |         ]
 799 |       },
 800 |       "time": {
 801 |         "from": "now-1h",
 802 |         "to": "now"
 803 |       },
 804 |       "timepicker": {
 805 |         "refresh_intervals": [
 806 |           "1s",
 807 |           "5s",
 808 |           "30s",
 809 |           "1m"
 810 |         ],
 811 |         "time_options": [
 812 |           "5m",
 813 |           "15m",
 814 |           "1h",
 815 |           "6h",
 816 |           "12h",
 817 |           "24h",
 818 |           "2d",
 819 |           "7d",
 820 |           "30d"
 821 |         ]
 822 |       },
 823 |       "timezone": "",
 824 |       "title": "Valence",
 825 |       "uid": "9ri9X0Qiz",
 826 |       "version": 1
 827 |     }
 828 | kind: ConfigMap
 829 | metadata:
 830 |   labels:
 831 |     app.kubernetes.io/name: grafana
 832 |     app.kubernetes.io/part-of: valence
 833 |   name: grafana-dashboards-valence
 834 |   namespace: valence-system
 835 | ---
 836 | apiVersion: v1  # Grafana datasource provisioning file, shipped as a ConfigMap
 837 | kind: ConfigMap
 838 | metadata:
 839 |   name: grafana-datasources
 840 |   namespace: valence-system
 841 |   labels:
 842 |     app.kubernetes.io/part-of: valence
 843 |     app.kubernetes.io/name: grafana
 844 | data:
 845 |   prometheus.yaml: |  # registers the Prometheus service as datasource DS_PROM_VALENCE
 846 |     apiVersion: 1
 847 |     datasources:
 848 |       - name: DS_PROM_VALENCE
 849 |         type: prometheus
 850 |         access: proxy
 851 |         url: http://prometheus-valence.valence-system:9090
 852 |         editable: false
 853 |         version: 1
 854 | ---
 855 | apiVersion: v1  # Grafana dashboard-provider provisioning file, shipped as a ConfigMap
 856 | data:
 857 |   default.yaml: |  # schema "apiVersion: 1" uses camelCase keys, so the org is set with orgId (not org_id)
 858 |     apiVersion: 1
 859 |     providers:
 860 |       - name: 'default'
 861 |         orgId: 1
 862 |         folder: ''
 863 |         type: 'file'
 864 |         options:
 865 |           path: '/var/lib/grafana/dashboards'
 866 | kind: ConfigMap
 867 | metadata:
 868 |   labels:
 869 |     app.kubernetes.io/name: grafana
 870 |     app.kubernetes.io/part-of: valence
 871 |   name: grafana-providers
 872 |   namespace: valence-system
 873 | ---
 874 | apiVersion: v1  # Prometheus server configuration for the valence stack, shipped as a ConfigMap
 875 | data:
 876 |   prometheus.yaml: |  # four scrape jobs plus remote_write to the operator; cadvisor job verifies the API-server cert against ca_file
 877 |     global:
 878 |       scrape_interval: 10s
 879 |       scrape_timeout: 10s
 880 |       evaluation_interval: 10s
 881 |     rule_files:
 882 |       - "/etc/prometheus-rules/*.rules"
 883 |     remote_write:
 884 |     - url: http://optimization-operator.valence-system:8080/write
 885 |     scrape_configs:
 886 |     - job_name: kube-state-metrics
 887 |       honor_labels: true
 888 |       scrape_interval: 1m
 889 |       scrape_timeout: 10s
 890 |       metrics_path: /metrics
 891 |       scheme: http
 892 |       kubernetes_sd_configs:
 893 |       - role: endpoints
 894 |       bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
 895 |       tls_config:
 896 |         insecure_skip_verify: true
 897 |       metric_relabel_configs:
 898 |       - source_labels: [__name__]
 899 |         regex: (?i)(kube_pod_container_resource_requests_memory_bytes|kube_pod_container_resource_limits_memory_bytes|kube_pod_container_resource_requests_cpu_cores|kube_pod_container_resource_limits_cpu_cores|kube_deployment_status_replicas_available)
 900 |         action: keep
 901 |       relabel_configs:
 902 |       - source_labels: [__meta_kubernetes_service_label_app]
 903 |         separator: ;
 904 |         regex: kube-state-metrics
 905 |         replacement: $1
 906 |         action: keep
 907 |       - source_labels: [__meta_kubernetes_endpoint_port_name]
 908 |         separator: ;
 909 |         regex: metrics
 910 |         replacement: $1
 911 |         action: keep
 912 |       - source_labels: [__meta_kubernetes_namespace]
 913 |         separator: ;
 914 |         regex: (.*)
 915 |         target_label: namespace
 916 |         replacement: $1
 917 |         action: replace
 918 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
 919 |         separator: ;
 920 |         regex: Node;(.*)
 921 |         target_label: node
 922 |         replacement: ${1}
 923 |         action: replace
 924 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
 925 |         separator: ;
 926 |         regex: Pod;(.*)
 927 |         target_label: pod
 928 |         replacement: ${1}
 929 |         action: replace
 930 |       - source_labels: [__meta_kubernetes_service_name]
 931 |         separator: ;
 932 |         regex: (.*)
 933 |         target_label: service
 934 |         replacement: $1
 935 |         action: replace
 936 |       - source_labels: [__meta_kubernetes_service_name]
 937 |         separator: ;
 938 |         regex: (.*)
 939 |         target_label: job
 940 |         replacement: ${1}
 941 |         action: replace
 942 |       - source_labels: [__meta_kubernetes_service_label_app]
 943 |         separator: ;
 944 |         regex: (.+)
 945 |         target_label: job
 946 |         replacement: ${1}
 947 |         action: replace
 948 |       - separator: ;
 949 |         regex: (.*)
 950 |         target_label: endpoint
 951 |         replacement: metrics
 952 |         action: replace
 953 |     - job_name: kubernetes-nodes-cadvisor
 954 |       scrape_interval: 1m
 955 |       scrape_timeout: 10s
 956 |       metrics_path: /metrics
 957 |       scheme: https
 958 |       kubernetes_sd_configs:
 959 |       - api_server: null
 960 |         role: node
 961 |         namespaces:
 962 |           names: []
 963 |       bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
 964 |       tls_config:
 965 |         ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
 966 |         insecure_skip_verify: false
 967 |       metric_relabel_configs:
 968 |       - source_labels: [__name__]
 969 |         regex: (?i)(container_memory_working_set_bytes|container_cpu_usage_seconds_total)
 970 |         action: keep
 971 |       relabel_configs:
 972 |       - separator: ;
 973 |         regex: __meta_kubernetes_node_label_(.+)
 974 |         replacement: $1
 975 |         action: labelmap
 976 |       - separator: ;
 977 |         regex: (.*)
 978 |         target_label: __address__
 979 |         replacement: kubernetes.default.svc:443
 980 |         action: replace
 981 |       - source_labels: [__meta_kubernetes_node_name]
 982 |         separator: ;
 983 |         regex: (.+)
 984 |         target_label: __metrics_path__
 985 |         replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
 986 |         action: replace
 987 |     - job_name: prometheus-valence
 988 |       scrape_interval: 5s
 989 |       scrape_timeout: 5s
 990 |       metrics_path: /stats/prometheus
 991 |       scheme: http
 992 |       kubernetes_sd_configs:
 993 |       - role: endpoints
 994 |       metric_relabel_configs:
 995 |       - source_labels: [__name__]
 996 |         regex: (?i)(envoy_http_downstream_rq_time_bucket|envoy_http_downstream_rq_total)
 997 |         action: keep
 998 |       relabel_configs:
 999 |       - source_labels: [__meta_kubernetes_service_label_valence_net_prometheus]
1000 |         separator: ;
1001 |         regex: "true"
1002 |         replacement: $1
1003 |         action: keep
1004 |       - source_labels: [__meta_kubernetes_endpoint_port_name]
1005 |         separator: ;
1006 |         regex: prometheus
1007 |         replacement: $1
1008 |         action: keep
1009 |       - source_labels: [__meta_kubernetes_namespace]
1010 |         separator: ;
1011 |         regex: (.*)
1012 |         target_label: namespace
1013 |         replacement: $1
1014 |         action: replace
1015 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
1016 |         separator: ;
1017 |         regex: Node;(.*)
1018 |         target_label: node
1019 |         replacement: ${1}
1020 |         action: replace
1021 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
1022 |         separator: ;
1023 |         regex: Pod;(.*)
1024 |         target_label: pod
1025 |         replacement: ${1}
1026 |         action: replace
1027 |       - source_labels: [__meta_kubernetes_service_name]
1028 |         separator: ;
1029 |         regex: (.*)
1030 |         target_label: service
1031 |         replacement: $1
1032 |         action: replace
1033 |       - source_labels: [__meta_kubernetes_service_name]
1034 |         separator: ;
1035 |         regex: (.*)
1036 |         target_label: job
1037 |         replacement: ${1}
1038 |         action: replace
1039 |       - separator: ;
1040 |         regex: (.*)
1041 |         target_label: endpoint
1042 |         replacement: prometheus
1043 |         action: replace
1044 |     - job_name: valence
1045 |       scrape_interval: 1m
1046 |       scrape_timeout: 10s
1047 |       metrics_path: /metrics
1048 |       scheme: http
1049 |       kubernetes_sd_configs:
1050 |       - role: endpoints
1051 |         namespaces:
1052 |           names:
1053 |           - valence-system
1054 |       metric_relabel_configs:
1055 |       - source_labels: [__name__]
1056 |         regex: (?i)(valence_recommendations_memory_requests|valence_recommendations_memory_limits|valence_recommendations_cpu_requests|valence_recommendations_cpu_limits|valence_recommendations_replicas|valence_original_cpu_limits|valence_original_cpu_requests|valence_original_memory_limits|valence_original_memory_requests|valence_original_replicas|valence_slo_http_latency|valence_slo_http_percentile|valence_slo_http_throughput)
1057 |         action: keep
1058 |       relabel_configs:
1059 |       - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_component]
1060 |         separator: ;
1061 |         regex: operator
1062 |         replacement: $1
1063 |         action: keep
1064 |       - source_labels: [__meta_kubernetes_endpoint_port_name]
1065 |         separator: ;
1066 |         regex: prometheus
1067 |         replacement: $1
1068 |         action: keep
1069 |       - source_labels: [__meta_kubernetes_namespace]
1070 |         separator: ;
1071 |         regex: (.*)
1072 |         target_label: namespace
1073 |         replacement: $1
1074 |         action: replace
1075 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
1076 |         separator: ;
1077 |         regex: Node;(.*)
1078 |         target_label: node
1079 |         replacement: ${1}
1080 |         action: replace
1081 |       - source_labels: [__meta_kubernetes_endpoint_address_target_kind, __meta_kubernetes_endpoint_address_target_name]
1082 |         separator: ;
1083 |         regex: Pod;(.*)
1084 |         target_label: pod
1085 |         replacement: ${1}
1086 |         action: replace
1087 |       - source_labels: [__meta_kubernetes_service_name]
1088 |         separator: ;
1089 |         regex: (.*)
1090 |         target_label: service
1091 |         replacement: $1
1092 |         action: replace
1093 |       - source_labels: [__meta_kubernetes_service_name]
1094 |         separator: ;
1095 |         regex: (.*)
1096 |         target_label: job
1097 |         replacement: ${1}
1098 |         action: replace
1099 |       - separator: ;
1100 |         regex: (.*)
1101 |         target_label: endpoint
1102 |         replacement: prometheus
1103 |         action: replace
1104 | kind: ConfigMap
1105 | metadata:
1106 |   labels:
1107 |     app.kubernetes.io/name: prometheus-valence
1108 |     app.kubernetes.io/part-of: valence
1109 |   name: prometheus-valence
1110 |   namespace: valence-system
1111 | ---
1112 | apiVersion: v1  # NodePort Service in front of the Grafana pods
1113 | kind: Service
1114 | metadata:
1115 |   name: grafana
1116 |   namespace: valence-system
1117 |   labels:
1118 |     app.kubernetes.io/part-of: valence
1119 |     app.kubernetes.io/name: grafana
1120 | spec:
1121 |   type: NodePort
1122 |   selector:  # same two labels the grafana pod template carries
1123 |     app.kubernetes.io/part-of: valence
1124 |     app.kubernetes.io/name: grafana
1125 |   ports:
1126 |   - targetPort: 3000
1127 |     protocol: TCP
1128 |     port: 3000
1129 | ---
1130 | apiVersion: v1  # NodePort Service in front of the valence operator pods
1131 | kind: Service
1132 | metadata:
1133 |   name: optimization-operator
1134 |   namespace: valence-system
1135 |   labels:
1136 |     app.kubernetes.io/name: valence
1137 |     app.kubernetes.io/component: operator
1138 |     app.kubernetes.io/part-of: valence
1139 |     app.kubernetes.io/version: 0.3.2
1140 | spec:
1141 |   type: NodePort
1142 |   selector:  # includes the version label, so it must track the operator pod labels
1143 |     app.kubernetes.io/name: valence
1144 |     app.kubernetes.io/component: operator
1145 |     app.kubernetes.io/part-of: valence
1146 |     app.kubernetes.io/version: 0.3.2
1147 |   ports:
1148 |   - name: prometheus  # port name the "valence" scrape job filters on
1149 |     targetPort: 8080
1150 |     port: 8080
1151 | ---
1152 | apiVersion: v1  # NodePort Service in front of the Prometheus pod
1153 | kind: Service
1154 | metadata:
1155 |   name: prometheus-valence
1156 |   namespace: valence-system
1157 |   labels:
1158 |     app.kubernetes.io/part-of: valence
1159 |     app.kubernetes.io/name: prometheus-valence
1160 | spec:
1161 |   type: NodePort
1162 |   selector:
1163 |     app.kubernetes.io/part-of: valence
1164 |     app.kubernetes.io/name: prometheus-valence
1165 |   ports:
1166 |   - name: web
1167 |     targetPort: web  # resolved by name against the container port called "web"
1168 |     protocol: TCP
1169 |     port: 9090
1170 | ---
1171 | apiVersion: apps/v1  # extensions/v1beta1 no longer serves Deployment on Kubernetes >= 1.16
1172 | kind: Deployment
1173 | metadata:
1174 |   labels:
1175 |     app.kubernetes.io/name: grafana
1176 |     app.kubernetes.io/part-of: valence
1177 |   name: grafana
1178 |   namespace: valence-system
1179 | spec:
1180 |   replicas: 1
1181 |   selector:  # mandatory in apps/v1; must match the pod template labels below
1182 |     matchLabels:
1183 |       app.kubernetes.io/name: grafana
1184 |       app.kubernetes.io/part-of: valence
1185 |   strategy:
1186 |     rollingUpdate:
1187 |       maxSurge: 1
1188 |       maxUnavailable: 0
1189 |     type: RollingUpdate
1190 |   template:
1191 |     metadata:
1192 |       labels:
1193 |         app.kubernetes.io/name: grafana
1194 |         app.kubernetes.io/part-of: valence
1195 |     spec:
1196 |       containers:
1197 |       - env:
1198 |         - name: GF_SERVER_ROOT_URL  # root URL is the API-server service-proxy path of the grafana Service
1199 |           value: /api/v1/namespaces/valence-system/services/grafana/proxy/
1200 |         image: grafana/grafana:5.2.4
1201 |         name: grafana
1202 |         ports:
1203 |         - containerPort: 3000
1204 |           protocol: TCP
1205 |         resources:
1206 |           limits:
1207 |             cpu: 500m
1208 |             memory: 2500Mi
1209 |           requests:
1210 |             cpu: 100m
1211 |             memory: 100Mi
1212 |         volumeMounts:
1213 |         - mountPath: /var/lib/grafana
1214 |           name: data
1215 |         - mountPath: /etc/grafana/provisioning/dashboards
1216 |           name: providers
1217 |         - mountPath: /etc/grafana/provisioning/datasources
1218 |           name: datasources
1219 |         - mountPath: /var/lib/grafana/dashboards/capacity-planning.json  # single-file mount via subPath
1220 |           name: dashboards-valence
1221 |           subPath: valence.json
1222 |       restartPolicy: Always
1223 |       volumes:
1224 |       - emptyDir: {}  # Grafana state is not persisted across pod restarts
1225 |         name: data
1226 |       - configMap:
1227 |           name: grafana-providers
1228 |         name: providers
1229 |       - configMap:
1230 |           name: grafana-datasources
1231 |         name: datasources
1232 |       - configMap:
1233 |           name: grafana-dashboards-valence
1234 |         name: dashboards-valence
1235 | ---
1236 | apiVersion: apps/v1  # extensions/v1beta1 no longer serves Deployment on Kubernetes >= 1.16
1237 | kind: Deployment
1238 | metadata:
1239 |   labels:
1240 |     app.kubernetes.io/component: operator
1241 |     app.kubernetes.io/name: valence
1242 |     app.kubernetes.io/part-of: valence
1243 |     app.kubernetes.io/version: 0.3.2
1244 |   name: optimization-operator
1245 |   namespace: valence-system
1246 | spec:
1247 |   replicas: 1
1248 |   selector:  # mandatory in apps/v1; must match the pod template labels below
1249 |     matchLabels:
1250 |       app.kubernetes.io/component: operator
1251 |       app.kubernetes.io/name: valence
1252 |       app.kubernetes.io/part-of: valence
1253 |       app.kubernetes.io/version: 0.3.2
1254 |   template:
1255 |     metadata:
1256 |       labels:
1257 |         app.kubernetes.io/component: operator
1258 |         app.kubernetes.io/name: valence
1259 |         app.kubernetes.io/part-of: valence
1260 |         app.kubernetes.io/version: 0.3.2
1261 |     spec:
1262 |       containers:
1263 |       - args:
1264 |         - operator
1265 |         env:
1266 |         - name: DATA_OPT_OUT
1267 |           value: "false"
1268 |         - name: LICENSE_KEY  # NOTE(review): looks like a placeholder value - confirm it is replaced before deploying
1269 |           value: license key
1270 |         - name: MIN_SAMPLE_SIZE
1271 |           value: "20"
1272 |         - name: PROMETHEUS_URL  # points at the prometheus-valence Service on port 9090
1273 |           value: http://prometheus-valence.valence-system.svc:9090
1274 |         image: valencenet/valence:0.3.2
1275 |         imagePullPolicy: Always
1276 |         name: optimization-operator
1277 |         resources:
1278 |           limits:
1279 |             cpu: 500m
1280 |             memory: 500M
1281 |           requests:
1282 |             cpu: 250m
1283 |             memory: 250M
1284 |       serviceAccountName: valence-operator
1285 | ---
1286 | apiVersion: apps/v1  # single-replica Prometheus server for the valence stack
1287 | kind: StatefulSet
1288 | metadata:
1289 |   name: prometheus-valence
1290 |   namespace: valence-system
1291 |   labels:
1292 |     app.kubernetes.io/part-of: valence
1293 |     app.kubernetes.io/name: prometheus-valence
1294 | spec:
1295 |   serviceName: prometheus-valence
1296 |   replicas: 1
1297 |   volumeClaimTemplates: []  # no PVCs; the TSDB path is backed by an emptyDir volume
1298 |   selector:
1299 |     matchLabels:
1300 |       app.kubernetes.io/part-of: valence
1301 |       app.kubernetes.io/name: prometheus-valence
1302 |   template:
1303 |     metadata:
1304 |       labels:
1305 |         app.kubernetes.io/part-of: valence
1306 |         app.kubernetes.io/name: prometheus-valence
1307 |     spec:
1308 |       serviceAccountName: prometheus-valence
1309 |       volumes:
1310 |       - name: config-volume  # carries prometheus.yaml from the ConfigMap
1311 |         configMap:
1312 |           name: prometheus-valence
1313 |       - name: rules-volume
1314 |         emptyDir: {}
1315 |       - name: prometheus-valence-db
1316 |         emptyDir: {}
1317 |       containers:
1318 |       - name: prometheus
1319 |         image: quay.io/prometheus/prometheus:v2.7.1
1320 |         args:
1321 |         - --web.console.templates=/etc/prometheus/consoles
1322 |         - --web.console.libraries=/etc/prometheus/console_libraries
1323 |         - --config.file=/etc/prometheus/prometheus.yaml
1324 |         - --storage.tsdb.path=/prometheus
1325 |         - --web.enable-lifecycle
1326 |         - --storage.tsdb.no-lockfile
1327 |         - --web.route-prefix=/
1328 |         - --storage.tsdb.retention.time=6h
1329 |         ports:
1330 |         - name: web
1331 |           containerPort: 9090
1332 |         resources:
1333 |           requests:
1334 |             cpu: 100m
1335 |             memory: 2Gi
1336 |           limits:
1337 |             cpu: 250m
1338 |             memory: 2.5Gi
1339 |         volumeMounts:
1340 |         - name: config-volume
1341 |           mountPath: /etc/prometheus
1342 |         - name: rules-volume
1343 |           mountPath: /etc/prometheus-rules
1344 |         - name: prometheus-valence-db
1345 |           mountPath: /prometheus
1346 | 


--------------------------------------------------------------------------------