├── .github
    └── workflows
    │   ├── benchmarks.yaml
    │   └── teardown.yaml
├── LICENSE
├── README.md
├── benchmarks
    └── ramp_up.js
├── dev
    ├── docker-compose.yml
    ├── prometheus-config.yml
    └── run-benchmark
├── environments
    └── aws
    │   ├── .gitignore
    │   ├── Pulumi.main.yaml
    │   ├── Pulumi.yaml
    │   ├── README.md
    │   ├── index.ts
    │   ├── package-lock.json
    │   └── package.json
├── k8s
    ├── benchmark
    │   ├── kustomization.yaml
    │   ├── monitoring-service.yaml
    │   ├── service-monitor.yaml
    │   ├── soak-test-deployment.yaml
    │   └── workers-deployment.yaml
    ├── monitoring
    │   ├── dashboards
    │   │   ├── cloudwatch.yaml
    │   │   ├── fetch-dashboards.sh
    │   │   ├── folder.yaml
    │   │   ├── patch.yaml
    │   │   ├── soak-test-frontend.json
    │   │   ├── soak-test-history.json
    │   │   ├── soak-test-matching.json
    │   │   ├── soak-test-persistence.json
    │   │   ├── soak-test-pods.json
    │   │   ├── soak-test-polling.json
    │   │   ├── soak-test-services.json
    │   │   ├── soak-test-slo.json
    │   │   ├── soak-test-summary.json
    │   │   └── soak-test-worker.json
    │   ├── grafana.ini
    │   ├── kustomization.yaml
    │   ├── prometheus-rbacTemporal.yaml
    │   └── temporal-rules.yaml
    └── temporal
    │   ├── frontend-deployment.yaml
    │   ├── frontend-service.yaml
    │   ├── history-deployment.yaml
    │   ├── kustomization.yaml
    │   ├── matching-deployment.yaml
    │   ├── monitoring-service.yaml
    │   ├── service-monitor.yaml
    │   └── worker-deployment.yaml
└── stacks
    ├── .gitignore
    ├── Pulumi.eks-cassandra-medium.yaml
    ├── Pulumi.eks-rds-aurora-mysql-medium.yaml
    ├── Pulumi.eks-rds-aurora-postgres-medium.yaml
    ├── Pulumi.eks-rds-mysql-scaling-series.yaml
    ├── Pulumi.eks-rds-postgres-medium.yaml
    ├── Pulumi.eks-rds-postgres-micro.yaml
    ├── Pulumi.yaml
    ├── README.md
    ├── fetch-kubeconfig
    ├── grafana-tunnel
    ├── index.ts
    ├── package-lock.json
    ├── package.json
    ├── port-forward-grafana
    └── tsconfig.json


/.github/workflows/benchmarks.yaml:
--------------------------------------------------------------------------------
 1 | # Manually triggered workflow. For every stack in the matrix it provisions the
 2 | # stack with Pulumi, runs benchmarks/ramp_up.js with k6 inside the cluster,
 3 | # appends the output to the job summary and destroys the stack again.
 4 | name: Run Benchmarks
 5 | on:
 6 |   - workflow_dispatch
 7 | jobs:
 8 |   benchmarks:
 9 |     name: Run benchmarks
10 |     strategy:
11 |       # Let the other stacks finish when one stack's run fails.
12 |       fail-fast: false
13 |       matrix:
14 |         stack: [eks-rds-postgres-m6i-large,eks-rds-postgres-m6i-2xlarge,eks-rds-aurora-postgres-r5-2xlarge]
15 |     runs-on: ubuntu-latest
16 |     permissions:
17 |       id-token: write
18 |       contents: read
19 |     steps:
20 |       - uses: actions/checkout@v3
21 |       - uses: actions/setup-node@v3
22 |         with:
23 |           node-version: 16
24 |       - run: npm install
25 |         working-directory: stacks
26 |       - name: Configure AWS Credentials
27 |         uses: aws-actions/configure-aws-credentials@v1
28 |         with:
29 |           aws-region: ${{ secrets.AWS_REGION }}
30 |           role-to-assume: ${{ secrets.AWS_ROLE }}
31 |       - uses: pulumi/actions@v3
32 |         id: pulumi-up
33 |         with:
34 |           work-dir: stacks
35 |           command: up
36 |           stack-name: ${{ matrix.stack }}
37 |         env:
38 |           PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_TOKEN }}
39 |       # Expression values are passed to the shell through env vars instead of
40 |       # being pasted into the script text, so quotes, backticks or "$" in a
41 |       # step output cannot break or alter the command.
42 |       - run: |
43 |           echo "### Benchmark: ${STACK}" >> "$GITHUB_STEP_SUMMARY"
44 |           printf '%s\n' "${CLUSTER_SUMMARY}" >> "$GITHUB_STEP_SUMMARY"
45 |         env:
46 |           STACK: ${{ matrix.stack }}
47 |           CLUSTER_SUMMARY: ${{ steps.pulumi-up.outputs.clusterSummary }}
48 |       # Feeds benchmarks/ramp_up.js on stdin to a one-off k6 pod.
49 |       # NOTE(review): this action follows a moving branch; consider pinning a
50 |       # tag or commit SHA. Also confirm that the short service name
51 |       # "temporal-frontend" resolves from the namespace this pod runs in; the
52 |       # k8s/benchmark deployments use temporal-frontend.temporal.svc.cluster.local.
53 |       - uses: ianbelcher/eks-kubectl-action@master
54 |         id: run-benchmark
55 |         with:
56 |           cluster_name: ${{ steps.pulumi-up.outputs.clusterName }}
57 |           stdin: benchmarks/ramp_up.js
58 |           args: >
59 |             run k6-${{ github.run_attempt }} -i --restart Never
60 |             --image ghcr.io/temporalio/xk6-temporal:v0.1.0
61 |             --env TEMPORAL_GRPC_ENDPOINT=temporal-frontend:7233
62 |             --env K6_OUT=output-prometheus-remote
63 |             --env K6_PROMETHEUS_REMOTE_URL=http://prometheus-k8s.monitoring.svc.cluster.local:9090/api/v1/write
64 |             --env PROMETHEUS_ENDPOINT=http://prometheus-k8s.monitoring.svc.cluster.local:9090/
65 |             --
66 |             k6 run --quiet --no-color -
67 |       - run: printf '%s\n' "${KUBECTL_OUT}" >> "$GITHUB_STEP_SUMMARY"
68 |         env:
69 |           KUBECTL_OUT: ${{ steps.run-benchmark.outputs.kubectl-out }}
70 |       # Tear the stack down even when provisioning or the benchmark failed, so
71 |       # a broken run does not leave the cluster and database running.
72 |       - uses: pulumi/actions@v3
73 |         if: always()
74 |         with:
75 |           work-dir: stacks
76 |           command: destroy
77 |           stack-name: ${{ matrix.stack }}
78 |         env:
79 |           PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_TOKEN }}
80 | 


--------------------------------------------------------------------------------
/.github/workflows/teardown.yaml:
--------------------------------------------------------------------------------
 1 | # Manually triggered workflow that runs `pulumi destroy` for every benchmark
 2 | # stack in the matrix.
 3 | name: Teardown clusters
 4 | on:
 5 |   - workflow_dispatch
 6 | jobs:
 7 |   benchmarks:
 8 |     name: Teardown clusters
 9 |     strategy:
10 |       # Keep destroying the other stacks when one destroy fails.
11 |       fail-fast: false
12 |       matrix:
13 |         # Mirrors the matrix in benchmarks.yaml so that every stack the
14 |         # benchmark workflow creates can also be torn down from here.
15 |         stack: [eks-rds-postgres-m6i-large,eks-rds-postgres-m6i-2xlarge,eks-rds-aurora-postgres-r5-2xlarge]
16 |     runs-on: ubuntu-latest
17 |     permissions:
18 |       id-token: write
19 |       contents: read
20 |     steps:
21 |       - uses: actions/checkout@v3
22 |       - uses: actions/setup-node@v3
23 |         with:
24 |           node-version: 16
25 |       - run: npm install
26 |         working-directory: stacks
27 |       - name: Configure AWS Credentials
28 |         uses: aws-actions/configure-aws-credentials@v1
29 |         with:
30 |           aws-region: ${{ secrets.AWS_REGION }}
31 |           role-to-assume: ${{ secrets.AWS_ROLE }}
32 |       - uses: pulumi/actions@v3
33 |         id: pulumi-down
34 |         with:
35 |           work-dir: stacks
36 |           command: destroy
37 |           stack-name: ${{ matrix.stack }}
38 |         env:
39 |           PULUMI_ACCESS_TOKEN: ${{ secrets.PULUMI_TOKEN }}
40 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 temporal.io
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Temporal Benchmark Matrix
 2 | 
 3 | The Benchmark Matrix is designed to give users a guide to what kind of performance they can expect out of various cluster and persistence configurations.
 4 | 
 5 | Please note that this project is in its extremely early stages: clusters are not being tuned, and resource limits are not set or enforced. Any performance numbers recorded should be ignored for now.
 6 | 
 7 | Once the matrix is able to run benchmarks across multiple providers and persistence backends, we will apply constraints and tuning so that we can get consistent and meaningful benchmark results.
 8 | 
 9 | # Benchmark matrix
10 | 
11 | These are the platforms and persistence types that the benchmark matrix currently tests:
12 | 
13 | | Provider | Platform | Persistence Type | Persistence Size |
14 | |---|---|---|---|
15 | |AWS|EKS|Postgres|m6i-large|
16 | |AWS|EKS|Postgres|m6i-2xlarge|
17 | 
18 | # Running the benchmarks
19 | 
20 | TODO :)
21 | 
22 | # Contributing
23 | 
24 | Currently we only run on AWS EKS with Postgres RDS instances. We would love to support Azure, GCP and more persistence systems. All contributions welcome!


--------------------------------------------------------------------------------
/benchmarks/ramp_up.js:
--------------------------------------------------------------------------------
 1 | import temporal from 'k6/x/temporal';
 2 | import promclient from 'k6/x/prometheus-client';
 3 | import { scenario } from 'k6/execution';
 4 | import { textSummary } from 'https://jslib.k6.io/k6-summary/0.0.2/index.js';
 5 | 
 6 | export const options = { // k6 options: a single scenario named ramp_up
 7 |   scenarios: {
 8 |     ramp_up: {
 9 |       executor: 'ramping-vus',
10 |       startVUs: 500,
11 |       stages: [ // alternates a 2m stage at each level with a 10s stage to the next level (+100 VUs)
12 |         { duration: '2m', target: 500 },
13 |         { duration: '10s', target: 600 },
14 |         { duration: '2m', target: 600 },
15 |         { duration: '10s', target: 700 },
16 |         { duration: '2m', target: 700 },
17 |         { duration: '10s', target: 800 },
18 |         { duration: '2m', target: 800 },
19 |         { duration: '10s', target: 900 },
20 |         { duration: '2m', target: 900 },
21 |         { duration: '10s', target: 1000 },
22 |         { duration: '2m', target: 1000 }, // final level: 1000 VUs
23 |       ],
24 |     },
25 |   },
26 | };
27 | 
28 | const startWorkflow = (client) => { // starts one workflow and returns what client.startWorkflow returns
29 |   while(true) { // retried without limit or delay until a start call succeeds
30 |     try {
31 |       const workflow = client.startWorkflow(
32 |         {
33 |           task_queue: 'benchmark',
34 |           id: 'echo-' + scenario.iterationInTest, // id suffix comes from k6/execution's scenario object
35 |         },
36 |         'ExecuteActivity',
37 |         { "Count": 1, "Activity": "Echo", "Input": { "Message": "test" } },
38 |       )
39 | 
40 |       return workflow;
41 |     } catch (err) { console.log("Retrying...", err); } // log the error, then loop and try again
42 |   }
43 | }
44 | 
45 | const waitForWorkflowCompletion = (workflow) => { // returns once workflow.result() completes without throwing
46 |   while(true) { // no retry limit and no delay between attempts
47 |     try {
48 |       workflow.result() // the result value itself is discarded
49 |       return
50 |     } catch (err) { console.log("Retrying...", err); } // log the error, then loop and try again
51 |   }
52 | }
53 | 
54 | export default () => { // default export: open a client, run one workflow to completion, close the client
55 |   const client = temporal.newClient() // a new client is created on every call
56 |   
57 |   const workflow = startWorkflow(client);
58 |   waitForWorkflowCompletion(workflow)
59 |   
60 |   client.close()
61 | };
62 | 
63 | const queryProm = (query) => { // runs `query` against Prometheus and returns the query result
64 |   const prom = promclient.newClient(__ENV.PROMETHEUS_ENDPOINT) // endpoint is read from the environment
65 | 
66 |   const [result, warnings] = prom.query(query, new Date()); // second argument is the current time
67 | 
68 |   if (warnings.length) { // warnings are only logged; the result is still returned
69 |      console.warn("Prometheus warnings:", warnings)
70 |   }
71 | 
72 |   return result
73 | }
74 | 
75 | export function handleSummary(data) {
76 |   delete(data.metrics.data_sent);
77 |   delete(data.metrics.data_received);
78 | 
79 |   data.metrics.actions = {
80 |     "type": "counter",
81 |     "values": {
82 |       "count": queryProm('sum(action{namespace="default"})')[0].value + 0,
83 |       "rate": queryProm('max_over_time(sum(rate(action{namespace="default"}[1m]))[15m:30s])')[0].value + 0,
84 |     }
85 |   }
86 | 
87 |   return {
88 |     'stdout': textSummary(data, { enableColors: false })
89 |   };
90 | };


--------------------------------------------------------------------------------
/dev/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | # Local development stack: PostgreSQL, Temporal (auto-setup image), the
 2 | # benchmark workers, Temporal admin tools and Prometheus, all attached to the
 3 | # bridge network "temporal-network".
 4 | version: "3.5"
 5 | services:
 6 |   postgresql:
 7 |     container_name: temporal-postgresql
 8 |     environment:
 9 |       POSTGRES_PASSWORD: temporal
10 |       POSTGRES_USER: temporal
11 |     image: postgres:13
12 |     networks:
13 |       - temporal-network
14 |   temporal:
15 |     container_name: temporal
16 |     depends_on:
17 |       - postgresql
18 |     environment:
19 |       - DB=postgresql
20 |       - DB_PORT=5432
21 |       - POSTGRES_USER=temporal
22 |       - POSTGRES_PWD=temporal
23 |       - POSTGRES_SEEDS=postgresql
24 |       - PROMETHEUS_ENDPOINT=0.0.0.0:8000
25 |     image: temporalio/auto-setup:1.20.0
26 |     networks:
27 |       - temporal-network
28 |     ports:
29 |       # Port mappings are quoted so YAML can never read them as numbers.
30 |       - "7233:7233"
31 |   benchmark-workers:
32 |     container_name: benchmark-workers
33 |     depends_on:
34 |       - temporal
35 |     environment:
36 |       - TEMPORAL_GRPC_ENDPOINT=temporal:7233
37 |       - TEMPORAL_TASK_QUEUE=benchmark
38 |       - PROMETHEUS_ENDPOINT=0.0.0.0:8000
39 |     image: ghcr.io/temporalio/benchmark-workers:main
40 |     networks:
41 |       - temporal-network
42 |   temporal-admin-tools:
43 |     container_name: temporal-admin-tools
44 |     depends_on:
45 |       - temporal
46 |     environment:
47 |       - TEMPORAL_CLI_ADDRESS=temporal:7233
48 |     image: temporalio/admin-tools:1.20.0
49 |     networks:
50 |       - temporal-network
51 |     stdin_open: true
52 |     tty: true
53 |   prometheus:
54 |     container_name: prometheus
55 |     image: prom/prometheus:v2.39.1
56 |     command:
57 |       - --config.file=/etc/prometheus/prometheus.yml
58 |       - --storage.tsdb.path=/prometheus
59 |       - --web.console.libraries=/usr/share/prometheus/console_libraries
60 |       - --web.console.templates=/usr/share/prometheus/consoles
61 |       # dev/run-benchmark points k6 at this instance's /api/v1/write endpoint.
62 |       - --web.enable-remote-write-receiver
63 |     ports:
64 |       - "9090:9090"
65 |     volumes:
66 |       - type: bind
67 |         source: ./prometheus-config.yml
68 |         target: /etc/prometheus/prometheus.yml
69 |     networks:
70 |       - temporal-network
71 | networks:
72 |   temporal-network:
73 |     driver: bridge
74 |     name: temporal-network
75 | 


--------------------------------------------------------------------------------
/dev/prometheus-config.yml:
--------------------------------------------------------------------------------
 1 | global:
 2 |   scrape_interval: 10s  # global default; the job below does not override it
 3 | scrape_configs:
 4 |   - job_name: 'temporal'  # one job covers both the server and the benchmark workers
 5 |     metrics_path: /metrics
 6 |     scheme: http
 7 |     static_configs:
 8 |       - targets:  # port 8000 matches PROMETHEUS_ENDPOINT set for both services in docker-compose.yml
 9 |           - 'temporal:8000'
10 |           - 'benchmark-workers:8000'
11 | 


--------------------------------------------------------------------------------
/dev/run-benchmark:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | docker run --network temporal-network -i \
 4 |     --env TEMPORAL_GRPC_ENDPOINT=temporal:7233 \
 5 |     --env K6_OUT=output-prometheus-remote \
 6 |     --env K6_PROMETHEUS_REMOTE_URL=http://prometheus:9090/api/v1/write \
 7 |     --env PROMETHEUS_ENDPOINT=http://prometheus:9090/ \
 8 |     xk6-temporal:main \
 9 |     k6 run --tag testid=$(uuidgen) - < $1
10 | 


--------------------------------------------------------------------------------
/environments/aws/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | 


--------------------------------------------------------------------------------
/environments/aws/Pulumi.main.yaml:
--------------------------------------------------------------------------------
1 | encryptionsalt: v1:JS9jJGyKuKQ=:v1:BKBLkuaWAjVYIS1R:MvrTtSCLoQ3fMHNu/YpzSYFGYoZbeA==
2 | 


--------------------------------------------------------------------------------
/environments/aws/Pulumi.yaml:
--------------------------------------------------------------------------------
1 | name: temporal-benchmarks-aws-environment
2 | runtime: nodejs
3 | description: AWS Environment for Temporal benchmarks
4 | 


--------------------------------------------------------------------------------
/environments/aws/README.md:
--------------------------------------------------------------------------------
 1 | [![Deploy](https://get.pulumi.com/new/button.svg)](https://app.pulumi.com/new?template=https://github.com/temporalio/benchmark-matrix/tree/master/environments/aws)
 2 | 
 3 | # AWS Environment for Temporal Benchmark Clusters
 4 | 
 5 | This [Pulumi](https://pulumi.com) app creates a VPC, subnets and RDS subnet group to hold clusters created for the Temporal Benchmark Matrix.
 6 | 
 7 | ## Deploying
 8 | 
 9 | You can create this application in Pulumi using the button above. Alternatively, if you'd prefer to use local state storage to experiment with the Temporal Benchmark Matrix, follow these steps:
10 | 
11 | 1. Configure pulumi to use local state:
12 | 
13 |     ```shell
14 |     $ pulumi login --local
15 |     ```
16 | 
17 | 2. Bring up a stack:
18 | 
19 |     ```shell
20 |     $ pulumi -s dev up
21 |     ```
22 | 
23 | For more information on Pulumi state storage, please see [their docs](https://www.pulumi.com/docs/intro/concepts/state/)


--------------------------------------------------------------------------------
/environments/aws/index.ts:
--------------------------------------------------------------------------------
 1 | import * as aws from "@pulumi/aws";
 2 | import * as awsx from "@pulumi/awsx";
 3 | 
 4 | const azCount = 3; // how many availability zone names are kept below
 5 | 
 6 | export const AvailabilityZones = aws.getAvailabilityZones({state: "available"}).then(zones => zones.names.slice(0, azCount)) // promise of the first azCount zone names in state "available"
 7 | 
 8 | const vpc = new awsx.ec2.Vpc("temporal-benchmark", { // VPC requested in the zones selected above
 9 |     requestedAvailabilityZones: AvailabilityZones
10 | })
11 | 
12 | const rdsSubnetGroup = new aws.rds.SubnetGroup("temporal-benchmark-rds", { // RDS subnet group over the VPC's subnets
13 |     subnetIds: vpc.publicSubnetIds // NOTE(review): uses the public subnets; confirm this is intended for RDS
14 | });
15 | 
16 | new aws.iam.ServiceLinkedRole("opensearch", { awsServiceName: "opensearchservice.amazonaws.com" }) // service-linked role for the OpenSearch service
17 | 
18 | export const VpcId = vpc.id // stack outputs start here
19 | export const PrivateSubnetIds = vpc.privateSubnetIds
20 | export const PublicSubnetIds = vpc.publicSubnetIds
21 | export const RdsSubnetGroupName = rdsSubnetGroup.name
22 | export const Role = "BenchmarkClusterAdmin" // NOTE(review): hard-coded name, not created in this file; presumably provisioned elsewhere, confirm


--------------------------------------------------------------------------------
/environments/aws/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "foo",
 3 |     "main": "index.ts",
 4 |     "devDependencies": {
 5 |         "@types/node": "^14"
 6 |     },
 7 |     "dependencies": {
 8 |         "@pulumi/aws": "^5.0.0",
 9 |         "@pulumi/awsx": "^0.40.0",
10 |         "@pulumi/eks": "^0.42.5",
11 |         "@pulumi/pulumi": "^3.0.0"
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/k8s/benchmark/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - workers-deployment.yaml
3 | - soak-test-deployment.yaml
4 | - monitoring-service.yaml
5 | - service-monitor.yaml
6 | 


--------------------------------------------------------------------------------
/k8s/benchmark/monitoring-service.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Service  # exposes the metrics port of the benchmark pods
 3 | metadata:
 4 |   labels:  # these two labels are what service-monitor.yaml selects on
 5 |     app.kubernetes.io/name: benchmark
 6 |     app.kubernetes.io/component: monitoring
 7 |   name: benchmark-monitoring
 8 | spec:
 9 |   type: ClusterIP
10 |   clusterIP: None  # headless: no cluster IP is allocated
11 |   internalTrafficPolicy: Cluster
12 |   publishNotReadyAddresses: true  # pods are listed even before they report Ready
13 |   ports:
14 |   - name: metrics
15 |     port: 9090
16 |     targetPort: metrics  # named container port; 8000 in the benchmark deployments
17 |     protocol: TCP
18 |   selector:
19 |     app.kubernetes.io/name: benchmark  # no component label, so workers and soak-test pods both match
20 | 


--------------------------------------------------------------------------------
/k8s/benchmark/service-monitor.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: monitoring.coreos.com/v1
 2 | kind: ServiceMonitor  # selects the Services whose endpoints should be scraped
 3 | metadata:
 4 |   name: benchmark-monitor
 5 |   labels:
 6 |     app.kubernetes.io/name: benchmark
 7 |     app.kubernetes.io/component: monitoring
 8 | spec:
 9 |   endpoints:
10 |   - port: metrics  # Service port name, as declared in monitoring-service.yaml
11 |     interval: 30s
12 |   namespaceSelector:
13 |     matchNames:
14 |       - default  # only Services in the default namespace are considered
15 |   selector:
16 |     matchLabels:  # same two labels as on the benchmark-monitoring Service
17 |       app.kubernetes.io/name: benchmark
18 |       app.kubernetes.io/component: monitoring
19 | 


--------------------------------------------------------------------------------
/k8s/benchmark/soak-test-deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment  # runs the "runner" command from the benchmark-workers image
 3 | metadata:
 4 |   labels:
 5 |     app.kubernetes.io/name: benchmark
 6 |     app.kubernetes.io/component: soak-test
 7 |   name: benchmark-soak-test
 8 | spec:
 9 |   replicas: 1
10 |   selector:
11 |     matchLabels:
12 |       app.kubernetes.io/name: benchmark
13 |       app.kubernetes.io/component: soak-test
14 |   template:
15 |     metadata:
16 |       labels:
17 |         app.kubernetes.io/name: benchmark
18 |         app.kubernetes.io/component: soak-test
19 |     spec:
20 |       initContainers:
21 |         - name: create-namespace  # registers the Temporal namespace "default" before the main container starts
22 |           image: temporalio/admin-tools:1.20.0
23 |           env:
24 |           - name: TEMPORAL_CLI_ADDRESS
25 |             value: "temporal-frontend.temporal.svc.cluster.local:7233"
26 |           command: ["bash", "-c"]
27 |           args: ["tctl --namespace default namespace register || tctl --namespace default namespace describe"]  # describe runs only when register fails; presumably covers an already registered namespace
28 |       containers:
29 |       - image: ghcr.io/temporalio/benchmark-workers:main
30 |         imagePullPolicy: Always
31 |         name: benchmark-soak-test
32 |         command: ["runner", "-w", "-c", "$(CONCURRENT_WORKFLOWS)", "-t", "ExecuteActivity", '{ "Count": 3, "Activity": "Echo", "Input": { "Message": "test" } }']  # CONCURRENT_WORKFLOWS is not set under env below; presumably supplied by the ConfigMap in envFrom, confirm
33 |         env:
34 |         - name: TEMPORAL_GRPC_ENDPOINT
35 |           value: "dns:///temporal-frontend.temporal.svc.cluster.local:7233"
36 |         - name: PROMETHEUS_ENDPOINT
37 |           value: 0.0.0.0:8000  # same port as the "metrics" container port below
38 |         - name: TEMPORAL_NAMESPACE
39 |           value: "default"
40 |         - name: TEMPORAL_TASK_QUEUE
41 |           value: "benchmark"
42 |         envFrom:
43 |         - configMapRef:
44 |             name: benchmark-soaktest-env  # ConfigMap is not defined in this file
45 |         ports:
46 |         - name: metrics  # port name referenced by targetPort in monitoring-service.yaml
47 |           containerPort: 8000
48 |           protocol: TCP
49 |         resources:
50 |           requests:
51 |             cpu: 100m
52 |             memory: 32Mi
53 |           limits:
54 |             cpu: 200m
55 |             memory: 64Mi
56 |       restartPolicy: Always


--------------------------------------------------------------------------------
/k8s/benchmark/workers-deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment  # runs the benchmark-workers image with its default command
 3 | metadata:
 4 |   labels:
 5 |     app.kubernetes.io/name: benchmark
 6 |     app.kubernetes.io/component: workers
 7 |   name: benchmark-workers
 8 | spec:
 9 |   replicas: 1
10 |   selector:
11 |     matchLabels:
12 |       app.kubernetes.io/name: benchmark
13 |       app.kubernetes.io/component: workers
14 |   template:
15 |     metadata:
16 |       labels:
17 |         app.kubernetes.io/name: benchmark
18 |         app.kubernetes.io/component: workers
19 |     spec:
20 |       initContainers:
21 |         - name: create-namespace  # registers the Temporal namespace "default" before the main container starts
22 |           image: temporalio/admin-tools:1.20.0
23 |           env:
24 |           - name: TEMPORAL_CLI_ADDRESS
25 |             value: "temporal-frontend.temporal.svc.cluster.local:7233"
26 |           command: ["bash", "-c"]
27 |           args: ["tctl --namespace default namespace register || tctl --namespace default namespace describe"]  # describe runs only when register fails; presumably covers an already registered namespace
28 |       containers:
29 |       - image: ghcr.io/temporalio/benchmark-workers:main
30 |         imagePullPolicy: Always
31 |         name: benchmark-workers
32 |         env:
33 |         - name: TEMPORAL_GRPC_ENDPOINT
34 |           value: "dns:///temporal-frontend.temporal.svc.cluster.local:7233"
35 |         - name: PROMETHEUS_ENDPOINT
36 |           value: 0.0.0.0:8000  # same port as the "metrics" container port below
37 |         - name: TEMPORAL_NAMESPACE
38 |           value: "default"
39 |         - name: TEMPORAL_TASK_QUEUE
40 |           value: "benchmark"
41 |         envFrom:
42 |         - configMapRef:
43 |             name: benchmark-worker-env  # ConfigMap is not defined in this file
44 |         ports:
45 |         - name: metrics  # port name referenced by targetPort in monitoring-service.yaml
46 |           containerPort: 8000
47 |           protocol: TCP
48 |         resources:
49 |           requests:
50 |             cpu: 300m
51 |             memory: 32Mi
52 |           limits:
53 |             cpu: 500m
54 |             memory: 64Mi
55 |       restartPolicy: Always


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/cloudwatch.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Secret  # holds a Grafana datasource definition for CloudWatch
 3 | metadata:
 4 |   name: grafana-datasources
 5 |   namespace: monitoring
 6 | stringData:  # NOTE(review): defaultRegion below is hard-coded to us-west-2; confirm it matches the deployment region
 7 |   cloudwatch.yaml: |-
 8 |     {
 9 |         "apiVersion": 1,
10 |         "datasources": [
11 |             {
12 |                 "access": "proxy",
13 |                 "editable": false,
14 |                 "name": "cloudwatch",
15 |                 "orgId": 1,
16 |                 "type": "cloudwatch",
17 |                 "jsonData": {
18 |                     "authType": "default",
19 |                     "defaultRegion": "us-west-2"
20 |                 },
21 |                 "version": 1
22 |             }
23 |         ]
24 |     }
25 | type: Opaque
26 | 


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/fetch-dashboards.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Saves every Grafana dashboard tagged "temporal" on ${host} as <slug>.json
4 | # in the current directory.
5 | 
6 | # Abort on the first failed command, unset variable or failed pipeline stage.
7 | set -euo pipefail
8 | 
9 | host="https://temporal-benchmark.ngrok.io"
10 | 
11 | # Fetch the uid list first so that a failed search aborts the script instead
12 | # of silently producing an empty loop. -f makes curl fail on HTTP errors and
13 | # -S keeps its error message visible despite -s.
14 | uids="$(curl -fsS "${host}/api/search?tag=temporal" | jq -r '.[] | .uid')"
15 | 
16 | for uid in ${uids}; do
17 |     dashboard="$(curl -fsS "${host}/api/dashboards/uid/${uid}")"
18 |     name="$(echo "${dashboard}" | jq -r '.meta.slug')"
19 |     echo "${dashboard}" | jq '.dashboard' > "${name}.json"
20 | done


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/folder.yaml:
--------------------------------------------------------------------------------
 1 | {
 2 |     "apiVersion": 1,
 3 |     "providers": [
 4 |         {
 5 |             "folder": "Temporal",
 6 |             "folderUid": "",
 7 |             "name": "temporal",
 8 |             "options": {
 9 |                 "path": "/grafana-dashboard-definitions/temporal"
10 |             },
11 |             "orgId": 1,
12 |             "type": "file",
13 |             "allowUiUpdates": true
14 |         }
15 |     ]
16 | }


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/patch.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment  # partial manifest: only the volume and mount fields of the grafana Deployment are given
 3 | metadata:
 4 |   name: grafana
 5 |   namespace: monitoring
 6 | spec:
 7 |   template:
 8 |     spec:
 9 |       containers:
10 |         - name: grafana
11 |           volumeMounts:
12 |             - mountPath: /grafana-dashboard-definitions/temporal  # same path as options.path in folder.yaml
13 |               name: grafana-temporal-dashboard-definitions
14 |       volumes:
15 |         - name: grafana-temporal-dashboard-definitions
16 |           configMap:
17 |             defaultMode: 420  # decimal 420 is octal 0644
18 |             name: grafana-temporal-dashboard-definitions  # ConfigMap is not defined in this file
19 | 


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/soak-test-frontend.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "datasource",
  8 |           "uid": "grafana"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 33,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "collapsed": false,
 33 |       "datasource": {
 34 |         "type": "datasource",
 35 |         "uid": "grafana"
 36 |       },
 37 |       "gridPos": {
 38 |         "h": 1,
 39 |         "w": 24,
 40 |         "x": 0,
 41 |         "y": 0
 42 |       },
 43 |       "id": 10,
 44 |       "panels": [],
 45 |       "title": "Service",
 46 |       "type": "row"
 47 |     },
 48 |     {
 49 |       "datasource": {
 50 |         "type": "prometheus",
 51 |         "uid": "P1809F7CD0C75ACF3"
 52 |       },
 53 |       "fieldConfig": {
 54 |         "defaults": {
 55 |           "color": {
 56 |             "mode": "palette-classic"
 57 |           },
 58 |           "custom": {
 59 |             "axisCenteredZero": false,
 60 |             "axisColorMode": "text",
 61 |             "axisLabel": "",
 62 |             "axisPlacement": "auto",
 63 |             "barAlignment": 0,
 64 |             "drawStyle": "line",
 65 |             "fillOpacity": 0,
 66 |             "gradientMode": "none",
 67 |             "hideFrom": {
 68 |               "legend": false,
 69 |               "tooltip": false,
 70 |               "viz": false
 71 |             },
 72 |             "lineInterpolation": "linear",
 73 |             "lineWidth": 1,
 74 |             "pointSize": 5,
 75 |             "scaleDistribution": {
 76 |               "type": "linear"
 77 |             },
 78 |             "showPoints": "never",
 79 |             "spanNulls": false,
 80 |             "stacking": {
 81 |               "group": "A",
 82 |               "mode": "none"
 83 |             },
 84 |             "thresholdsStyle": {
 85 |               "mode": "off"
 86 |             }
 87 |           },
 88 |           "mappings": [],
 89 |           "thresholds": {
 90 |             "mode": "absolute",
 91 |             "steps": [
 92 |               {
 93 |                 "color": "green",
 94 |                 "value": null
 95 |               },
 96 |               {
 97 |                 "color": "red",
 98 |                 "value": 80
 99 |               }
100 |             ]
101 |           },
102 |           "unit": "s"
103 |         },
104 |         "overrides": []
105 |       },
106 |       "gridPos": {
107 |         "h": 8,
108 |         "w": 12,
109 |         "x": 0,
110 |         "y": 1
111 |       },
112 |       "id": 19,
113 |       "options": {
114 |         "legend": {
115 |           "calcs": [],
116 |           "displayMode": "list",
117 |           "placement": "bottom",
118 |           "showLegend": true
119 |         },
120 |         "tooltip": {
121 |           "mode": "single",
122 |           "sort": "none"
123 |         }
124 |       },
125 |       "targets": [
126 |         {
127 |           "datasource": {
128 |             "type": "prometheus",
129 |             "uid": "prometheus"
130 |           },
131 |           "editorMode": "code",
132 |           "exemplar": true,
133 |           "expr": "histogram_quantile(0.95, sum by (le, operation) (rate(service_latency_bucket{service_name=\"frontend\",operation!~\"Poll.*\"}[$__rate_interval]))) > 0",
134 |           "interval": "",
135 |           "legendFormat": "__auto",
136 |           "range": true,
137 |           "refId": "A"
138 |         }
139 |       ],
140 |       "title": "Frontend Request Latency p95",
141 |       "type": "timeseries"
142 |     },
143 |     {
144 |       "datasource": {
145 |         "type": "prometheus",
146 |         "uid": "P1809F7CD0C75ACF3"
147 |       },
148 |       "fieldConfig": {
149 |         "defaults": {
150 |           "color": {
151 |             "mode": "palette-classic"
152 |           },
153 |           "custom": {
154 |             "axisCenteredZero": false,
155 |             "axisColorMode": "text",
156 |             "axisLabel": "",
157 |             "axisPlacement": "auto",
158 |             "barAlignment": 0,
159 |             "drawStyle": "line",
160 |             "fillOpacity": 0,
161 |             "gradientMode": "none",
162 |             "hideFrom": {
163 |               "legend": false,
164 |               "tooltip": false,
165 |               "viz": false
166 |             },
167 |             "lineInterpolation": "linear",
168 |             "lineWidth": 1,
169 |             "pointSize": 5,
170 |             "scaleDistribution": {
171 |               "type": "linear"
172 |             },
173 |             "showPoints": "never",
174 |             "spanNulls": false,
175 |             "stacking": {
176 |               "group": "A",
177 |               "mode": "none"
178 |             },
179 |             "thresholdsStyle": {
180 |               "mode": "off"
181 |             }
182 |           },
183 |           "mappings": [],
184 |           "thresholds": {
185 |             "mode": "absolute",
186 |             "steps": [
187 |               {
188 |                 "color": "green",
189 |                 "value": null
190 |               },
191 |               {
192 |                 "color": "red",
193 |                 "value": 80
194 |               }
195 |             ]
196 |           },
197 |           "unit": "short"
198 |         },
199 |         "overrides": []
200 |       },
201 |       "gridPos": {
202 |         "h": 8,
203 |         "w": 12,
204 |         "x": 12,
205 |         "y": 1
206 |       },
207 |       "id": 15,
208 |       "options": {
209 |         "legend": {
210 |           "calcs": [],
211 |           "displayMode": "list",
212 |           "placement": "bottom",
213 |           "showLegend": true
214 |         },
215 |         "tooltip": {
216 |           "mode": "single",
217 |           "sort": "none"
218 |         }
219 |       },
220 |       "targets": [
221 |         {
222 |           "datasource": {
223 |             "type": "prometheus",
224 |             "uid": "prometheus"
225 |           },
226 |           "editorMode": "code",
227 |           "exemplar": true,
228 |           "expr": "sum by (error_type) (rate(service_error_with_type{service_name=\"frontend\"}[$__rate_interval])) > 0",
229 |           "interval": "",
230 |           "legendFormat": "{{error_type}}",
231 |           "range": true,
232 |           "refId": "A"
233 |         }
234 |       ],
235 |       "title": "Frontend Error Rate",
236 |       "type": "timeseries"
237 |     },
238 |     {
239 |       "collapsed": false,
240 |       "gridPos": {
241 |         "h": 1,
242 |         "w": 24,
243 |         "x": 0,
244 |         "y": 9
245 |       },
246 |       "id": 17,
247 |       "panels": [],
248 |       "title": "Pods",
249 |       "type": "row"
250 |     },
251 |     {
252 |       "datasource": {
253 |         "type": "prometheus",
254 |         "uid": "P1809F7CD0C75ACF3"
255 |       },
256 |       "fieldConfig": {
257 |         "defaults": {
258 |           "color": {
259 |             "mode": "palette-classic"
260 |           },
261 |           "custom": {
262 |             "axisCenteredZero": false,
263 |             "axisColorMode": "text",
264 |             "axisLabel": "",
265 |             "axisPlacement": "auto",
266 |             "barAlignment": 0,
267 |             "drawStyle": "line",
268 |             "fillOpacity": 0,
269 |             "gradientMode": "none",
270 |             "hideFrom": {
271 |               "legend": false,
272 |               "tooltip": false,
273 |               "viz": false
274 |             },
275 |             "lineInterpolation": "linear",
276 |             "lineWidth": 1,
277 |             "pointSize": 5,
278 |             "scaleDistribution": {
279 |               "type": "linear"
280 |             },
281 |             "showPoints": "never",
282 |             "spanNulls": false,
283 |             "stacking": {
284 |               "group": "A",
285 |               "mode": "none"
286 |             },
287 |             "thresholdsStyle": {
288 |               "mode": "off"
289 |             }
290 |           },
291 |           "mappings": [],
292 |           "thresholds": {
293 |             "mode": "absolute",
294 |             "steps": [
295 |               {
296 |                 "color": "green",
297 |                 "value": null
298 |               },
299 |               {
300 |                 "color": "red",
301 |                 "value": 80
302 |               }
303 |             ]
304 |           },
305 |           "unit": "percentunit"
306 |         },
307 |         "overrides": []
308 |       },
309 |       "gridPos": {
310 |         "h": 8,
311 |         "w": 12,
312 |         "x": 0,
313 |         "y": 10
314 |       },
315 |       "id": 12,
316 |       "options": {
317 |         "legend": {
318 |           "calcs": [],
319 |           "displayMode": "list",
320 |           "placement": "bottom",
321 |           "showLegend": true
322 |         },
323 |         "tooltip": {
324 |           "mode": "single",
325 |           "sort": "none"
326 |         }
327 |       },
328 |       "targets": [
329 |         {
330 |           "datasource": {
331 |             "type": "prometheus",
332 |             "uid": "prometheus"
333 |           },
334 |           "editorMode": "code",
335 |           "exemplar": true,
336 |           "expr": "sum(\n  rate(container_cpu_usage_seconds_total{container=\"temporal\"}[$__rate_interval])\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-frontend\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\",resource=\"cpu\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-frontend\"}\n) by (pod)\n",
337 |           "interval": "",
338 |           "legendFormat": "{{pod}}",
339 |           "range": true,
340 |           "refId": "A"
341 |         }
342 |       ],
343 |       "title": "Frontend CPU Request % Used",
344 |       "type": "timeseries"
345 |     },
346 |     {
347 |       "datasource": {
348 |         "type": "prometheus",
349 |         "uid": "P1809F7CD0C75ACF3"
350 |       },
351 |       "fieldConfig": {
352 |         "defaults": {
353 |           "color": {
354 |             "mode": "palette-classic"
355 |           },
356 |           "custom": {
357 |             "axisCenteredZero": false,
358 |             "axisColorMode": "text",
359 |             "axisLabel": "",
360 |             "axisPlacement": "auto",
361 |             "barAlignment": 0,
362 |             "drawStyle": "line",
363 |             "fillOpacity": 0,
364 |             "gradientMode": "none",
365 |             "hideFrom": {
366 |               "legend": false,
367 |               "tooltip": false,
368 |               "viz": false
369 |             },
370 |             "lineInterpolation": "linear",
371 |             "lineWidth": 1,
372 |             "pointSize": 5,
373 |             "scaleDistribution": {
374 |               "type": "linear"
375 |             },
376 |             "showPoints": "never",
377 |             "spanNulls": false,
378 |             "stacking": {
379 |               "group": "A",
380 |               "mode": "none"
381 |             },
382 |             "thresholdsStyle": {
383 |               "mode": "off"
384 |             }
385 |           },
386 |           "mappings": [],
387 |           "thresholds": {
388 |             "mode": "absolute",
389 |             "steps": [
390 |               {
391 |                 "color": "green",
392 |                 "value": null
393 |               },
394 |               {
395 |                 "color": "red",
396 |                 "value": 80
397 |               }
398 |             ]
399 |           },
400 |           "unit": "percentunit"
401 |         },
402 |         "overrides": []
403 |       },
404 |       "gridPos": {
405 |         "h": 8,
406 |         "w": 12,
407 |         "x": 12,
408 |         "y": 10
409 |       },
410 |       "id": 13,
411 |       "options": {
412 |         "legend": {
413 |           "calcs": [],
414 |           "displayMode": "list",
415 |           "placement": "bottom",
416 |           "showLegend": true
417 |         },
418 |         "tooltip": {
419 |           "mode": "single",
420 |           "sort": "none"
421 |         }
422 |       },
423 |       "targets": [
424 |         {
425 |           "datasource": {
426 |             "type": "prometheus",
427 |             "uid": "prometheus"
428 |           },
429 |           "editorMode": "code",
430 |           "exemplar": true,
431 |           "expr": "sum(\n    container_memory_working_set_bytes{namespace=\"temporal\", container!=\"\", image!=\"\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-frontend\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\", resource=\"memory\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-frontend\"}\n) by (pod)\n",
432 |           "interval": "",
433 |           "legendFormat": "{{pod}}",
434 |           "range": true,
435 |           "refId": "A"
436 |         }
437 |       ],
438 |       "title": "Frontend Memory Request % Used",
439 |       "type": "timeseries"
440 |     },
441 |     {
442 |       "collapsed": false,
443 |       "gridPos": {
444 |         "h": 1,
445 |         "w": 24,
446 |         "x": 0,
447 |         "y": 18
448 |       },
449 |       "id": 22,
450 |       "panels": [],
451 |       "title": "Balance",
452 |       "type": "row"
453 |     },
454 |     {
455 |       "datasource": {
456 |         "type": "prometheus",
457 |         "uid": "P1809F7CD0C75ACF3"
458 |       },
459 |       "fieldConfig": {
460 |         "defaults": {
461 |           "color": {
462 |             "mode": "palette-classic"
463 |           },
464 |           "custom": {
465 |             "axisCenteredZero": false,
466 |             "axisColorMode": "text",
467 |             "axisLabel": "",
468 |             "axisPlacement": "auto",
469 |             "barAlignment": 0,
470 |             "drawStyle": "line",
471 |             "fillOpacity": 0,
472 |             "gradientMode": "none",
473 |             "hideFrom": {
474 |               "legend": false,
475 |               "tooltip": false,
476 |               "viz": false
477 |             },
478 |             "lineInterpolation": "linear",
479 |             "lineWidth": 1,
480 |             "pointSize": 5,
481 |             "scaleDistribution": {
482 |               "type": "linear"
483 |             },
484 |             "showPoints": "never",
485 |             "spanNulls": false,
486 |             "stacking": {
487 |               "group": "A",
488 |               "mode": "none"
489 |             },
490 |             "thresholdsStyle": {
491 |               "mode": "off"
492 |             }
493 |           },
494 |           "mappings": [],
495 |           "thresholds": {
496 |             "mode": "absolute",
497 |             "steps": [
498 |               {
499 |                 "color": "green",
500 |                 "value": null
501 |               },
502 |               {
503 |                 "color": "red",
504 |                 "value": 80
505 |               }
506 |             ]
507 |           },
508 |           "unit": "short"
509 |         },
510 |         "overrides": []
511 |       },
512 |       "gridPos": {
513 |         "h": 8,
514 |         "w": 12,
515 |         "x": 0,
516 |         "y": 19
517 |       },
518 |       "id": 20,
519 |       "options": {
520 |         "legend": {
521 |           "calcs": [],
522 |           "displayMode": "list",
523 |           "placement": "bottom",
524 |           "showLegend": true
525 |         },
526 |         "tooltip": {
527 |           "mode": "single",
528 |           "sort": "none"
529 |         }
530 |       },
531 |       "targets": [
532 |         {
533 |           "datasource": {
534 |             "type": "prometheus",
535 |             "uid": "prometheus"
536 |           },
537 |           "editorMode": "code",
538 |           "exemplar": true,
539 |           "expr": "sum by (pod) (rate(service_latency_count{service_name=\"frontend\"}[$__rate_interval]))",
540 |           "interval": "",
541 |           "legendFormat": "__auto",
542 |           "range": true,
543 |           "refId": "A"
544 |         }
545 |       ],
546 |       "title": "RPS",
547 |       "type": "timeseries"
548 |     }
549 |   ],
550 |   "refresh": "30s",
551 |   "schemaVersion": 37,
552 |   "style": "dark",
553 |   "tags": [
554 |     "frontend",
555 |     "temporal"
556 |   ],
557 |   "templating": {
558 |     "list": []
559 |   },
560 |   "time": {
561 |     "from": "now-15m",
562 |     "to": "now"
563 |   },
564 |   "timepicker": {},
565 |   "timezone": "browser",
566 |   "title": "Soak Test - Frontend",
567 |   "uid": "7e2f4673-fd2a-44a5-8b16-3b20b0427a30",
568 |   "version": 2,
569 |   "weekStart": ""
570 | }
571 | 


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/soak-test-history.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "datasource",
  8 |           "uid": "grafana"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 28,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "collapsed": false,
 33 |       "datasource": {
 34 |         "type": "datasource",
 35 |         "uid": "grafana"
 36 |       },
 37 |       "gridPos": {
 38 |         "h": 1,
 39 |         "w": 24,
 40 |         "x": 0,
 41 |         "y": 0
 42 |       },
 43 |       "id": 10,
 44 |       "panels": [],
 45 |       "title": "Service",
 46 |       "type": "row"
 47 |     },
 48 |     {
 49 |       "datasource": {
 50 |         "type": "prometheus",
 51 |         "uid": "P1809F7CD0C75ACF3"
 52 |       },
 53 |       "fieldConfig": {
 54 |         "defaults": {
 55 |           "color": {
 56 |             "mode": "palette-classic"
 57 |           },
 58 |           "custom": {
 59 |             "axisCenteredZero": false,
 60 |             "axisColorMode": "text",
 61 |             "axisLabel": "",
 62 |             "axisPlacement": "auto",
 63 |             "barAlignment": 0,
 64 |             "drawStyle": "line",
 65 |             "fillOpacity": 0,
 66 |             "gradientMode": "none",
 67 |             "hideFrom": {
 68 |               "legend": false,
 69 |               "tooltip": false,
 70 |               "viz": false
 71 |             },
 72 |             "lineInterpolation": "linear",
 73 |             "lineWidth": 1,
 74 |             "pointSize": 5,
 75 |             "scaleDistribution": {
 76 |               "type": "linear"
 77 |             },
 78 |             "showPoints": "never",
 79 |             "spanNulls": false,
 80 |             "stacking": {
 81 |               "group": "A",
 82 |               "mode": "none"
 83 |             },
 84 |             "thresholdsStyle": {
 85 |               "mode": "off"
 86 |             }
 87 |           },
 88 |           "mappings": [],
 89 |           "thresholds": {
 90 |             "mode": "absolute",
 91 |             "steps": [
 92 |               {
 93 |                 "color": "green",
 94 |                 "value": null
 95 |               },
 96 |               {
 97 |                 "color": "red",
 98 |                 "value": 80
 99 |               }
100 |             ]
101 |           },
102 |           "unit": "s"
103 |         },
104 |         "overrides": []
105 |       },
106 |       "gridPos": {
107 |         "h": 8,
108 |         "w": 12,
109 |         "x": 0,
110 |         "y": 1
111 |       },
112 |       "id": 18,
113 |       "options": {
114 |         "legend": {
115 |           "calcs": [],
116 |           "displayMode": "list",
117 |           "placement": "bottom",
118 |           "showLegend": true
119 |         },
120 |         "tooltip": {
121 |           "mode": "single",
122 |           "sort": "none"
123 |         }
124 |       },
125 |       "targets": [
126 |         {
127 |           "datasource": {
128 |             "type": "prometheus",
129 |             "uid": "prometheus"
130 |           },
131 |           "editorMode": "code",
132 |           "exemplar": true,
133 |           "expr": "histogram_quantile(0.95, sum by (le, operation) (rate(service_latency_bucket{service_name=\"history\",operation!~\"Poll.*\"}[$__rate_interval])))",
134 |           "interval": "",
135 |           "legendFormat": "__auto",
136 |           "range": true,
137 |           "refId": "A"
138 |         }
139 |       ],
140 |       "title": "History Request Latency p95",
141 |       "type": "timeseries"
142 |     },
143 |     {
144 |       "datasource": {
145 |         "type": "prometheus",
146 |         "uid": "P1809F7CD0C75ACF3"
147 |       },
148 |       "fieldConfig": {
149 |         "defaults": {
150 |           "color": {
151 |             "mode": "palette-classic"
152 |           },
153 |           "custom": {
154 |             "axisCenteredZero": false,
155 |             "axisColorMode": "text",
156 |             "axisLabel": "",
157 |             "axisPlacement": "auto",
158 |             "barAlignment": 0,
159 |             "drawStyle": "line",
160 |             "fillOpacity": 0,
161 |             "gradientMode": "none",
162 |             "hideFrom": {
163 |               "legend": false,
164 |               "tooltip": false,
165 |               "viz": false
166 |             },
167 |             "lineInterpolation": "linear",
168 |             "lineWidth": 1,
169 |             "pointSize": 5,
170 |             "scaleDistribution": {
171 |               "type": "linear"
172 |             },
173 |             "showPoints": "never",
174 |             "spanNulls": false,
175 |             "stacking": {
176 |               "group": "A",
177 |               "mode": "none"
178 |             },
179 |             "thresholdsStyle": {
180 |               "mode": "off"
181 |             }
182 |           },
183 |           "mappings": [],
184 |           "thresholds": {
185 |             "mode": "absolute",
186 |             "steps": [
187 |               {
188 |                 "color": "green",
189 |                 "value": null
190 |               },
191 |               {
192 |                 "color": "red",
193 |                 "value": 80
194 |               }
195 |             ]
196 |           },
197 |           "unit": "short"
198 |         },
199 |         "overrides": []
200 |       },
201 |       "gridPos": {
202 |         "h": 8,
203 |         "w": 12,
204 |         "x": 12,
205 |         "y": 1
206 |       },
207 |       "id": 15,
208 |       "options": {
209 |         "legend": {
210 |           "calcs": [],
211 |           "displayMode": "list",
212 |           "placement": "bottom",
213 |           "showLegend": true
214 |         },
215 |         "tooltip": {
216 |           "mode": "single",
217 |           "sort": "none"
218 |         }
219 |       },
220 |       "targets": [
221 |         {
222 |           "datasource": {
223 |             "type": "prometheus",
224 |             "uid": "prometheus"
225 |           },
226 |           "editorMode": "code",
227 |           "exemplar": true,
228 |           "expr": "sum by (error_type) (rate(service_error_with_type{service_name=\"history\"}[$__rate_interval])) > 0",
229 |           "interval": "",
230 |           "legendFormat": "{{error_type}}",
231 |           "range": true,
232 |           "refId": "A"
233 |         }
234 |       ],
235 |       "title": "History Error Rate",
236 |       "type": "timeseries"
237 |     },
238 |     {
239 |       "collapsed": false,
240 |       "gridPos": {
241 |         "h": 1,
242 |         "w": 24,
243 |         "x": 0,
244 |         "y": 9
245 |       },
246 |       "id": 17,
247 |       "panels": [],
248 |       "title": "Pods",
249 |       "type": "row"
250 |     },
251 |     {
252 |       "datasource": {
253 |         "type": "prometheus",
254 |         "uid": "P1809F7CD0C75ACF3"
255 |       },
256 |       "fieldConfig": {
257 |         "defaults": {
258 |           "color": {
259 |             "mode": "palette-classic"
260 |           },
261 |           "custom": {
262 |             "axisCenteredZero": false,
263 |             "axisColorMode": "text",
264 |             "axisLabel": "",
265 |             "axisPlacement": "auto",
266 |             "barAlignment": 0,
267 |             "drawStyle": "line",
268 |             "fillOpacity": 0,
269 |             "gradientMode": "none",
270 |             "hideFrom": {
271 |               "legend": false,
272 |               "tooltip": false,
273 |               "viz": false
274 |             },
275 |             "lineInterpolation": "linear",
276 |             "lineWidth": 1,
277 |             "pointSize": 5,
278 |             "scaleDistribution": {
279 |               "type": "linear"
280 |             },
281 |             "showPoints": "never",
282 |             "spanNulls": false,
283 |             "stacking": {
284 |               "group": "A",
285 |               "mode": "none"
286 |             },
287 |             "thresholdsStyle": {
288 |               "mode": "off"
289 |             }
290 |           },
291 |           "mappings": [],
292 |           "thresholds": {
293 |             "mode": "absolute",
294 |             "steps": [
295 |               {
296 |                 "color": "green",
297 |                 "value": null
298 |               },
299 |               {
300 |                 "color": "red",
301 |                 "value": 80
302 |               }
303 |             ]
304 |           },
305 |           "unit": "percentunit"
306 |         },
307 |         "overrides": [
308 |           {
309 |             "matcher": {
310 |               "id": "byFrameRefID",
311 |               "options": "B"
312 |             },
313 |             "properties": [
314 |               {
315 |                 "id": "color",
316 |                 "value": {
317 |                   "fixedColor": "dark-red",
318 |                   "mode": "fixed"
319 |                 }
320 |               },
321 |               {
322 |                 "id": "custom.fillOpacity",
323 |                 "value": 50
324 |               }
325 |             ]
326 |           }
327 |         ]
328 |       },
329 |       "gridPos": {
330 |         "h": 8,
331 |         "w": 12,
332 |         "x": 0,
333 |         "y": 10
334 |       },
335 |       "id": 12,
336 |       "options": {
337 |         "legend": {
338 |           "calcs": [],
339 |           "displayMode": "list",
340 |           "placement": "bottom",
341 |           "showLegend": true
342 |         },
343 |         "tooltip": {
344 |           "mode": "single",
345 |           "sort": "none"
346 |         }
347 |       },
348 |       "targets": [
349 |         {
350 |           "datasource": {
351 |             "type": "prometheus",
352 |             "uid": "prometheus"
353 |           },
354 |           "editorMode": "code",
355 |           "exemplar": true,
356 |           "expr": "sum(\n  rate(container_cpu_usage_seconds_total{container=\"temporal\"}[$__rate_interval])\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-history\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\",resource=\"cpu\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-history\"}\n) by (pod)\n",
357 |           "interval": "",
358 |           "legendFormat": "__auto",
359 |           "range": true,
360 |           "refId": "A"
361 |         },
362 |         {
363 |           "datasource": {
364 |             "type": "prometheus",
365 |             "uid": "P1809F7CD0C75ACF3"
366 |           },
367 |           "editorMode": "code",
368 |           "expr": "sum(\n    increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}[$__rate_interval])\n    * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-history\"}\n)\n/\nsum(\n    increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}[$__rate_interval])\n    * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-history\"}\n) > 0\n",
369 |           "hide": false,
370 |           "legendFormat": "throttle",
371 |           "range": true,
372 |           "refId": "B"
373 |         }
374 |       ],
375 |       "title": "History CPU Request % Used",
376 |       "type": "timeseries"
377 |     },
378 |     {
379 |       "datasource": {
380 |         "type": "prometheus",
381 |         "uid": "P1809F7CD0C75ACF3"
382 |       },
383 |       "fieldConfig": {
384 |         "defaults": {
385 |           "color": {
386 |             "mode": "palette-classic"
387 |           },
388 |           "custom": {
389 |             "axisCenteredZero": false,
390 |             "axisColorMode": "text",
391 |             "axisLabel": "",
392 |             "axisPlacement": "auto",
393 |             "barAlignment": 0,
394 |             "drawStyle": "line",
395 |             "fillOpacity": 0,
396 |             "gradientMode": "none",
397 |             "hideFrom": {
398 |               "legend": false,
399 |               "tooltip": false,
400 |               "viz": false
401 |             },
402 |             "lineInterpolation": "linear",
403 |             "lineWidth": 1,
404 |             "pointSize": 5,
405 |             "scaleDistribution": {
406 |               "type": "linear"
407 |             },
408 |             "showPoints": "never",
409 |             "spanNulls": false,
410 |             "stacking": {
411 |               "group": "A",
412 |               "mode": "none"
413 |             },
414 |             "thresholdsStyle": {
415 |               "mode": "off"
416 |             }
417 |           },
418 |           "mappings": [],
419 |           "thresholds": {
420 |             "mode": "absolute",
421 |             "steps": [
422 |               {
423 |                 "color": "green",
424 |                 "value": null
425 |               },
426 |               {
427 |                 "color": "red",
428 |                 "value": 80
429 |               }
430 |             ]
431 |           },
432 |           "unit": "percentunit"
433 |         },
434 |         "overrides": []
435 |       },
436 |       "gridPos": {
437 |         "h": 8,
438 |         "w": 12,
439 |         "x": 12,
440 |         "y": 10
441 |       },
442 |       "id": 13,
443 |       "options": {
444 |         "legend": {
445 |           "calcs": [],
446 |           "displayMode": "list",
447 |           "placement": "bottom",
448 |           "showLegend": true
449 |         },
450 |         "tooltip": {
451 |           "mode": "single",
452 |           "sort": "none"
453 |         }
454 |       },
455 |       "targets": [
456 |         {
457 |           "datasource": {
458 |             "type": "prometheus",
459 |             "uid": "prometheus"
460 |           },
461 |           "editorMode": "code",
462 |           "exemplar": true,
463 |           "expr": "sum(\n    container_memory_working_set_bytes{namespace=\"temporal\", container!=\"\", image!=\"\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-history\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\", resource=\"memory\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-history\"}\n) by (pod)\n",
464 |           "interval": "",
465 |           "legendFormat": "{{pod}}",
466 |           "range": true,
467 |           "refId": "A"
468 |         }
469 |       ],
470 |       "title": "History Memory Request % Used",
471 |       "type": "timeseries"
472 |     },
473 |     {
474 |       "collapsed": false,
475 |       "gridPos": {
476 |         "h": 1,
477 |         "w": 24,
478 |         "x": 0,
479 |         "y": 18
480 |       },
481 |       "id": 29,
482 |       "panels": [],
483 |       "title": "Tuning",
484 |       "type": "row"
485 |     },
486 |     {
487 |       "datasource": {
488 |         "type": "prometheus",
489 |         "uid": "P1809F7CD0C75ACF3"
490 |       },
491 |       "fieldConfig": {
492 |         "defaults": {
493 |           "color": {
494 |             "mode": "palette-classic"
495 |           },
496 |           "custom": {
497 |             "axisCenteredZero": false,
498 |             "axisColorMode": "text",
499 |             "axisLabel": "",
500 |             "axisPlacement": "auto",
501 |             "barAlignment": 0,
502 |             "drawStyle": "line",
503 |             "fillOpacity": 0,
504 |             "gradientMode": "none",
505 |             "hideFrom": {
506 |               "legend": false,
507 |               "tooltip": false,
508 |               "viz": false
509 |             },
510 |             "lineInterpolation": "linear",
511 |             "lineWidth": 1,
512 |             "pointSize": 5,
513 |             "scaleDistribution": {
514 |               "type": "linear"
515 |             },
516 |             "showPoints": "never",
517 |             "spanNulls": false,
518 |             "stacking": {
519 |               "group": "A",
520 |               "mode": "none"
521 |             },
522 |             "thresholdsStyle": {
523 |               "mode": "dashed"
524 |             }
525 |           },
526 |           "mappings": [],
527 |           "thresholds": {
528 |             "mode": "absolute",
529 |             "steps": [
530 |               {
531 |                 "color": "green",
532 |                 "value": null
533 |               }
534 |             ]
535 |           },
536 |           "unit": "s"
537 |         },
538 |         "overrides": []
539 |       },
540 |       "gridPos": {
541 |         "h": 8,
542 |         "w": 12,
543 |         "x": 0,
544 |         "y": 19
545 |       },
546 |       "id": 25,
547 |       "options": {
548 |         "legend": {
549 |           "calcs": [],
550 |           "displayMode": "list",
551 |           "placement": "bottom",
552 |           "showLegend": true
553 |         },
554 |         "tooltip": {
555 |           "mode": "single",
556 |           "sort": "none"
557 |         }
558 |       },
559 |       "targets": [
560 |         {
561 |           "datasource": {
562 |             "type": "prometheus",
563 |             "uid": "prometheus"
564 |           },
565 |           "editorMode": "code",
566 |           "expr": "histogram_quantile(0.95, sum by (le) (rate(lock_latency_bucket[$__rate_interval])))",
567 |           "legendFormat": "Shard",
568 |           "range": true,
569 |           "refId": "A"
570 |         },
571 |         {
572 |           "datasource": {
573 |             "type": "prometheus",
574 |             "uid": "P1809F7CD0C75ACF3"
575 |           },
576 |           "editorMode": "code",
577 |           "expr": "histogram_quantile(0.95, sum by (le, operation) (rate(cache_latency_bucket{operation=\"HistoryCacheGetOrCreate\"}[$__rate_interval])))",
578 |           "hide": false,
579 |           "legendFormat": "Workflow",
580 |           "range": true,
581 |           "refId": "B"
582 |         }
583 |       ],
584 |       "title": "Lock Latency p95",
585 |       "type": "timeseries"
586 |     },
587 |     {
588 |       "datasource": {
589 |         "type": "prometheus",
590 |         "uid": "P1809F7CD0C75ACF3"
591 |       },
592 |       "fieldConfig": {
593 |         "defaults": {
594 |           "color": {
595 |             "mode": "palette-classic"
596 |           },
597 |           "custom": {
598 |             "axisCenteredZero": false,
599 |             "axisColorMode": "text",
600 |             "axisLabel": "",
601 |             "axisPlacement": "auto",
602 |             "barAlignment": 0,
603 |             "drawStyle": "line",
604 |             "fillOpacity": 0,
605 |             "gradientMode": "none",
606 |             "hideFrom": {
607 |               "legend": false,
608 |               "tooltip": false,
609 |               "viz": false
610 |             },
611 |             "lineInterpolation": "linear",
612 |             "lineWidth": 1,
613 |             "pointSize": 5,
614 |             "scaleDistribution": {
615 |               "type": "linear"
616 |             },
617 |             "showPoints": "never",
618 |             "spanNulls": false,
619 |             "stacking": {
620 |               "group": "A",
621 |               "mode": "none"
622 |             },
623 |             "thresholdsStyle": {
624 |               "mode": "dashed"
625 |             }
626 |           },
627 |           "mappings": [],
628 |           "thresholds": {
629 |             "mode": "absolute",
630 |             "steps": [
631 |               {
632 |                 "color": "green",
633 |                 "value": null
634 |               }
635 |             ]
636 |           },
637 |           "unit": "s"
638 |         },
639 |         "overrides": []
640 |       },
641 |       "gridPos": {
642 |         "h": 8,
643 |         "w": 12,
644 |         "x": 12,
645 |         "y": 19
646 |       },
647 |       "id": 32,
648 |       "options": {
649 |         "legend": {
650 |           "calcs": [],
651 |           "displayMode": "list",
652 |           "placement": "bottom",
653 |           "showLegend": true
654 |         },
655 |         "tooltip": {
656 |           "mode": "single",
657 |           "sort": "none"
658 |         }
659 |       },
660 |       "targets": [
661 |         {
662 |           "datasource": {
663 |             "type": "prometheus",
664 |             "uid": "prometheus"
665 |           },
666 |           "editorMode": "code",
667 |           "expr": "histogram_quantile(0.50, sum by (le) (rate(task_latency_processing_bucket[$__rate_interval])))",
668 |           "legendFormat": "p50",
669 |           "range": true,
670 |           "refId": "A"
671 |         },
672 |         {
673 |           "datasource": {
674 |             "type": "prometheus",
675 |             "uid": "P1809F7CD0C75ACF3"
676 |           },
677 |           "editorMode": "code",
678 |           "expr": "histogram_quantile(0.95, sum by (le) (rate(task_latency_processing_bucket[$__rate_interval])))",
679 |           "hide": false,
680 |           "legendFormat": "p95",
681 |           "range": true,
682 |           "refId": "B"
683 |         }
684 |       ],
685 |       "title": "Task Latency",
686 |       "type": "timeseries"
687 |     },
688 |     {
689 |       "collapsed": true,
690 |       "gridPos": {
691 |         "h": 1,
692 |         "w": 24,
693 |         "x": 0,
694 |         "y": 27
695 |       },
696 |       "id": 31,
697 |       "panels": [
698 |         {
699 |           "datasource": {
700 |             "type": "prometheus",
701 |             "uid": "P1809F7CD0C75ACF3"
702 |           },
703 |           "fieldConfig": {
704 |             "defaults": {
705 |               "color": {
706 |                 "mode": "palette-classic"
707 |               },
708 |               "custom": {
709 |                 "axisCenteredZero": false,
710 |                 "axisColorMode": "text",
711 |                 "axisLabel": "",
712 |                 "axisPlacement": "auto",
713 |                 "barAlignment": 0,
714 |                 "drawStyle": "line",
715 |                 "fillOpacity": 0,
716 |                 "gradientMode": "none",
717 |                 "hideFrom": {
718 |                   "legend": false,
719 |                   "tooltip": false,
720 |                   "viz": false
721 |                 },
722 |                 "lineInterpolation": "linear",
723 |                 "lineWidth": 1,
724 |                 "pointSize": 5,
725 |                 "scaleDistribution": {
726 |                   "type": "linear"
727 |                 },
728 |                 "showPoints": "never",
729 |                 "spanNulls": false,
730 |                 "stacking": {
731 |                   "group": "A",
732 |                   "mode": "none"
733 |                 },
734 |                 "thresholdsStyle": {
735 |                   "mode": "off"
736 |                 }
737 |               },
738 |               "mappings": [],
739 |               "min": 0,
740 |               "thresholds": {
741 |                 "mode": "absolute",
742 |                 "steps": [
743 |                   {
744 |                     "color": "green",
745 |                     "value": null
746 |                   },
747 |                   {
748 |                     "color": "red",
749 |                     "value": 80
750 |                   }
751 |                 ]
752 |               },
753 |               "unit": "short"
754 |             },
755 |             "overrides": []
756 |           },
757 |           "gridPos": {
758 |             "h": 8,
759 |             "w": 12,
760 |             "x": 0,
761 |             "y": 28
762 |           },
763 |           "id": 19,
764 |           "options": {
765 |             "legend": {
766 |               "calcs": [],
767 |               "displayMode": "list",
768 |               "placement": "bottom",
769 |               "showLegend": true
770 |             },
771 |             "tooltip": {
772 |               "mode": "single",
773 |               "sort": "none"
774 |             }
775 |           },
776 |           "targets": [
777 |             {
778 |               "datasource": {
779 |                 "type": "prometheus",
780 |                 "uid": "prometheus"
781 |               },
782 |               "editorMode": "code",
783 |               "exemplar": true,
784 |               "expr": "sum by (pod) (numshards_gauge)",
785 |               "interval": "",
786 |               "legendFormat": "{{pod}}",
787 |               "range": true,
788 |               "refId": "A"
789 |             },
790 |             {
791 |               "datasource": {
792 |                 "type": "prometheus",
793 |                 "uid": "prometheus"
794 |               },
795 |               "exemplar": true,
796 |               "expr": "",
797 |               "hide": false,
798 |               "interval": "",
799 |               "legendFormat": "",
800 |               "refId": "B"
801 |             }
802 |           ],
803 |           "title": "History Shard Balance",
804 |           "type": "timeseries"
805 |         }
806 |       ],
807 |       "title": "Misc",
808 |       "type": "row"
809 |     }
810 |   ],
811 |   "refresh": "30s",
812 |   "schemaVersion": 37,
813 |   "style": "dark",
814 |   "tags": [
815 |     "history",
816 |     "temporal"
817 |   ],
818 |   "templating": {
819 |     "list": []
820 |   },
821 |   "time": {
822 |     "from": "now-15m",
823 |     "to": "now"
824 |   },
825 |   "timepicker": {},
826 |   "timezone": "",
827 |   "title": "Soak Test - History",
828 |   "uid": "82d00f2f-2548-496b-a627-0108bf7cb990",
829 |   "version": 3,
830 |   "weekStart": ""
831 | }
832 | 


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/soak-test-matching.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "datasource",
  8 |           "uid": "grafana"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 29,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "collapsed": false,
 33 |       "datasource": {
 34 |         "type": "datasource",
 35 |         "uid": "grafana"
 36 |       },
 37 |       "gridPos": {
 38 |         "h": 1,
 39 |         "w": 24,
 40 |         "x": 0,
 41 |         "y": 0
 42 |       },
 43 |       "id": 10,
 44 |       "panels": [],
 45 |       "title": "Service",
 46 |       "type": "row"
 47 |     },
 48 |     {
 49 |       "datasource": {
 50 |         "type": "prometheus",
 51 |         "uid": "P1809F7CD0C75ACF3"
 52 |       },
 53 |       "fieldConfig": {
 54 |         "defaults": {
 55 |           "color": {
 56 |             "mode": "palette-classic"
 57 |           },
 58 |           "custom": {
 59 |             "axisCenteredZero": false,
 60 |             "axisColorMode": "text",
 61 |             "axisLabel": "",
 62 |             "axisPlacement": "auto",
 63 |             "barAlignment": 0,
 64 |             "drawStyle": "line",
 65 |             "fillOpacity": 0,
 66 |             "gradientMode": "none",
 67 |             "hideFrom": {
 68 |               "legend": false,
 69 |               "tooltip": false,
 70 |               "viz": false
 71 |             },
 72 |             "lineInterpolation": "linear",
 73 |             "lineWidth": 1,
 74 |             "pointSize": 5,
 75 |             "scaleDistribution": {
 76 |               "type": "linear"
 77 |             },
 78 |             "showPoints": "never",
 79 |             "spanNulls": false,
 80 |             "stacking": {
 81 |               "group": "A",
 82 |               "mode": "none"
 83 |             },
 84 |             "thresholdsStyle": {
 85 |               "mode": "off"
 86 |             }
 87 |           },
 88 |           "mappings": [],
 89 |           "thresholds": {
 90 |             "mode": "absolute",
 91 |             "steps": [
 92 |               {
 93 |                 "color": "green",
 94 |                 "value": null
 95 |               },
 96 |               {
 97 |                 "color": "red",
 98 |                 "value": 80
 99 |               }
100 |             ]
101 |           },
102 |           "unit": "s"
103 |         },
104 |         "overrides": []
105 |       },
106 |       "gridPos": {
107 |         "h": 8,
108 |         "w": 12,
109 |         "x": 0,
110 |         "y": 1
111 |       },
112 |       "id": 24,
113 |       "options": {
114 |         "legend": {
115 |           "calcs": [],
116 |           "displayMode": "list",
117 |           "placement": "bottom",
118 |           "showLegend": true
119 |         },
120 |         "tooltip": {
121 |           "mode": "single",
122 |           "sort": "none"
123 |         }
124 |       },
125 |       "targets": [
126 |         {
127 |           "datasource": {
128 |             "type": "prometheus",
129 |             "uid": "prometheus"
130 |           },
131 |           "editorMode": "code",
132 |           "exemplar": true,
133 |           "expr": "histogram_quantile(0.95, sum by (le, operation) (rate(service_latency_bucket{service_name=\"matching\",operation!~\"Poll.*\"}[$__rate_interval])))",
134 |           "interval": "",
135 |           "legendFormat": "__auto",
136 |           "range": true,
137 |           "refId": "A"
138 |         }
139 |       ],
140 |       "title": "Matching Request Latency p95",
141 |       "type": "timeseries"
142 |     },
143 |     {
144 |       "datasource": {
145 |         "type": "prometheus",
146 |         "uid": "P1809F7CD0C75ACF3"
147 |       },
148 |       "fieldConfig": {
149 |         "defaults": {
150 |           "color": {
151 |             "mode": "palette-classic"
152 |           },
153 |           "custom": {
154 |             "axisCenteredZero": false,
155 |             "axisColorMode": "text",
156 |             "axisLabel": "",
157 |             "axisPlacement": "auto",
158 |             "barAlignment": 0,
159 |             "drawStyle": "line",
160 |             "fillOpacity": 0,
161 |             "gradientMode": "none",
162 |             "hideFrom": {
163 |               "legend": false,
164 |               "tooltip": false,
165 |               "viz": false
166 |             },
167 |             "lineInterpolation": "linear",
168 |             "lineWidth": 1,
169 |             "pointSize": 5,
170 |             "scaleDistribution": {
171 |               "type": "linear"
172 |             },
173 |             "showPoints": "never",
174 |             "spanNulls": false,
175 |             "stacking": {
176 |               "group": "A",
177 |               "mode": "none"
178 |             },
179 |             "thresholdsStyle": {
180 |               "mode": "off"
181 |             }
182 |           },
183 |           "mappings": [],
184 |           "thresholds": {
185 |             "mode": "absolute",
186 |             "steps": [
187 |               {
188 |                 "color": "green",
189 |                 "value": null
190 |               },
191 |               {
192 |                 "color": "red",
193 |                 "value": 80
194 |               }
195 |             ]
196 |           },
197 |           "unit": "short"
198 |         },
199 |         "overrides": []
200 |       },
201 |       "gridPos": {
202 |         "h": 8,
203 |         "w": 12,
204 |         "x": 12,
205 |         "y": 1
206 |       },
207 |       "id": 26,
208 |       "options": {
209 |         "legend": {
210 |           "calcs": [],
211 |           "displayMode": "list",
212 |           "placement": "bottom",
213 |           "showLegend": true
214 |         },
215 |         "tooltip": {
216 |           "mode": "single",
217 |           "sort": "none"
218 |         }
219 |       },
220 |       "targets": [
221 |         {
222 |           "datasource": {
223 |             "type": "prometheus",
224 |             "uid": "prometheus"
225 |           },
226 |           "editorMode": "code",
227 |           "exemplar": true,
228 |           "expr": "sum by (error_type) (rate(service_error_with_type{service_name=\"matching\"}[$__rate_interval])) > 0",
229 |           "interval": "",
230 |           "legendFormat": "{{error_type}}",
231 |           "range": true,
232 |           "refId": "A"
233 |         }
234 |       ],
235 |       "title": "Matching Error Rate",
236 |       "type": "timeseries"
237 |     },
238 |     {
239 |       "collapsed": false,
240 |       "gridPos": {
241 |         "h": 1,
242 |         "w": 24,
243 |         "x": 0,
244 |         "y": 9
245 |       },
246 |       "id": 17,
247 |       "panels": [],
248 |       "title": "Pods",
249 |       "type": "row"
250 |     },
251 |     {
252 |       "datasource": {
253 |         "type": "prometheus",
254 |         "uid": "P1809F7CD0C75ACF3"
255 |       },
256 |       "fieldConfig": {
257 |         "defaults": {
258 |           "color": {
259 |             "mode": "palette-classic"
260 |           },
261 |           "custom": {
262 |             "axisCenteredZero": false,
263 |             "axisColorMode": "text",
264 |             "axisLabel": "",
265 |             "axisPlacement": "auto",
266 |             "barAlignment": 0,
267 |             "drawStyle": "line",
268 |             "fillOpacity": 0,
269 |             "gradientMode": "none",
270 |             "hideFrom": {
271 |               "legend": false,
272 |               "tooltip": false,
273 |               "viz": false
274 |             },
275 |             "lineInterpolation": "linear",
276 |             "lineWidth": 1,
277 |             "pointSize": 5,
278 |             "scaleDistribution": {
279 |               "type": "linear"
280 |             },
281 |             "showPoints": "never",
282 |             "spanNulls": false,
283 |             "stacking": {
284 |               "group": "A",
285 |               "mode": "none"
286 |             },
287 |             "thresholdsStyle": {
288 |               "mode": "off"
289 |             }
290 |           },
291 |           "mappings": [],
292 |           "thresholds": {
293 |             "mode": "absolute",
294 |             "steps": [
295 |               {
296 |                 "color": "green",
297 |                 "value": null
298 |               },
299 |               {
300 |                 "color": "red",
301 |                 "value": 80
302 |               }
303 |             ]
304 |           },
305 |           "unit": "percentunit"
306 |         },
307 |         "overrides": [
308 |           {
309 |             "matcher": {
310 |               "id": "byFrameRefID",
311 |               "options": "B"
312 |             },
313 |             "properties": [
314 |               {
315 |                 "id": "color",
316 |                 "value": {
317 |                   "fixedColor": "dark-red",
318 |                   "mode": "fixed"
319 |                 }
320 |               },
321 |               {
322 |                 "id": "custom.fillOpacity",
323 |                 "value": 50
324 |               }
325 |             ]
326 |           }
327 |         ]
328 |       },
329 |       "gridPos": {
330 |         "h": 8,
331 |         "w": 12,
332 |         "x": 0,
333 |         "y": 10
334 |       },
335 |       "id": 28,
336 |       "options": {
337 |         "legend": {
338 |           "calcs": [],
339 |           "displayMode": "list",
340 |           "placement": "bottom",
341 |           "showLegend": true
342 |         },
343 |         "tooltip": {
344 |           "mode": "single",
345 |           "sort": "none"
346 |         }
347 |       },
348 |       "targets": [
349 |         {
350 |           "datasource": {
351 |             "type": "prometheus",
352 |             "uid": "prometheus"
353 |           },
354 |           "editorMode": "code",
355 |           "exemplar": true,
356 |           "expr": "sum(\n  rate(container_cpu_usage_seconds_total{container=\"temporal\"}[$__rate_interval])\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-matching\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\",resource=\"cpu\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-matching\"}\n) by (pod)\n",
357 |           "interval": "",
358 |           "legendFormat": "__auto",
359 |           "range": true,
360 |           "refId": "A"
361 |         },
362 |         {
363 |           "datasource": {
364 |             "type": "prometheus",
365 |             "uid": "P1809F7CD0C75ACF3"
366 |           },
367 |           "editorMode": "code",
368 |           "expr": "sum(\n    increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}[$__rate_interval])\n    * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-matching\"}\n)\n/\nsum(\n    increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}[$__rate_interval])\n    * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-matching\"}\n) > 0\n",
369 |           "hide": false,
370 |           "legendFormat": "throttle",
371 |           "range": true,
372 |           "refId": "B"
373 |         }
374 |       ],
375 |       "title": "Matching CPU Request % Used",
376 |       "type": "timeseries"
377 |     },
378 |     {
379 |       "datasource": {
380 |         "type": "prometheus",
381 |         "uid": "P1809F7CD0C75ACF3"
382 |       },
383 |       "fieldConfig": {
384 |         "defaults": {
385 |           "color": {
386 |             "mode": "palette-classic"
387 |           },
388 |           "custom": {
389 |             "axisCenteredZero": false,
390 |             "axisColorMode": "text",
391 |             "axisLabel": "",
392 |             "axisPlacement": "auto",
393 |             "barAlignment": 0,
394 |             "drawStyle": "line",
395 |             "fillOpacity": 0,
396 |             "gradientMode": "none",
397 |             "hideFrom": {
398 |               "legend": false,
399 |               "tooltip": false,
400 |               "viz": false
401 |             },
402 |             "lineInterpolation": "linear",
403 |             "lineWidth": 1,
404 |             "pointSize": 5,
405 |             "scaleDistribution": {
406 |               "type": "linear"
407 |             },
408 |             "showPoints": "never",
409 |             "spanNulls": false,
410 |             "stacking": {
411 |               "group": "A",
412 |               "mode": "none"
413 |             },
414 |             "thresholdsStyle": {
415 |               "mode": "off"
416 |             }
417 |           },
418 |           "mappings": [],
419 |           "thresholds": {
420 |             "mode": "absolute",
421 |             "steps": [
422 |               {
423 |                 "color": "green",
424 |                 "value": null
425 |               },
426 |               {
427 |                 "color": "red",
428 |                 "value": 80
429 |               }
430 |             ]
431 |           },
432 |           "unit": "percentunit"
433 |         },
434 |         "overrides": []
435 |       },
436 |       "gridPos": {
437 |         "h": 8,
438 |         "w": 12,
439 |         "x": 12,
440 |         "y": 10
441 |       },
442 |       "id": 30,
443 |       "options": {
444 |         "legend": {
445 |           "calcs": [],
446 |           "displayMode": "list",
447 |           "placement": "bottom",
448 |           "showLegend": true
449 |         },
450 |         "tooltip": {
451 |           "mode": "single",
452 |           "sort": "none"
453 |         }
454 |       },
455 |       "targets": [
456 |         {
457 |           "datasource": {
458 |             "type": "prometheus",
459 |             "uid": "prometheus"
460 |           },
461 |           "editorMode": "code",
462 |           "exemplar": true,
463 |           "expr": "sum(\n    container_memory_working_set_bytes{namespace=\"temporal\", container!=\"\", image!=\"\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-matching\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\", resource=\"memory\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-matching\"}\n) by (pod)\n",
464 |           "interval": "",
465 |           "legendFormat": "{{pod}}",
466 |           "range": true,
467 |           "refId": "A"
468 |         }
469 |       ],
470 |       "title": "Matching Memory Request % Used",
471 |       "type": "timeseries"
472 |     },
473 |     {
474 |       "collapsed": false,
475 |       "gridPos": {
476 |         "h": 1,
477 |         "w": 24,
478 |         "x": 0,
479 |         "y": 18
480 |       },
481 |       "id": 22,
482 |       "panels": [],
483 |       "title": "Misc",
484 |       "type": "row"
485 |     },
486 |     {
487 |       "datasource": {
488 |         "type": "prometheus",
489 |         "uid": "P1809F7CD0C75ACF3"
490 |       },
491 |       "fieldConfig": {
492 |         "defaults": {
493 |           "color": {
494 |             "mode": "palette-classic"
495 |           },
496 |           "custom": {
497 |             "axisCenteredZero": false,
498 |             "axisColorMode": "text",
499 |             "axisLabel": "",
500 |             "axisPlacement": "auto",
501 |             "barAlignment": 0,
502 |             "drawStyle": "line",
503 |             "fillOpacity": 0,
504 |             "gradientMode": "none",
505 |             "hideFrom": {
506 |               "legend": false,
507 |               "tooltip": false,
508 |               "viz": false
509 |             },
510 |             "lineInterpolation": "linear",
511 |             "lineWidth": 1,
512 |             "pointSize": 5,
513 |             "scaleDistribution": {
514 |               "type": "linear"
515 |             },
516 |             "showPoints": "never",
517 |             "spanNulls": false,
518 |             "stacking": {
519 |               "group": "A",
520 |               "mode": "none"
521 |             },
522 |             "thresholdsStyle": {
523 |               "mode": "off"
524 |             }
525 |           },
526 |           "mappings": [],
527 |           "thresholds": {
528 |             "mode": "absolute",
529 |             "steps": [
530 |               {
531 |                 "color": "green",
532 |                 "value": null
533 |               },
534 |               {
535 |                 "color": "red",
536 |                 "value": 80
537 |               }
538 |             ]
539 |           },
540 |           "unit": "short"
541 |         },
542 |         "overrides": []
543 |       },
544 |       "gridPos": {
545 |         "h": 8,
546 |         "w": 12,
547 |         "x": 0,
548 |         "y": 19
549 |       },
550 |       "id": 20,
551 |       "options": {
552 |         "legend": {
553 |           "calcs": [],
554 |           "displayMode": "list",
555 |           "placement": "bottom",
556 |           "showLegend": true
557 |         },
558 |         "tooltip": {
559 |           "mode": "single",
560 |           "sort": "none"
561 |         }
562 |       },
563 |       "targets": [
564 |         {
565 |           "datasource": {
566 |             "type": "prometheus",
567 |             "uid": "prometheus"
568 |           },
569 |           "exemplar": true,
570 |           "expr": "sum by (pod) (loaded_task_queue_count{exported_namespace=\"default\"})",
571 |           "interval": "",
572 |           "legendFormat": "{{pod}}",
573 |           "refId": "A"
574 |         },
575 |         {
576 |           "datasource": {
577 |             "type": "prometheus",
578 |             "uid": "prometheus"
579 |           },
580 |           "exemplar": true,
581 |           "expr": "",
582 |           "hide": false,
583 |           "interval": "",
584 |           "legendFormat": "",
585 |           "refId": "B"
586 |         }
587 |       ],
588 |       "title": "Matching Partitions",
589 |       "type": "timeseries"
590 |     }
591 |   ],
592 |   "refresh": "30s",
593 |   "schemaVersion": 37,
594 |   "style": "dark",
595 |   "tags": [
596 |     "matching",
597 |     "temporal"
598 |   ],
599 |   "templating": {
600 |     "list": []
601 |   },
602 |   "time": {
603 |     "from": "now-15m",
604 |     "to": "now"
605 |   },
606 |   "timepicker": {},
607 |   "timezone": "",
608 |   "title": "Soak Test - Matching",
609 |   "uid": "d9bfbe59-a99f-4c89-951e-4ec0167ecfb0",
610 |   "version": 2,
611 |   "weekStart": ""
612 | }
613 | 


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/soak-test-persistence.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "datasource",
  8 |           "uid": "grafana"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 34,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "collapsed": false,
 33 |       "datasource": {
 34 |         "type": "datasource",
 35 |         "uid": "grafana"
 36 |       },
 37 |       "gridPos": {
 38 |         "h": 1,
 39 |         "w": 24,
 40 |         "x": 0,
 41 |         "y": 0
 42 |       },
 43 |       "id": 6,
 44 |       "panels": [],
 45 |       "title": "Service",
 46 |       "type": "row"
 47 |     },
 48 |     {
 49 |       "datasource": {
 50 |         "type": "prometheus",
 51 |         "uid": "P1809F7CD0C75ACF3"
 52 |       },
 53 |       "fieldConfig": {
 54 |         "defaults": {
 55 |           "color": {
 56 |             "mode": "palette-classic"
 57 |           },
 58 |           "custom": {
 59 |             "axisCenteredZero": false,
 60 |             "axisColorMode": "text",
 61 |             "axisLabel": "",
 62 |             "axisPlacement": "auto",
 63 |             "barAlignment": 0,
 64 |             "drawStyle": "line",
 65 |             "fillOpacity": 0,
 66 |             "gradientMode": "none",
 67 |             "hideFrom": {
 68 |               "legend": false,
 69 |               "tooltip": false,
 70 |               "viz": false
 71 |             },
 72 |             "lineInterpolation": "linear",
 73 |             "lineWidth": 1,
 74 |             "pointSize": 5,
 75 |             "scaleDistribution": {
 76 |               "type": "linear"
 77 |             },
 78 |             "showPoints": "never",
 79 |             "spanNulls": false,
 80 |             "stacking": {
 81 |               "group": "A",
 82 |               "mode": "none"
 83 |             },
 84 |             "thresholdsStyle": {
 85 |               "mode": "off"
 86 |             }
 87 |           },
 88 |           "mappings": [],
 89 |           "thresholds": {
 90 |             "mode": "absolute",
 91 |             "steps": [
 92 |               {
 93 |                 "color": "green",
 94 |                 "value": null
 95 |               },
 96 |               {
 97 |                 "color": "red",
 98 |                 "value": 80
 99 |               }
100 |             ]
101 |           },
102 |           "unit": "ops"
103 |         },
104 |         "overrides": []
105 |       },
106 |       "gridPos": {
107 |         "h": 8,
108 |         "w": 12,
109 |         "x": 0,
110 |         "y": 1
111 |       },
112 |       "id": 8,
113 |       "options": {
114 |         "legend": {
115 |           "calcs": [],
116 |           "displayMode": "list",
117 |           "placement": "bottom",
118 |           "showLegend": true
119 |         },
120 |         "tooltip": {
121 |           "mode": "single",
122 |           "sort": "none"
123 |         }
124 |       },
125 |       "targets": [
126 |         {
127 |           "datasource": {
128 |             "type": "prometheus",
129 |             "uid": "prometheus"
130 |           },
131 |           "editorMode": "code",
132 |           "exemplar": false,
133 |           "expr": "sum by (operation) (rate(persistence_requests[$__rate_interval])) > 0",
134 |           "interval": "",
135 |           "legendFormat": "{{operation}}",
136 |           "range": true,
137 |           "refId": "A"
138 |         }
139 |       ],
140 |       "title": "RPS",
141 |       "type": "timeseries"
142 |     },
143 |     {
144 |       "datasource": {
145 |         "type": "prometheus",
146 |         "uid": "P1809F7CD0C75ACF3"
147 |       },
148 |       "fieldConfig": {
149 |         "defaults": {
150 |           "color": {
151 |             "mode": "palette-classic"
152 |           },
153 |           "custom": {
154 |             "axisCenteredZero": false,
155 |             "axisColorMode": "text",
156 |             "axisLabel": "",
157 |             "axisPlacement": "auto",
158 |             "barAlignment": 0,
159 |             "drawStyle": "line",
160 |             "fillOpacity": 0,
161 |             "gradientMode": "none",
162 |             "hideFrom": {
163 |               "legend": false,
164 |               "tooltip": false,
165 |               "viz": false
166 |             },
167 |             "lineInterpolation": "linear",
168 |             "lineWidth": 1,
169 |             "pointSize": 5,
170 |             "scaleDistribution": {
171 |               "type": "linear"
172 |             },
173 |             "showPoints": "never",
174 |             "spanNulls": false,
175 |             "stacking": {
176 |               "group": "A",
177 |               "mode": "none"
178 |             },
179 |             "thresholdsStyle": {
180 |               "mode": "off"
181 |             }
182 |           },
183 |           "mappings": [],
184 |           "thresholds": {
185 |             "mode": "absolute",
186 |             "steps": [
187 |               {
188 |                 "color": "green",
189 |                 "value": null
190 |               },
191 |               {
192 |                 "color": "red",
193 |                 "value": 80
194 |               }
195 |             ]
196 |           },
197 |           "unit": "s"
198 |         },
199 |         "overrides": []
200 |       },
201 |       "gridPos": {
202 |         "h": 8,
203 |         "w": 12,
204 |         "x": 12,
205 |         "y": 1
206 |       },
207 |       "id": 4,
208 |       "options": {
209 |         "legend": {
210 |           "calcs": [],
211 |           "displayMode": "list",
212 |           "placement": "bottom",
213 |           "showLegend": true
214 |         },
215 |         "tooltip": {
216 |           "mode": "single",
217 |           "sort": "none"
218 |         }
219 |       },
220 |       "targets": [
221 |         {
222 |           "datasource": {
223 |             "type": "prometheus",
224 |             "uid": "prometheus"
225 |           },
226 |           "editorMode": "code",
227 |           "exemplar": true,
228 |           "expr": "histogram_quantile(0.95, sum by (le, operation) (rate(persistence_latency_bucket[$__rate_interval]))) > 0",
229 |           "interval": "",
230 |           "legendFormat": "{{operation}}",
231 |           "range": true,
232 |           "refId": "A"
233 |         }
234 |       ],
235 |       "title": "Request Latency",
236 |       "type": "timeseries"
237 |     },
238 |     {
239 |       "datasource": {
240 |         "type": "prometheus",
241 |         "uid": "P1809F7CD0C75ACF3"
242 |       },
243 |       "fieldConfig": {
244 |         "defaults": {
245 |           "color": {
246 |             "mode": "palette-classic"
247 |           },
248 |           "custom": {
249 |             "axisCenteredZero": false,
250 |             "axisColorMode": "text",
251 |             "axisLabel": "",
252 |             "axisPlacement": "auto",
253 |             "barAlignment": 0,
254 |             "drawStyle": "line",
255 |             "fillOpacity": 0,
256 |             "gradientMode": "none",
257 |             "hideFrom": {
258 |               "legend": false,
259 |               "tooltip": false,
260 |               "viz": false
261 |             },
262 |             "lineInterpolation": "linear",
263 |             "lineWidth": 1,
264 |             "pointSize": 5,
265 |             "scaleDistribution": {
266 |               "type": "linear"
267 |             },
268 |             "showPoints": "never",
269 |             "spanNulls": false,
270 |             "stacking": {
271 |               "group": "A",
272 |               "mode": "none"
273 |             },
274 |             "thresholdsStyle": {
275 |               "mode": "off"
276 |             }
277 |           },
278 |           "mappings": [],
279 |           "thresholds": {
280 |             "mode": "absolute",
281 |             "steps": [
282 |               {
283 |                 "color": "green",
284 |                 "value": null
285 |               },
286 |               {
287 |                 "color": "red",
288 |                 "value": 80
289 |               }
290 |             ]
291 |           },
292 |           "unit": "reqps"
293 |         },
294 |         "overrides": []
295 |       },
296 |       "gridPos": {
297 |         "h": 8,
298 |         "w": 12,
299 |         "x": 0,
300 |         "y": 9
301 |       },
302 |       "id": 16,
303 |       "options": {
304 |         "legend": {
305 |           "calcs": [],
306 |           "displayMode": "list",
307 |           "placement": "bottom",
308 |           "showLegend": true
309 |         },
310 |         "tooltip": {
311 |           "mode": "single",
312 |           "sort": "none"
313 |         }
314 |       },
315 |       "targets": [
316 |         {
317 |           "datasource": {
318 |             "type": "prometheus",
319 |             "uid": "prometheus"
320 |           },
321 |           "editorMode": "code",
322 |           "exemplar": true,
323 |           "expr": "sum by (error_type) (rate(persistence_error_with_type[$__rate_interval])) > 0",
324 |           "interval": "",
325 |           "legendFormat": "{{error_type}}",
326 |           "range": true,
327 |           "refId": "A"
328 |         }
329 |       ],
330 |       "title": "Request Errors",
331 |       "type": "timeseries"
332 |     },
333 |     {
334 |       "datasource": {
335 |         "type": "cloudwatch",
336 |         "uid": "P561CB7FAE9DC47C2"
337 |       },
338 |       "fieldConfig": {
339 |         "defaults": {
340 |           "color": {
341 |             "mode": "palette-classic"
342 |           },
343 |           "custom": {
344 |             "axisCenteredZero": false,
345 |             "axisColorMode": "text",
346 |             "axisLabel": "",
347 |             "axisPlacement": "auto",
348 |             "barAlignment": 0,
349 |             "drawStyle": "line",
350 |             "fillOpacity": 0,
351 |             "gradientMode": "none",
352 |             "hideFrom": {
353 |               "legend": false,
354 |               "tooltip": false,
355 |               "viz": false
356 |             },
357 |             "lineInterpolation": "linear",
358 |             "lineWidth": 1,
359 |             "pointSize": 5,
360 |             "scaleDistribution": {
361 |               "type": "linear"
362 |             },
363 |             "showPoints": "never",
364 |             "spanNulls": false,
365 |             "stacking": {
366 |               "group": "A",
367 |               "mode": "none"
368 |             },
369 |             "thresholdsStyle": {
370 |               "mode": "off"
371 |             }
372 |           },
373 |           "mappings": [],
374 |           "thresholds": {
375 |             "mode": "absolute",
376 |             "steps": [
377 |               {
378 |                 "color": "green",
379 |                 "value": null
380 |               },
381 |               {
382 |                 "color": "red",
383 |                 "value": 80
384 |               }
385 |             ]
386 |           }
387 |         },
388 |         "overrides": []
389 |       },
390 |       "gridPos": {
391 |         "h": 8,
392 |         "w": 12,
393 |         "x": 0,
394 |         "y": 17
395 |       },
396 |       "id": 18,
397 |       "options": {
398 |         "legend": {
399 |           "calcs": [],
400 |           "displayMode": "list",
401 |           "placement": "bottom",
402 |           "showLegend": true
403 |         },
404 |         "tooltip": {
405 |           "mode": "single",
406 |           "sort": "none"
407 |         }
408 |       },
409 |       "targets": [
410 |         {
411 |           "datasource": {
412 |             "type": "cloudwatch",
413 |             "uid": "P561CB7FAE9DC47C2"
414 |           },
415 |           "dimensions": {
416 |             "DBInstanceIdentifier": "$dbidentifier"
417 |           },
418 |           "expression": "",
419 |           "id": "",
420 |           "label": "",
421 |           "matchExact": true,
422 |           "metricEditorMode": 0,
423 |           "metricName": "CPUUtilization",
424 |           "metricQueryType": 0,
425 |           "namespace": "AWS/RDS",
426 |           "period": "",
427 |           "queryMode": "Metrics",
428 |           "refId": "A",
429 |           "region": "default",
430 |           "sqlExpression": "",
431 |           "statistic": "Average"
432 |         }
433 |       ],
434 |       "title": "RDS CPU",
435 |       "type": "timeseries"
436 |     }
437 |   ],
438 |   "refresh": "30s",
439 |   "schemaVersion": 37,
440 |   "style": "dark",
441 |   "tags": [
442 |     "persistence",
443 |     "temporal"
444 |   ],
445 |   "templating": {
446 |     "list": [
447 |       {
448 |         "hide": 2,
449 |         "name": "dbidentifier",
450 |         "query": "eks-rds-mysql-scaling-series20230504100109469600000005",
451 |         "skipUrlSync": false,
452 |         "type": "constant"
453 |       }
454 |     ]
455 |   },
456 |   "time": {
457 |     "from": "now-15m",
458 |     "to": "now"
459 |   },
460 |   "timepicker": {},
461 |   "timezone": "",
462 |   "title": "Soak Test - Persistence",
463 |   "uid": "d2bab180-bba7-4cdd-a946-75543f8c512e",
464 |   "version": 2,
465 |   "weekStart": ""
466 | }
467 | 


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/soak-test-pods.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "grafana",
  8 |           "uid": "-- Grafana --"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 30,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "collapsed": false,
 33 |       "gridPos": {
 34 |         "h": 1,
 35 |         "w": 24,
 36 |         "x": 0,
 37 |         "y": 0
 38 |       },
 39 |       "id": 4,
 40 |       "panels": [],
 41 |       "repeat": "services",
 42 |       "repeatDirection": "h",
 43 |       "title": "$services",
 44 |       "type": "row"
 45 |     },
 46 |     {
 47 |       "datasource": {
 48 |         "type": "prometheus",
 49 |         "uid": "P1809F7CD0C75ACF3"
 50 |       },
 51 |       "fieldConfig": {
 52 |         "defaults": {
 53 |           "color": {
 54 |             "mode": "palette-classic"
 55 |           },
 56 |           "custom": {
 57 |             "axisCenteredZero": false,
 58 |             "axisColorMode": "text",
 59 |             "axisLabel": "",
 60 |             "axisPlacement": "auto",
 61 |             "barAlignment": 0,
 62 |             "drawStyle": "line",
 63 |             "fillOpacity": 0,
 64 |             "gradientMode": "none",
 65 |             "hideFrom": {
 66 |               "legend": false,
 67 |               "tooltip": false,
 68 |               "viz": false
 69 |             },
 70 |             "lineInterpolation": "linear",
 71 |             "lineWidth": 1,
 72 |             "pointSize": 5,
 73 |             "scaleDistribution": {
 74 |               "type": "linear"
 75 |             },
 76 |             "showPoints": "never",
 77 |             "spanNulls": false,
 78 |             "stacking": {
 79 |               "group": "A",
 80 |               "mode": "none"
 81 |             },
 82 |             "thresholdsStyle": {
 83 |               "mode": "off"
 84 |             }
 85 |           },
 86 |           "mappings": [],
 87 |           "thresholds": {
 88 |             "mode": "absolute",
 89 |             "steps": [
 90 |               {
 91 |                 "color": "green",
 92 |                 "value": null
 93 |               },
 94 |               {
 95 |                 "color": "red",
 96 |                 "value": 80
 97 |               }
 98 |             ]
 99 |           },
100 |           "unit": "percentunit"
101 |         },
102 |         "overrides": [
103 |           {
104 |             "matcher": {
105 |               "id": "byFrameRefID",
106 |               "options": "B"
107 |             },
108 |             "properties": [
109 |               {
110 |                 "id": "color",
111 |                 "value": {
112 |                   "fixedColor": "dark-red",
113 |                   "mode": "fixed"
114 |                 }
115 |               },
116 |               {
117 |                 "id": "custom.fillOpacity",
118 |                 "value": 50
119 |               }
120 |             ]
121 |           }
122 |         ]
123 |       },
124 |       "gridPos": {
125 |         "h": 8,
126 |         "w": 12,
127 |         "x": 0,
128 |         "y": 1
129 |       },
130 |       "id": 16,
131 |       "options": {
132 |         "legend": {
133 |           "calcs": [],
134 |           "displayMode": "list",
135 |           "placement": "bottom",
136 |           "showLegend": true
137 |         },
138 |         "tooltip": {
139 |           "mode": "single",
140 |           "sort": "none"
141 |         }
142 |       },
143 |       "targets": [
144 |         {
145 |           "datasource": {
146 |             "type": "prometheus",
147 |             "uid": "prometheus"
148 |           },
149 |           "editorMode": "code",
150 |           "exemplar": true,
151 |           "expr": "sum(\n  rate(container_cpu_usage_seconds_total{container=\"temporal\"}[$__rate_interval])\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\",resource=\"cpu\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"}\n) by (pod)\n",
152 |           "interval": "",
153 |           "legendFormat": "__auto",
154 |           "range": true,
155 |           "refId": "A"
156 |         },
157 |         {
158 |           "datasource": {
159 |             "type": "prometheus",
160 |             "uid": "P1809F7CD0C75ACF3"
161 |           },
162 |           "editorMode": "code",
163 |           "expr": "sum(\n    increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}[$__rate_interval])\n    * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"}\n)\n/\nsum(\n    increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}[$__rate_interval])\n    * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"}\n) > 0\n",
164 |           "hide": false,
165 |           "legendFormat": "throttle",
166 |           "range": true,
167 |           "refId": "B"
168 |         }
169 |       ],
170 |       "title": "$services CPU Request % Used",
171 |       "type": "timeseries"
172 |     },
173 |     {
174 |       "datasource": {
175 |         "type": "prometheus",
176 |         "uid": "P1809F7CD0C75ACF3"
177 |       },
178 |       "fieldConfig": {
179 |         "defaults": {
180 |           "color": {
181 |             "mode": "palette-classic"
182 |           },
183 |           "custom": {
184 |             "axisCenteredZero": false,
185 |             "axisColorMode": "text",
186 |             "axisLabel": "",
187 |             "axisPlacement": "auto",
188 |             "barAlignment": 0,
189 |             "drawStyle": "line",
190 |             "fillOpacity": 0,
191 |             "gradientMode": "none",
192 |             "hideFrom": {
193 |               "legend": false,
194 |               "tooltip": false,
195 |               "viz": false
196 |             },
197 |             "lineInterpolation": "linear",
198 |             "lineWidth": 1,
199 |             "pointSize": 5,
200 |             "scaleDistribution": {
201 |               "type": "linear"
202 |             },
203 |             "showPoints": "never",
204 |             "spanNulls": false,
205 |             "stacking": {
206 |               "group": "A",
207 |               "mode": "none"
208 |             },
209 |             "thresholdsStyle": {
210 |               "mode": "off"
211 |             }
212 |           },
213 |           "mappings": [],
214 |           "thresholds": {
215 |             "mode": "absolute",
216 |             "steps": [
217 |               {
218 |                 "color": "green",
219 |                 "value": null
220 |               },
221 |               {
222 |                 "color": "red",
223 |                 "value": 80
224 |               }
225 |             ]
226 |           },
227 |           "unit": "percentunit"
228 |         },
229 |         "overrides": []
230 |       },
231 |       "gridPos": {
232 |         "h": 8,
233 |         "w": 12,
234 |         "x": 12,
235 |         "y": 1
236 |       },
237 |       "id": 17,
238 |       "options": {
239 |         "legend": {
240 |           "calcs": [],
241 |           "displayMode": "list",
242 |           "placement": "bottom",
243 |           "showLegend": true
244 |         },
245 |         "tooltip": {
246 |           "mode": "single",
247 |           "sort": "none"
248 |         }
249 |       },
250 |       "targets": [
251 |         {
252 |           "datasource": {
253 |             "type": "prometheus",
254 |             "uid": "prometheus"
255 |           },
256 |           "editorMode": "code",
257 |           "exemplar": true,
258 |           "expr": "sum(\n    container_memory_working_set_bytes{namespace=\"temporal\", container!=\"\", image!=\"\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\", resource=\"memory\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"}\n) by (pod)\n",
259 |           "interval": "",
260 |           "legendFormat": "{{pod}}",
261 |           "range": true,
262 |           "refId": "A"
263 |         }
264 |       ],
265 |       "title": "$services Memory Request % Used",
266 |       "type": "timeseries"
267 |     },
268 |     {
269 |       "datasource": {
270 |         "type": "prometheus",
271 |         "uid": "P1809F7CD0C75ACF3"
272 |       },
273 |       "fieldConfig": {
274 |         "defaults": {
275 |           "color": {
276 |             "mode": "palette-classic"
277 |           },
278 |           "custom": {
279 |             "axisCenteredZero": false,
280 |             "axisColorMode": "text",
281 |             "axisLabel": "",
282 |             "axisPlacement": "auto",
283 |             "barAlignment": 0,
284 |             "drawStyle": "line",
285 |             "fillOpacity": 0,
286 |             "gradientMode": "none",
287 |             "hideFrom": {
288 |               "legend": false,
289 |               "tooltip": false,
290 |               "viz": false
291 |             },
292 |             "lineInterpolation": "linear",
293 |             "lineWidth": 1,
294 |             "pointSize": 5,
295 |             "scaleDistribution": {
296 |               "type": "linear"
297 |             },
298 |             "showPoints": "never",
299 |             "spanNulls": false,
300 |             "stacking": {
301 |               "group": "A",
302 |               "mode": "none"
303 |             },
304 |             "thresholdsStyle": {
305 |               "mode": "off"
306 |             }
307 |           },
308 |           "mappings": [],
309 |           "thresholds": {
310 |             "mode": "absolute",
311 |             "steps": [
312 |               {
313 |                 "color": "green",
314 |                 "value": null
315 |               },
316 |               {
317 |                 "color": "red",
318 |                 "value": 80
319 |               }
320 |             ]
321 |           },
322 |           "unit": "short"
323 |         },
324 |         "overrides": [
325 |           {
326 |             "matcher": {
327 |               "id": "byFrameRefID",
328 |               "options": "B"
329 |             },
330 |             "properties": [
331 |               {
332 |                 "id": "color",
333 |                 "value": {
334 |                   "fixedColor": "dark-orange",
335 |                   "mode": "fixed"
336 |                 }
337 |               },
338 |               {
339 |                 "id": "custom.lineStyle",
340 |                 "value": {
341 |                   "dash": [
342 |                     10,
343 |                     10
344 |                   ],
345 |                   "fill": "dash"
346 |                 }
347 |               }
348 |             ]
349 |           },
350 |           {
351 |             "matcher": {
352 |               "id": "byFrameRefID",
353 |               "options": "C"
354 |             },
355 |             "properties": [
356 |               {
357 |                 "id": "color",
358 |                 "value": {
359 |                   "fixedColor": "dark-red",
360 |                   "mode": "fixed"
361 |                 }
362 |               },
363 |               {
364 |                 "id": "custom.lineStyle",
365 |                 "value": {
366 |                   "dash": [
367 |                     10,
368 |                     10
369 |                   ],
370 |                   "fill": "dash"
371 |                 }
372 |               }
373 |             ]
374 |           }
375 |         ]
376 |       },
377 |       "gridPos": {
378 |         "h": 8,
379 |         "w": 12,
380 |         "x": 0,
381 |         "y": 9
382 |       },
383 |       "id": 18,
384 |       "options": {
385 |         "legend": {
386 |           "calcs": [],
387 |           "displayMode": "list",
388 |           "placement": "bottom",
389 |           "showLegend": true
390 |         },
391 |         "tooltip": {
392 |           "mode": "single",
393 |           "sort": "none"
394 |         }
395 |       },
396 |       "targets": [
397 |         {
398 |           "datasource": {
399 |             "type": "prometheus",
400 |             "uid": "prometheus"
401 |           },
402 |           "editorMode": "code",
403 |           "exemplar": true,
404 |           "expr": "sum(\n  rate(container_cpu_usage_seconds_total{container=\"temporal\"}[$__rate_interval])\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"}\n) by (pod)",
405 |           "interval": "",
406 |           "legendFormat": "__auto",
407 |           "range": true,
408 |           "refId": "A"
409 |         },
410 |         {
411 |           "datasource": {
412 |             "type": "prometheus",
413 |             "uid": "P1809F7CD0C75ACF3"
414 |           },
415 |           "editorMode": "code",
416 |           "expr": "avg(\n  kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\",resource=\"cpu\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"})",
417 |           "hide": false,
418 |           "legendFormat": "container request",
419 |           "range": true,
420 |           "refId": "B"
421 |         },
422 |         {
423 |           "datasource": {
424 |             "type": "prometheus",
425 |             "uid": "P1809F7CD0C75ACF3"
426 |           },
427 |           "editorMode": "code",
428 |           "expr": "avg(\n  kube_pod_container_resource_limits{job=\"kube-state-metrics\",namespace=\"temporal\",resource=\"cpu\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"})",
429 |           "hide": false,
430 |           "legendFormat": "container limit",
431 |           "range": true,
432 |           "refId": "C"
433 |         }
434 |       ],
435 |       "title": "$services CPU",
436 |       "type": "timeseries"
437 |     },
438 |     {
439 |       "datasource": {
440 |         "type": "prometheus",
441 |         "uid": "P1809F7CD0C75ACF3"
442 |       },
443 |       "fieldConfig": {
444 |         "defaults": {
445 |           "color": {
446 |             "mode": "palette-classic"
447 |           },
448 |           "custom": {
449 |             "axisCenteredZero": false,
450 |             "axisColorMode": "text",
451 |             "axisLabel": "",
452 |             "axisPlacement": "auto",
453 |             "barAlignment": 0,
454 |             "drawStyle": "line",
455 |             "fillOpacity": 0,
456 |             "gradientMode": "none",
457 |             "hideFrom": {
458 |               "legend": false,
459 |               "tooltip": false,
460 |               "viz": false
461 |             },
462 |             "lineInterpolation": "linear",
463 |             "lineWidth": 1,
464 |             "pointSize": 5,
465 |             "scaleDistribution": {
466 |               "type": "linear"
467 |             },
468 |             "showPoints": "never",
469 |             "spanNulls": false,
470 |             "stacking": {
471 |               "group": "A",
472 |               "mode": "none"
473 |             },
474 |             "thresholdsStyle": {
475 |               "mode": "off"
476 |             }
477 |           },
478 |           "mappings": [],
479 |           "thresholds": {
480 |             "mode": "absolute",
481 |             "steps": [
482 |               {
483 |                 "color": "green",
484 |                 "value": null
485 |               },
486 |               {
487 |                 "color": "red",
488 |                 "value": 80
489 |               }
490 |             ]
491 |           },
492 |           "unit": "bytes"
493 |         },
494 |         "overrides": [
495 |           {
496 |             "matcher": {
497 |               "id": "byFrameRefID",
498 |               "options": "B"
499 |             },
500 |             "properties": [
501 |               {
502 |                 "id": "color",
503 |                 "value": {
504 |                   "fixedColor": "dark-orange",
505 |                   "mode": "fixed"
506 |                 }
507 |               },
508 |               {
509 |                 "id": "custom.lineStyle",
510 |                 "value": {
511 |                   "dash": [
512 |                     10,
513 |                     10
514 |                   ],
515 |                   "fill": "dash"
516 |                 }
517 |               }
518 |             ]
519 |           },
520 |           {
521 |             "matcher": {
522 |               "id": "byFrameRefID",
523 |               "options": "C"
524 |             },
525 |             "properties": [
526 |               {
527 |                 "id": "color",
528 |                 "value": {
529 |                   "fixedColor": "dark-red",
530 |                   "mode": "fixed"
531 |                 }
532 |               },
533 |               {
534 |                 "id": "custom.lineStyle",
535 |                 "value": {
536 |                   "dash": [
537 |                     10,
538 |                     10
539 |                   ],
540 |                   "fill": "dash"
541 |                 }
542 |               }
543 |             ]
544 |           }
545 |         ]
546 |       },
547 |       "gridPos": {
548 |         "h": 8,
549 |         "w": 12,
550 |         "x": 12,
551 |         "y": 9
552 |       },
553 |       "id": 19,
554 |       "options": {
555 |         "legend": {
556 |           "calcs": [],
557 |           "displayMode": "list",
558 |           "placement": "bottom",
559 |           "showLegend": true
560 |         },
561 |         "tooltip": {
562 |           "mode": "single",
563 |           "sort": "none"
564 |         }
565 |       },
566 |       "targets": [
567 |         {
568 |           "datasource": {
569 |             "type": "prometheus",
570 |             "uid": "prometheus"
571 |           },
572 |           "editorMode": "code",
573 |           "exemplar": true,
574 |           "expr": "sum(\n    container_memory_working_set_bytes{namespace=\"temporal\", container!=\"\", image!=\"\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"}\n) by (pod)",
575 |           "interval": "",
576 |           "legendFormat": "{{pod}}",
577 |           "range": true,
578 |           "refId": "A"
579 |         },
580 |         {
581 |           "datasource": {
582 |             "type": "prometheus",
583 |             "uid": "P1809F7CD0C75ACF3"
584 |           },
585 |           "editorMode": "code",
586 |           "expr": "avg(\n  kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\",resource=\"memory\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"})",
587 |           "hide": false,
588 |           "legendFormat": "container request",
589 |           "range": true,
590 |           "refId": "B"
591 |         },
592 |         {
593 |           "datasource": {
594 |             "type": "prometheus",
595 |             "uid": "P1809F7CD0C75ACF3"
596 |           },
597 |           "editorMode": "code",
598 |           "expr": "avg(\n  kube_pod_container_resource_limits{job=\"kube-state-metrics\",namespace=\"temporal\",resource=\"memory\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-$services\"})",
599 |           "hide": false,
600 |           "legendFormat": "container limit",
601 |           "range": true,
602 |           "refId": "C"
603 |         }
604 |       ],
605 |       "title": "$services memory",
606 |       "type": "timeseries"
607 |     }
608 |   ],
609 |   "refresh": "30s",
610 |   "schemaVersion": 37,
611 |   "style": "dark",
612 |   "tags": [
613 |     "temporal",
614 |     "pods"
615 |   ],
616 |   "templating": {
617 |     "list": [
618 |       {
619 |         "current": {
620 |           "selected": true,
621 |           "text": [
622 |             "history"
623 |           ],
624 |           "value": [
625 |             "history"
626 |           ]
627 |         },
628 |         "hide": 0,
629 |         "includeAll": true,
630 |         "multi": true,
631 |         "name": "services",
632 |         "options": [
633 |           {
634 |             "selected": false,
635 |             "text": "All",
636 |             "value": "$__all"
637 |           },
638 |           {
639 |             "selected": false,
640 |             "text": "frontend",
641 |             "value": "frontend"
642 |           },
643 |           {
644 |             "selected": true,
645 |             "text": "history",
646 |             "value": "history"
647 |           },
648 |           {
649 |             "selected": false,
650 |             "text": "matching",
651 |             "value": "matching"
652 |           },
653 |           {
654 |             "selected": false,
655 |             "text": "worker",
656 |             "value": "worker"
657 |           }
658 |         ],
659 |         "query": "frontend,history,matching,worker",
660 |         "queryValue": "",
661 |         "skipUrlSync": false,
662 |         "type": "custom"
663 |       }
664 |     ]
665 |   },
666 |   "time": {
667 |     "from": "now-15m",
668 |     "to": "now"
669 |   },
670 |   "timepicker": {},
671 |   "timezone": "",
672 |   "title": "Soak Test - Pods",
673 |   "uid": "JjZ8XAsVk",
674 |   "version": 3,
675 |   "weekStart": ""
676 | }
677 | 


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/soak-test-polling.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "datasource",
  8 |           "uid": "grafana"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 35,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "datasource": {
 33 |         "type": "prometheus",
 34 |         "uid": "P1809F7CD0C75ACF3"
 35 |       },
 36 |       "fieldConfig": {
 37 |         "defaults": {
 38 |           "color": {
 39 |             "mode": "palette-classic"
 40 |           },
 41 |           "custom": {
 42 |             "axisCenteredZero": false,
 43 |             "axisColorMode": "text",
 44 |             "axisLabel": "",
 45 |             "axisPlacement": "auto",
 46 |             "barAlignment": 0,
 47 |             "drawStyle": "line",
 48 |             "fillOpacity": 0,
 49 |             "gradientMode": "none",
 50 |             "hideFrom": {
 51 |               "legend": false,
 52 |               "tooltip": false,
 53 |               "viz": false
 54 |             },
 55 |             "lineInterpolation": "linear",
 56 |             "lineWidth": 1,
 57 |             "pointSize": 5,
 58 |             "scaleDistribution": {
 59 |               "type": "linear"
 60 |             },
 61 |             "showPoints": "never",
 62 |             "spanNulls": false,
 63 |             "stacking": {
 64 |               "group": "A",
 65 |               "mode": "none"
 66 |             },
 67 |             "thresholdsStyle": {
 68 |               "mode": "off"
 69 |             }
 70 |           },
 71 |           "mappings": [],
 72 |           "thresholds": {
 73 |             "mode": "absolute",
 74 |             "steps": [
 75 |               {
 76 |                 "color": "green",
 77 |                 "value": null
 78 |               },
 79 |               {
 80 |                 "color": "red",
 81 |                 "value": 80
 82 |               }
 83 |             ]
 84 |           },
 85 |           "unit": "percentunit"
 86 |         },
 87 |         "overrides": []
 88 |       },
 89 |       "gridPos": {
 90 |         "h": 8,
 91 |         "w": 12,
 92 |         "x": 0,
 93 |         "y": 0
 94 |       },
 95 |       "id": 28,
 96 |       "options": {
 97 |         "legend": {
 98 |           "calcs": [],
 99 |           "displayMode": "list",
100 |           "placement": "bottom",
101 |           "showLegend": true
102 |         },
103 |         "tooltip": {
104 |           "mode": "single",
105 |           "sort": "none"
106 |         }
107 |       },
108 |       "targets": [
109 |         {
110 |           "datasource": {
111 |             "type": "prometheus",
112 |             "uid": "P1809F7CD0C75ACF3"
113 |           },
114 |           "editorMode": "code",
115 |           "expr": "sum (rate(poll_success_sync{task_type=\"Workflow\"}[$__rate_interval])) / sum  (rate(poll_success{task_type=\"Workflow\"}[$__rate_interval]))",
116 |           "legendFormat": "Workflow",
117 |           "range": true,
118 |           "refId": "A"
119 |         },
120 |         {
121 |           "datasource": {
122 |             "type": "prometheus",
123 |             "uid": "P1809F7CD0C75ACF3"
124 |           },
125 |           "editorMode": "code",
126 |           "expr": "sum (rate(poll_success_sync{task_type=\"Activity\"}[$__rate_interval])) / sum  (rate(poll_success{task_type=\"Activity\"}[$__rate_interval]))",
127 |           "hide": false,
128 |           "legendFormat": "Activity",
129 |           "range": true,
130 |           "refId": "B"
131 |         }
132 |       ],
133 |       "title": "Poll Sync Rate",
134 |       "type": "timeseries"
135 |     },
136 |     {
137 |       "datasource": {
138 |         "type": "prometheus",
139 |         "uid": "P1809F7CD0C75ACF3"
140 |       },
141 |       "fieldConfig": {
142 |         "defaults": {
143 |           "color": {
144 |             "mode": "palette-classic"
145 |           },
146 |           "custom": {
147 |             "axisCenteredZero": false,
148 |             "axisColorMode": "text",
149 |             "axisLabel": "",
150 |             "axisPlacement": "auto",
151 |             "barAlignment": 0,
152 |             "drawStyle": "line",
153 |             "fillOpacity": 0,
154 |             "gradientMode": "none",
155 |             "hideFrom": {
156 |               "legend": false,
157 |               "tooltip": false,
158 |               "viz": false
159 |             },
160 |             "lineInterpolation": "linear",
161 |             "lineWidth": 1,
162 |             "pointSize": 5,
163 |             "scaleDistribution": {
164 |               "type": "linear"
165 |             },
166 |             "showPoints": "never",
167 |             "spanNulls": false,
168 |             "stacking": {
169 |               "group": "A",
170 |               "mode": "none"
171 |             },
172 |             "thresholdsStyle": {
173 |               "mode": "off"
174 |             }
175 |           },
176 |           "mappings": [],
177 |           "thresholds": {
178 |             "mode": "absolute",
179 |             "steps": [
180 |               {
181 |                 "color": "green",
182 |                 "value": null
183 |               },
184 |               {
185 |                 "color": "red",
186 |                 "value": 80
187 |               }
188 |             ]
189 |           },
190 |           "unit": "percentunit"
191 |         },
192 |         "overrides": []
193 |       },
194 |       "gridPos": {
195 |         "h": 8,
196 |         "w": 12,
197 |         "x": 12,
198 |         "y": 0
199 |       },
200 |       "id": 31,
201 |       "options": {
202 |         "legend": {
203 |           "calcs": [],
204 |           "displayMode": "list",
205 |           "placement": "bottom",
206 |           "showLegend": true
207 |         },
208 |         "tooltip": {
209 |           "mode": "single",
210 |           "sort": "none"
211 |         }
212 |       },
213 |       "targets": [
214 |         {
215 |           "datasource": {
216 |             "type": "prometheus",
217 |             "uid": "P1809F7CD0C75ACF3"
218 |           },
219 |           "editorMode": "code",
220 |           "expr": "sum (rate(poll_timeouts{task_type=\"Workflow\"}[$__rate_interval])) / sum (rate(poll_success{task_type=\"Workflow\"}[$__rate_interval]) + rate(poll_success_sync{task_type=\"Workflow\"}[$__rate_interval]) + rate(poll_timeouts{task_type=\"Workflow\"}[$__rate_interval]))",
221 |           "legendFormat": "Workflow",
222 |           "range": true,
223 |           "refId": "A"
224 |         },
225 |         {
226 |           "datasource": {
227 |             "type": "prometheus",
228 |             "uid": "P1809F7CD0C75ACF3"
229 |           },
230 |           "editorMode": "code",
231 |           "expr": "sum (rate(poll_timeouts{task_type=\"Activity\"}[$__rate_interval])) / sum (rate(poll_success{task_type=\"Activity\"}[$__rate_interval]) + rate(poll_success_sync{task_type=\"Activity\"}[$__rate_interval]) + rate(poll_timeouts{task_type=\"Activity\"}[$__rate_interval]))",
232 |           "hide": false,
233 |           "legendFormat": "Activity",
234 |           "range": true,
235 |           "refId": "B"
236 |         }
237 |       ],
238 |       "title": "Poll Timeout Rate",
239 |       "type": "timeseries"
240 |     },
241 |     {
242 |       "datasource": {
243 |         "type": "prometheus",
244 |         "uid": "P1809F7CD0C75ACF3"
245 |       },
246 |       "fieldConfig": {
247 |         "defaults": {
248 |           "color": {
249 |             "mode": "palette-classic"
250 |           },
251 |           "custom": {
252 |             "axisCenteredZero": false,
253 |             "axisColorMode": "text",
254 |             "axisLabel": "",
255 |             "axisPlacement": "auto",
256 |             "barAlignment": 0,
257 |             "drawStyle": "line",
258 |             "fillOpacity": 0,
259 |             "gradientMode": "none",
260 |             "hideFrom": {
261 |               "legend": false,
262 |               "tooltip": false,
263 |               "viz": false
264 |             },
265 |             "lineInterpolation": "linear",
266 |             "lineWidth": 1,
267 |             "pointSize": 5,
268 |             "scaleDistribution": {
269 |               "type": "linear"
270 |             },
271 |             "showPoints": "never",
272 |             "spanNulls": false,
273 |             "stacking": {
274 |               "group": "A",
275 |               "mode": "none"
276 |             },
277 |             "thresholdsStyle": {
278 |               "mode": "dashed"
279 |             }
280 |           },
281 |           "mappings": [],
282 |           "thresholds": {
283 |             "mode": "absolute",
284 |             "steps": [
285 |               {
286 |                 "color": "green",
287 |                 "value": null
288 |               },
289 |               {
290 |                 "color": "red",
291 |                 "value": 0.15
292 |               }
293 |             ]
294 |           },
295 |           "unit": "s"
296 |         },
297 |         "overrides": []
298 |       },
299 |       "gridPos": {
300 |         "h": 8,
301 |         "w": 12,
302 |         "x": 12,
303 |         "y": 8
304 |       },
305 |       "id": 30,
306 |       "options": {
307 |         "legend": {
308 |           "calcs": [],
309 |           "displayMode": "list",
310 |           "placement": "bottom",
311 |           "showLegend": true
312 |         },
313 |         "tooltip": {
314 |           "mode": "single",
315 |           "sort": "none"
316 |         }
317 |       },
318 |       "targets": [
319 |         {
320 |           "datasource": {
321 |             "type": "prometheus",
322 |             "uid": "prometheus"
323 |           },
324 |           "editorMode": "code",
325 |           "exemplar": true,
326 |           "expr": "histogram_quantile(0.95, sum by(le) (rate(temporal_workflow_task_schedule_to_start_latency_bucket{namespace=\"default\"}[$__rate_interval])))",
327 |           "interval": "",
328 |           "legendFormat": "Workflow",
329 |           "range": true,
330 |           "refId": "A"
331 |         },
332 |         {
333 |           "datasource": {
334 |             "type": "prometheus",
335 |             "uid": "P1809F7CD0C75ACF3"
336 |           },
337 |           "editorMode": "code",
338 |           "expr": "histogram_quantile(0.95, sum by(le) (rate(temporal_activity_schedule_to_start_latency_bucket{namespace=\"default\"}[$__rate_interval])))",
339 |           "hide": false,
340 |           "legendFormat": "Activity",
341 |           "range": true,
342 |           "refId": "B"
343 |         }
344 |       ],
345 |       "title": "Schedule to Start Latency p95",
346 |       "type": "timeseries"
347 |     },
348 |     {
349 |       "collapsed": false,
350 |       "gridPos": {
351 |         "h": 1,
352 |         "w": 24,
353 |         "x": 0,
354 |         "y": 16
355 |       },
356 |       "id": 17,
357 |       "panels": [],
358 |       "title": "Worker Pods",
359 |       "type": "row"
360 |     },
361 |     {
362 |       "datasource": {
363 |         "type": "prometheus",
364 |         "uid": "P1809F7CD0C75ACF3"
365 |       },
366 |       "fieldConfig": {
367 |         "defaults": {
368 |           "color": {
369 |             "mode": "palette-classic"
370 |           },
371 |           "custom": {
372 |             "axisCenteredZero": false,
373 |             "axisColorMode": "text",
374 |             "axisLabel": "",
375 |             "axisPlacement": "auto",
376 |             "barAlignment": 0,
377 |             "drawStyle": "line",
378 |             "fillOpacity": 0,
379 |             "gradientMode": "none",
380 |             "hideFrom": {
381 |               "legend": false,
382 |               "tooltip": false,
383 |               "viz": false
384 |             },
385 |             "lineInterpolation": "linear",
386 |             "lineWidth": 1,
387 |             "pointSize": 5,
388 |             "scaleDistribution": {
389 |               "type": "linear"
390 |             },
391 |             "showPoints": "never",
392 |             "spanNulls": false,
393 |             "stacking": {
394 |               "group": "A",
395 |               "mode": "none"
396 |             },
397 |             "thresholdsStyle": {
398 |               "mode": "off"
399 |             }
400 |           },
401 |           "mappings": [],
402 |           "thresholds": {
403 |             "mode": "absolute",
404 |             "steps": [
405 |               {
406 |                 "color": "green",
407 |                 "value": null
408 |               },
409 |               {
410 |                 "color": "red",
411 |                 "value": 80
412 |               }
413 |             ]
414 |           },
415 |           "unit": "percentunit"
416 |         },
417 |         "overrides": [
418 |           {
419 |             "matcher": {
420 |               "id": "byFrameRefID",
421 |               "options": "B"
422 |             },
423 |             "properties": [
424 |               {
425 |                 "id": "color",
426 |                 "value": {
427 |                   "fixedColor": "dark-red",
428 |                   "mode": "fixed"
429 |                 }
430 |               },
431 |               {
432 |                 "id": "custom.fillOpacity",
433 |                 "value": 50
434 |               }
435 |             ]
436 |           }
437 |         ]
438 |       },
439 |       "gridPos": {
440 |         "h": 8,
441 |         "w": 12,
442 |         "x": 0,
443 |         "y": 17
444 |       },
445 |       "id": 24,
446 |       "options": {
447 |         "legend": {
448 |           "calcs": [],
449 |           "displayMode": "list",
450 |           "placement": "bottom",
451 |           "showLegend": true
452 |         },
453 |         "tooltip": {
454 |           "mode": "single",
455 |           "sort": "none"
456 |         }
457 |       },
458 |       "targets": [
459 |         {
460 |           "datasource": {
461 |             "type": "prometheus",
462 |             "uid": "prometheus"
463 |           },
464 |           "editorMode": "code",
465 |           "exemplar": true,
466 |           "expr": "sum(\n    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"default\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"default\", workload=\"benchmark-workers\", workload_type=\"statefulset\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"default\",resource=\"cpu\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"default\", workload=\"benchmark-workers\", workload_type=\"statefulset\"}\n) by (pod)\n",
467 |           "interval": "",
468 |           "legendFormat": "__auto",
469 |           "range": true,
470 |           "refId": "A"
471 |         },
472 |         {
473 |           "datasource": {
474 |             "type": "prometheus",
475 |             "uid": "P1809F7CD0C75ACF3"
476 |           },
477 |           "editorMode": "code",
478 |           "expr": "sum(\n    increase(container_cpu_cfs_throttled_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}[$__rate_interval])\n    * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"default\", workload=\"benchmark-workers\", workload_type=\"statefulset\"}\n)\n/\nsum(\n    increase(container_cpu_cfs_periods_total{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", container!=\"\"}[$__rate_interval])\n    * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"default\", workload=\"benchmark-workers\", workload_type=\"statefulset\"}\n) > 0\n",
479 |           "hide": false,
480 |           "legendFormat": "throttle",
481 |           "range": true,
482 |           "refId": "B"
483 |         }
484 |       ],
485 |       "title": "Benchmark Workers CPU Request % Used",
486 |       "type": "timeseries"
487 |     },
488 |     {
489 |       "datasource": {
490 |         "type": "prometheus",
491 |         "uid": "P1809F7CD0C75ACF3"
492 |       },
493 |       "fieldConfig": {
494 |         "defaults": {
495 |           "color": {
496 |             "mode": "palette-classic"
497 |           },
498 |           "custom": {
499 |             "axisCenteredZero": false,
500 |             "axisColorMode": "text",
501 |             "axisLabel": "",
502 |             "axisPlacement": "auto",
503 |             "barAlignment": 0,
504 |             "drawStyle": "line",
505 |             "fillOpacity": 0,
506 |             "gradientMode": "none",
507 |             "hideFrom": {
508 |               "legend": false,
509 |               "tooltip": false,
510 |               "viz": false
511 |             },
512 |             "lineInterpolation": "linear",
513 |             "lineWidth": 1,
514 |             "pointSize": 5,
515 |             "scaleDistribution": {
516 |               "type": "linear"
517 |             },
518 |             "showPoints": "never",
519 |             "spanNulls": false,
520 |             "stacking": {
521 |               "group": "A",
522 |               "mode": "none"
523 |             },
524 |             "thresholdsStyle": {
525 |               "mode": "off"
526 |             }
527 |           },
528 |           "mappings": [],
529 |           "thresholds": {
530 |             "mode": "absolute",
531 |             "steps": [
532 |               {
533 |                 "color": "green",
534 |                 "value": null
535 |               },
536 |               {
537 |                 "color": "red",
538 |                 "value": 80
539 |               }
540 |             ]
541 |           },
542 |           "unit": "percentunit"
543 |         },
544 |         "overrides": []
545 |       },
546 |       "gridPos": {
547 |         "h": 8,
548 |         "w": 12,
549 |         "x": 12,
550 |         "y": 17
551 |       },
552 |       "id": 26,
553 |       "options": {
554 |         "legend": {
555 |           "calcs": [],
556 |           "displayMode": "list",
557 |           "placement": "bottom",
558 |           "showLegend": true
559 |         },
560 |         "tooltip": {
561 |           "mode": "single",
562 |           "sort": "none"
563 |         }
564 |       },
565 |       "targets": [
566 |         {
567 |           "datasource": {
568 |             "type": "prometheus",
569 |             "uid": "prometheus"
570 |           },
571 |           "editorMode": "code",
572 |           "exemplar": true,
573 |           "expr": "sum(\n    container_memory_working_set_bytes{namespace=\"default\", container!=\"\", image!=\"\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"default\", workload=\"benchmark-workers\", workload_type=\"statefulset\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"default\", resource=\"memory\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"default\", workload=\"benchmark-workers\", workload_type=\"statefulset\"}\n) by (pod)\n",
574 |           "interval": "",
575 |           "legendFormat": "{{pod}}",
576 |           "range": true,
577 |           "refId": "A"
578 |         }
579 |       ],
580 |       "title": "Benchmark Workers Memory Request % Used",
581 |       "type": "timeseries"
582 |     }
583 |   ],
584 |   "refresh": "30s",
585 |   "schemaVersion": 37,
586 |   "style": "dark",
587 |   "tags": [
588 |     "temporal",
589 |     "polling"
590 |   ],
591 |   "templating": {
592 |     "list": []
593 |   },
594 |   "time": {
595 |     "from": "now-15m",
596 |     "to": "now"
597 |   },
598 |   "timepicker": {},
599 |   "timezone": "",
600 |   "title": "Soak Test - Polling",
601 |   "uid": "trsgBasVk",
602 |   "version": 3,
603 |   "weekStart": ""
604 | }
605 | 


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/soak-test-services.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "grafana",
  8 |           "uid": "-- Grafana --"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 31,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "collapsed": false,
 33 |       "gridPos": {
 34 |         "h": 1,
 35 |         "w": 24,
 36 |         "x": 0,
 37 |         "y": 0
 38 |       },
 39 |       "id": 2,
 40 |       "panels": [],
 41 |       "title": "Services",
 42 |       "type": "row"
 43 |     },
 44 |     {
 45 |       "datasource": {
 46 |         "type": "prometheus",
 47 |         "uid": "P1809F7CD0C75ACF3"
 48 |       },
 49 |       "fieldConfig": {
 50 |         "defaults": {
 51 |           "color": {
 52 |             "mode": "palette-classic"
 53 |           },
 54 |           "custom": {
 55 |             "axisCenteredZero": false,
 56 |             "axisColorMode": "text",
 57 |             "axisLabel": "",
 58 |             "axisPlacement": "auto",
 59 |             "barAlignment": 0,
 60 |             "drawStyle": "line",
 61 |             "fillOpacity": 0,
 62 |             "gradientMode": "none",
 63 |             "hideFrom": {
 64 |               "legend": false,
 65 |               "tooltip": false,
 66 |               "viz": false
 67 |             },
 68 |             "lineInterpolation": "linear",
 69 |             "lineWidth": 1,
 70 |             "pointSize": 5,
 71 |             "scaleDistribution": {
 72 |               "type": "linear"
 73 |             },
 74 |             "showPoints": "never",
 75 |             "spanNulls": false,
 76 |             "stacking": {
 77 |               "group": "A",
 78 |               "mode": "none"
 79 |             },
 80 |             "thresholdsStyle": {
 81 |               "mode": "off"
 82 |             }
 83 |           },
 84 |           "mappings": [],
 85 |           "thresholds": {
 86 |             "mode": "absolute",
 87 |             "steps": [
 88 |               {
 89 |                 "color": "green",
 90 |                 "value": null
 91 |               },
 92 |               {
 93 |                 "color": "red",
 94 |                 "value": 80
 95 |               }
 96 |             ]
 97 |           },
 98 |           "unit": "short"
 99 |         },
100 |         "overrides": []
101 |       },
102 |       "gridPos": {
103 |         "h": 8,
104 |         "w": 12,
105 |         "x": 0,
106 |         "y": 1
107 |       },
108 |       "id": 6,
109 |       "options": {
110 |         "legend": {
111 |           "calcs": [],
112 |           "displayMode": "list",
113 |           "placement": "bottom",
114 |           "showLegend": true
115 |         },
116 |         "tooltip": {
117 |           "mode": "single",
118 |           "sort": "none"
119 |         }
120 |       },
121 |       "targets": [
122 |         {
123 |           "datasource": {
124 |             "type": "prometheus",
125 |             "uid": "prometheus"
126 |           },
127 |           "editorMode": "code",
128 |           "exemplar": true,
129 |           "expr": "sum by (service_name) (rate(service_requests[$__rate_interval]))",
130 |           "interval": "",
131 |           "legendFormat": "{{service_name}}",
132 |           "range": true,
133 |           "refId": "A"
134 |         }
135 |       ],
136 |       "title": "RPS",
137 |       "type": "timeseries"
138 |     },
139 |     {
140 |       "datasource": {
141 |         "type": "prometheus",
142 |         "uid": "P1809F7CD0C75ACF3"
143 |       },
144 |       "fieldConfig": {
145 |         "defaults": {
146 |           "color": {
147 |             "mode": "palette-classic"
148 |           },
149 |           "custom": {
150 |             "axisCenteredZero": false,
151 |             "axisColorMode": "text",
152 |             "axisLabel": "",
153 |             "axisPlacement": "auto",
154 |             "barAlignment": 0,
155 |             "drawStyle": "line",
156 |             "fillOpacity": 0,
157 |             "gradientMode": "none",
158 |             "hideFrom": {
159 |               "legend": false,
160 |               "tooltip": false,
161 |               "viz": false
162 |             },
163 |             "lineInterpolation": "linear",
164 |             "lineWidth": 1,
165 |             "pointSize": 5,
166 |             "scaleDistribution": {
167 |               "type": "linear"
168 |             },
169 |             "showPoints": "never",
170 |             "spanNulls": false,
171 |             "stacking": {
172 |               "group": "A",
173 |               "mode": "none"
174 |             },
175 |             "thresholdsStyle": {
176 |               "mode": "off"
177 |             }
178 |           },
179 |           "mappings": [],
180 |           "thresholds": {
181 |             "mode": "absolute",
182 |             "steps": [
183 |               {
184 |                 "color": "green",
185 |                 "value": null
186 |               },
187 |               {
188 |                 "color": "red",
189 |                 "value": 80
190 |               }
191 |             ]
192 |           },
193 |           "unit": "s"
194 |         },
195 |         "overrides": []
196 |       },
197 |       "gridPos": {
198 |         "h": 8,
199 |         "w": 12,
200 |         "x": 12,
201 |         "y": 1
202 |       },
203 |       "id": 8,
204 |       "options": {
205 |         "legend": {
206 |           "calcs": [],
207 |           "displayMode": "list",
208 |           "placement": "bottom",
209 |           "showLegend": true
210 |         },
211 |         "tooltip": {
212 |           "mode": "single",
213 |           "sort": "none"
214 |         }
215 |       },
216 |       "targets": [
217 |         {
218 |           "datasource": {
219 |             "type": "prometheus",
220 |             "uid": "prometheus"
221 |           },
222 |           "editorMode": "code",
223 |           "exemplar": true,
224 |           "expr": "histogram_quantile(0.95, sum by (le, service_name) (rate(service_latency_bucket{operation!~\"Poll.*\"}[$__rate_interval])))",
225 |           "interval": "",
226 |           "legendFormat": "__auto",
227 |           "range": true,
228 |           "refId": "A"
229 |         }
230 |       ],
231 |       "title": "Latency",
232 |       "type": "timeseries"
233 |     },
234 |     {
235 |       "datasource": {
236 |         "type": "prometheus",
237 |         "uid": "P1809F7CD0C75ACF3"
238 |       },
239 |       "fieldConfig": {
240 |         "defaults": {
241 |           "color": {
242 |             "mode": "palette-classic"
243 |           },
244 |           "custom": {
245 |             "axisCenteredZero": false,
246 |             "axisColorMode": "text",
247 |             "axisLabel": "",
248 |             "axisPlacement": "auto",
249 |             "barAlignment": 0,
250 |             "drawStyle": "line",
251 |             "fillOpacity": 0,
252 |             "gradientMode": "none",
253 |             "hideFrom": {
254 |               "legend": false,
255 |               "tooltip": false,
256 |               "viz": false
257 |             },
258 |             "lineInterpolation": "linear",
259 |             "lineWidth": 1,
260 |             "pointSize": 5,
261 |             "scaleDistribution": {
262 |               "type": "linear"
263 |             },
264 |             "showPoints": "never",
265 |             "spanNulls": false,
266 |             "stacking": {
267 |               "group": "A",
268 |               "mode": "none"
269 |             },
270 |             "thresholdsStyle": {
271 |               "mode": "off"
272 |             }
273 |           },
274 |           "mappings": [],
275 |           "thresholds": {
276 |             "mode": "absolute",
277 |             "steps": [
278 |               {
279 |                 "color": "green",
280 |                 "value": null
281 |               },
282 |               {
283 |                 "color": "red",
284 |                 "value": 80
285 |               }
286 |             ]
287 |           },
288 |           "unit": "short"
289 |         },
290 |         "overrides": []
291 |       },
292 |       "gridPos": {
293 |         "h": 8,
294 |         "w": 12,
295 |         "x": 0,
296 |         "y": 9
297 |       },
298 |       "id": 12,
299 |       "options": {
300 |         "legend": {
301 |           "calcs": [],
302 |           "displayMode": "list",
303 |           "placement": "bottom",
304 |           "showLegend": true
305 |         },
306 |         "tooltip": {
307 |           "mode": "single",
308 |           "sort": "none"
309 |         }
310 |       },
311 |       "targets": [
312 |         {
313 |           "datasource": {
314 |             "type": "prometheus",
315 |             "uid": "prometheus"
316 |           },
317 |           "editorMode": "code",
318 |           "exemplar": true,
319 |           "expr": "sum by (error_type) (rate(service_error_with_type[$__rate_interval])) > 0",
320 |           "interval": "",
321 |           "legendFormat": "__auto",
322 |           "range": true,
323 |           "refId": "A"
324 |         }
325 |       ],
326 |       "title": "Errors",
327 |       "type": "timeseries"
328 |     },
329 |     {
330 |       "datasource": {
331 |         "type": "prometheus",
332 |         "uid": "P1809F7CD0C75ACF3"
333 |       },
334 |       "fieldConfig": {
335 |         "defaults": {
336 |           "color": {
337 |             "mode": "palette-classic"
338 |           },
339 |           "custom": {
340 |             "axisCenteredZero": false,
341 |             "axisColorMode": "text",
342 |             "axisLabel": "",
343 |             "axisPlacement": "auto",
344 |             "barAlignment": 0,
345 |             "drawStyle": "line",
346 |             "fillOpacity": 0,
347 |             "gradientMode": "none",
348 |             "hideFrom": {
349 |               "legend": false,
350 |               "tooltip": false,
351 |               "viz": false
352 |             },
353 |             "lineInterpolation": "linear",
354 |             "lineWidth": 1,
355 |             "pointSize": 5,
356 |             "scaleDistribution": {
357 |               "type": "linear"
358 |             },
359 |             "showPoints": "never",
360 |             "spanNulls": false,
361 |             "stacking": {
362 |               "group": "A",
363 |               "mode": "none"
364 |             },
365 |             "thresholdsStyle": {
366 |               "mode": "off"
367 |             }
368 |           },
369 |           "mappings": [],
370 |           "thresholds": {
371 |             "mode": "absolute",
372 |             "steps": [
373 |               {
374 |                 "color": "green",
375 |                 "value": null
376 |               },
377 |               {
378 |                 "color": "red",
379 |                 "value": 80
380 |               }
381 |             ]
382 |           },
383 |           "unit": "short"
384 |         },
385 |         "overrides": []
386 |       },
387 |       "gridPos": {
388 |         "h": 8,
389 |         "w": 12,
390 |         "x": 12,
391 |         "y": 9
392 |       },
393 |       "id": 10,
394 |       "options": {
395 |         "legend": {
396 |           "calcs": [],
397 |           "displayMode": "list",
398 |           "placement": "bottom",
399 |           "showLegend": true
400 |         },
401 |         "tooltip": {
402 |           "mode": "single",
403 |           "sort": "none"
404 |         }
405 |       },
406 |       "targets": [
407 |         {
408 |           "datasource": {
409 |             "type": "prometheus",
410 |             "uid": "prometheus"
411 |           },
412 |           "editorMode": "code",
413 |           "exemplar": true,
414 |           "expr": "sum by (operation, resource_exhausted_cause) (rate(service_errors_resource_exhausted[$__rate_interval])) > 0",
415 |           "interval": "",
416 |           "legendFormat": "{{operation}}: {{resource_exhausted_cause}}",
417 |           "range": true,
418 |           "refId": "A"
419 |         }
420 |       ],
421 |       "title": "Resource Exhausted",
422 |       "type": "timeseries"
423 |     }
424 |   ],
425 |   "refresh": "30s",
426 |   "schemaVersion": 37,
427 |   "style": "dark",
428 |   "tags": [
429 |     "temporal",
430 |     "services"
431 |   ],
432 |   "templating": {
433 |     "list": []
434 |   },
435 |   "time": {
436 |     "from": "now-15m",
437 |     "to": "now"
438 |   },
439 |   "timepicker": {},
440 |   "timezone": "",
441 |   "title": "Soak Test - Services",
442 |   "uid": "BwQ6UzyVk",
443 |   "version": 2,
444 |   "weekStart": ""
445 | }
446 | 


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/soak-test-slo.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "datasource",
  8 |           "uid": "grafana"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 36,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "collapsed": false,
 33 |       "datasource": {
 34 |         "type": "datasource",
 35 |         "uid": "grafana"
 36 |       },
 37 |       "gridPos": {
 38 |         "h": 1,
 39 |         "w": 24,
 40 |         "x": 0,
 41 |         "y": 0
 42 |       },
 43 |       "id": 6,
 44 |       "panels": [],
 45 |       "title": "Overview",
 46 |       "type": "row"
 47 |     },
 48 |     {
 49 |       "datasource": {
 50 |         "type": "prometheus",
 51 |         "uid": "prometheus"
 52 |       },
 53 |       "fieldConfig": {
 54 |         "defaults": {
 55 |           "color": {
 56 |             "mode": "palette-classic"
 57 |           },
 58 |           "custom": {
 59 |             "axisCenteredZero": false,
 60 |             "axisColorMode": "text",
 61 |             "axisLabel": "",
 62 |             "axisPlacement": "auto",
 63 |             "barAlignment": 0,
 64 |             "drawStyle": "line",
 65 |             "fillOpacity": 0,
 66 |             "gradientMode": "none",
 67 |             "hideFrom": {
 68 |               "legend": false,
 69 |               "tooltip": false,
 70 |               "viz": false
 71 |             },
 72 |             "lineInterpolation": "linear",
 73 |             "lineWidth": 1,
 74 |             "pointSize": 5,
 75 |             "scaleDistribution": {
 76 |               "type": "linear"
 77 |             },
 78 |             "showPoints": "never",
 79 |             "spanNulls": false,
 80 |             "stacking": {
 81 |               "group": "A",
 82 |               "mode": "none"
 83 |             },
 84 |             "thresholdsStyle": {
 85 |               "mode": "off"
 86 |             }
 87 |           },
 88 |           "mappings": [],
 89 |           "thresholds": {
 90 |             "mode": "absolute",
 91 |             "steps": [
 92 |               {
 93 |                 "color": "green",
 94 |                 "value": null
 95 |               },
 96 |               {
 97 |                 "color": "red",
 98 |                 "value": 80
 99 |               }
100 |             ]
101 |           },
102 |           "unit": "short"
103 |         },
104 |         "overrides": []
105 |       },
106 |       "gridPos": {
107 |         "h": 8,
108 |         "w": 12,
109 |         "x": 0,
110 |         "y": 1
111 |       },
112 |       "id": 8,
113 |       "options": {
114 |         "legend": {
115 |           "calcs": [],
116 |           "displayMode": "list",
117 |           "placement": "bottom",
118 |           "showLegend": false
119 |         },
120 |         "tooltip": {
121 |           "mode": "single",
122 |           "sort": "none"
123 |         }
124 |       },
125 |       "targets": [
126 |         {
127 |           "datasource": {
128 |             "type": "prometheus",
129 |             "uid": "prometheus"
130 |           },
131 |           "exemplar": false,
132 |           "expr": "sum(rate(state_transition_count_count{exported_namespace=\"default\"}[$__rate_interval]))",
133 |           "interval": "",
134 |           "legendFormat": "",
135 |           "refId": "A"
136 |         }
137 |       ],
138 |       "title": "State Transitions Per Second",
139 |       "type": "timeseries"
140 |     },
141 |     {
142 |       "collapsed": false,
143 |       "datasource": {
144 |         "type": "datasource",
145 |         "uid": "grafana"
146 |       },
147 |       "gridPos": {
148 |         "h": 1,
149 |         "w": 24,
150 |         "x": 0,
151 |         "y": 9
152 |       },
153 |       "id": 2,
154 |       "panels": [],
155 |       "title": "SLO",
156 |       "type": "row"
157 |     },
158 |     {
159 |       "datasource": {
160 |         "type": "prometheus",
161 |         "uid": "prometheus"
162 |       },
163 |       "fieldConfig": {
164 |         "defaults": {
165 |           "color": {
166 |             "mode": "palette-classic"
167 |           },
168 |           "custom": {
169 |             "axisCenteredZero": false,
170 |             "axisColorMode": "text",
171 |             "axisLabel": "",
172 |             "axisPlacement": "auto",
173 |             "barAlignment": 0,
174 |             "drawStyle": "line",
175 |             "fillOpacity": 0,
176 |             "gradientMode": "none",
177 |             "hideFrom": {
178 |               "legend": false,
179 |               "tooltip": false,
180 |               "viz": false
181 |             },
182 |             "lineInterpolation": "linear",
183 |             "lineWidth": 1,
184 |             "pointSize": 5,
185 |             "scaleDistribution": {
186 |               "type": "linear"
187 |             },
188 |             "showPoints": "never",
189 |             "spanNulls": false,
190 |             "stacking": {
191 |               "group": "A",
192 |               "mode": "none"
193 |             },
194 |             "thresholdsStyle": {
195 |               "mode": "dashed"
196 |             }
197 |           },
198 |           "mappings": [],
199 |           "min": 0,
200 |           "thresholds": {
201 |             "mode": "absolute",
202 |             "steps": [
203 |               {
204 |                 "color": "green",
205 |                 "value": null
206 |               },
207 |               {
208 |                 "color": "red",
209 |                 "value": 0.15
210 |               }
211 |             ]
212 |           },
213 |           "unit": "s"
214 |         },
215 |         "overrides": []
216 |       },
217 |       "gridPos": {
218 |         "h": 8,
219 |         "w": 12,
220 |         "x": 0,
221 |         "y": 10
222 |       },
223 |       "id": 4,
224 |       "options": {
225 |         "legend": {
226 |           "calcs": [],
227 |           "displayMode": "list",
228 |           "placement": "bottom",
229 |           "showLegend": true
230 |         },
231 |         "tooltip": {
232 |           "mode": "single",
233 |           "sort": "none"
234 |         }
235 |       },
236 |       "targets": [
237 |         {
238 |           "datasource": {
239 |             "type": "prometheus",
240 |             "uid": "prometheus"
241 |           },
242 |           "exemplar": true,
243 |           "expr": "histogram_quantile(0.95, sum by (le, operation) (rate(temporal_request_latency_attempt_bucket{job=\"benchmark-monitoring\",operation=\"StartWorkflowExecution\"}[$__rate_interval])))",
244 |           "interval": "",
245 |           "legendFormat": "{{operation}}",
246 |           "refId": "A"
247 |         }
248 |       ],
249 |       "title": "Request Latency p95",
250 |       "type": "timeseries"
251 |     },
252 |     {
253 |       "datasource": {
254 |         "type": "datasource",
255 |         "uid": "grafana"
256 |       },
257 |       "gridPos": {
258 |         "h": 8,
259 |         "w": 12,
260 |         "x": 12,
261 |         "y": 10
262 |       },
263 |       "id": 20,
264 |       "options": {
265 |         "alertInstanceLabelFilter": "{namespace=\"temporal\"}",
266 |         "alertName": "",
267 |         "dashboardAlerts": false,
268 |         "groupBy": [],
269 |         "groupMode": "default",
270 |         "maxItems": 20,
271 |         "sortOrder": 1,
272 |         "stateFilter": {
273 |           "error": true,
274 |           "firing": true,
275 |           "noData": false,
276 |           "normal": false,
277 |           "pending": true
278 |         },
279 |         "viewMode": "list"
280 |       },
281 |       "title": "Temporal Alerts",
282 |       "type": "alertlist"
283 |     }
284 |   ],
285 |   "refresh": "30s",
286 |   "schemaVersion": 37,
287 |   "style": "dark",
288 |   "tags": [
289 |     "slo",
290 |     "temporal"
291 |   ],
292 |   "templating": {
293 |     "list": []
294 |   },
295 |   "time": {
296 |     "from": "now-15m",
297 |     "to": "now"
298 |   },
299 |   "timepicker": {},
300 |   "timezone": "",
301 |   "title": "Soak Test - SLO",
302 |   "uid": "3f8dd4d1-9c3b-4ead-955b-f94000cc2273",
303 |   "version": 2,
304 |   "weekStart": ""
305 | }
306 | 


--------------------------------------------------------------------------------
/k8s/monitoring/dashboards/soak-test-worker.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "datasource",
  8 |           "uid": "grafana"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 32,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "collapsed": false,
 33 |       "gridPos": {
 34 |         "h": 1,
 35 |         "w": 24,
 36 |         "x": 0,
 37 |         "y": 0
 38 |       },
 39 |       "id": 17,
 40 |       "panels": [],
 41 |       "title": "Pods",
 42 |       "type": "row"
 43 |     },
 44 |     {
 45 |       "datasource": {
 46 |         "type": "prometheus",
 47 |         "uid": "P1809F7CD0C75ACF3"
 48 |       },
 49 |       "fieldConfig": {
 50 |         "defaults": {
 51 |           "color": {
 52 |             "mode": "palette-classic"
 53 |           },
 54 |           "custom": {
 55 |             "axisCenteredZero": false,
 56 |             "axisColorMode": "text",
 57 |             "axisLabel": "",
 58 |             "axisPlacement": "auto",
 59 |             "barAlignment": 0,
 60 |             "drawStyle": "line",
 61 |             "fillOpacity": 0,
 62 |             "gradientMode": "none",
 63 |             "hideFrom": {
 64 |               "legend": false,
 65 |               "tooltip": false,
 66 |               "viz": false
 67 |             },
 68 |             "lineInterpolation": "linear",
 69 |             "lineWidth": 1,
 70 |             "pointSize": 5,
 71 |             "scaleDistribution": {
 72 |               "type": "linear"
 73 |             },
 74 |             "showPoints": "auto",
 75 |             "spanNulls": false,
 76 |             "stacking": {
 77 |               "group": "A",
 78 |               "mode": "none"
 79 |             },
 80 |             "thresholdsStyle": {
 81 |               "mode": "off"
 82 |             }
 83 |           },
 84 |           "mappings": [],
 85 |           "thresholds": {
 86 |             "mode": "absolute",
 87 |             "steps": [
 88 |               {
 89 |                 "color": "green",
 90 |                 "value": null
 91 |               },
 92 |               {
 93 |                 "color": "red",
 94 |                 "value": 80
 95 |               }
 96 |             ]
 97 |           },
 98 |           "unit": "percentunit"
 99 |         },
100 |         "overrides": []
101 |       },
102 |       "gridPos": {
103 |         "h": 8,
104 |         "w": 12,
105 |         "x": 0,
106 |         "y": 1
107 |       },
108 |       "id": 12,
109 |       "options": {
110 |         "legend": {
111 |           "calcs": [],
112 |           "displayMode": "list",
113 |           "placement": "bottom",
114 |           "showLegend": true
115 |         },
116 |         "tooltip": {
117 |           "mode": "single",
118 |           "sort": "none"
119 |         }
120 |       },
121 |       "targets": [
122 |         {
123 |           "datasource": {
124 |             "type": "prometheus",
125 |             "uid": "prometheus"
126 |           },
127 |           "exemplar": true,
128 |           "expr": "sum(\n    node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"temporal\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-worker\", workload_type=\"statefulset\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\",resource=\"cpu\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-worker\", workload_type=\"statefulset\"}\n) by (pod)\n",
129 |           "interval": "",
130 |           "legendFormat": "{{pod}}",
131 |           "refId": "A"
132 |         }
133 |       ],
134 |       "title": "Worker CPU%",
135 |       "type": "timeseries"
136 |     },
137 |     {
138 |       "datasource": {
139 |         "type": "prometheus",
140 |         "uid": "prometheus"
141 |       },
142 |       "fieldConfig": {
143 |         "defaults": {
144 |           "color": {
145 |             "mode": "palette-classic"
146 |           },
147 |           "custom": {
148 |             "axisCenteredZero": false,
149 |             "axisColorMode": "text",
150 |             "axisLabel": "",
151 |             "axisPlacement": "auto",
152 |             "barAlignment": 0,
153 |             "drawStyle": "line",
154 |             "fillOpacity": 0,
155 |             "gradientMode": "none",
156 |             "hideFrom": {
157 |               "legend": false,
158 |               "tooltip": false,
159 |               "viz": false
160 |             },
161 |             "lineInterpolation": "linear",
162 |             "lineWidth": 1,
163 |             "pointSize": 5,
164 |             "scaleDistribution": {
165 |               "type": "linear"
166 |             },
167 |             "showPoints": "auto",
168 |             "spanNulls": false,
169 |             "stacking": {
170 |               "group": "A",
171 |               "mode": "none"
172 |             },
173 |             "thresholdsStyle": {
174 |               "mode": "off"
175 |             }
176 |           },
177 |           "mappings": [],
178 |           "thresholds": {
179 |             "mode": "absolute",
180 |             "steps": [
181 |               {
182 |                 "color": "green",
183 |                 "value": null
184 |               },
185 |               {
186 |                 "color": "red",
187 |                 "value": 80
188 |               }
189 |             ]
190 |           },
191 |           "unit": "percentunit"
192 |         },
193 |         "overrides": []
194 |       },
195 |       "gridPos": {
196 |         "h": 8,
197 |         "w": 12,
198 |         "x": 12,
199 |         "y": 1
200 |       },
201 |       "id": 13,
202 |       "options": {
203 |         "legend": {
204 |           "calcs": [],
205 |           "displayMode": "list",
206 |           "placement": "bottom",
207 |           "showLegend": true
208 |         },
209 |         "tooltip": {
210 |           "mode": "single",
211 |           "sort": "none"
212 |         }
213 |       },
214 |       "targets": [
215 |         {
216 |           "datasource": {
217 |             "type": "prometheus",
218 |             "uid": "prometheus"
219 |           },
220 |           "exemplar": true,
221 |           "expr": "sum(\n    container_memory_working_set_bytes{namespace=\"temporal\", container!=\"\", image!=\"\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-worker\", workload_type=\"statefulset\"}\n) by (pod)\n/sum(\n    kube_pod_container_resource_requests{job=\"kube-state-metrics\",namespace=\"temporal\", resource=\"memory\"}\n  * on(namespace,pod)\n    group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{namespace=\"temporal\", workload=\"temporal-worker\", workload_type=\"statefulset\"}\n) by (pod)\n",
222 |           "interval": "",
223 |           "legendFormat": "{{pod}}",
224 |           "refId": "A"
225 |         }
226 |       ],
227 |       "title": "Worker Memory%",
228 |       "type": "timeseries"
229 |     }
230 |   ],
231 |   "refresh": "30s",
232 |   "schemaVersion": 37,
233 |   "style": "dark",
234 |   "tags": [
235 |     "worker",
236 |     "temporal"
237 |   ],
238 |   "templating": {
239 |     "list": []
240 |   },
241 |   "time": {
242 |     "from": "now-1h",
243 |     "to": "now"
244 |   },
245 |   "timepicker": {},
246 |   "timezone": "browser",
247 |   "title": "Soak Test - Worker",
248 |   "uid": "af4475f3-4457-42b5-9586-d396aa45319c",
249 |   "version": 2,
250 |   "weekStart": ""
251 | }
252 | 


--------------------------------------------------------------------------------
/k8s/monitoring/grafana.ini:
--------------------------------------------------------------------------------
1 | # Default timezone setting for Grafana date handling.
2 | [date_formats]
3 | default_timezone = UTC
4 | 
5 | # Anonymous visitors receive the Admin org role. NOTE(review): presumably only reached through a tunnel; confirm it is never exposed publicly.
6 | [auth.anonymous]
7 | enabled = true
8 | org_role = Admin
9 | 


--------------------------------------------------------------------------------
/k8s/monitoring/kustomization.yaml:
--------------------------------------------------------------------------------
 1 | # Builds the monitoring stack: kube-prometheus plus the soak-test Grafana config, dashboards and alert rules.
 2 | resources:
 3 | # Remote base; listed under `resources` because the `bases` field is deprecated.
 4 | - "github.com/prometheus-operator/kube-prometheus?ref=v0.12.0"
 5 | - prometheus-rbacTemporal.yaml
 6 | - temporal-rules.yaml
 7 | secretGenerator:
 8 | # `behavior: replace` swaps the content of an existing grafana-config secret (presumably the one from the base) for grafana.ini.
 9 | - name: grafana-config
10 |   namespace: monitoring
11 |   behavior: replace
12 |   options:
13 |     disableNameSuffixHash: true
14 |   files:
15 |   - grafana.ini
16 | configMapGenerator:
17 | # `behavior: merge` adds dashboards/folder.yaml to an existing grafana-dashboards config map.
18 | - name: grafana-dashboards
19 |   namespace: monitoring
20 |   behavior: merge
21 |   options:
22 |     disableNameSuffixHash: true
23 |   files:
24 |   - dashboards/folder.yaml
25 | # New config map holding one entry per soak-test dashboard JSON file.
26 | - name: grafana-temporal-dashboard-definitions
27 |   namespace: monitoring
28 |   options:
29 |     disableNameSuffixHash: true
30 |   files:
31 |   - dashboards/soak-test-frontend.json
32 |   - dashboards/soak-test-history.json
33 |   - dashboards/soak-test-matching.json
34 |   - dashboards/soak-test-persistence.json
35 |   - dashboards/soak-test-pods.json
36 |   - dashboards/soak-test-polling.json
37 |   - dashboards/soak-test-services.json
38 |   - dashboards/soak-test-slo.json
39 |   - dashboards/soak-test-worker.json
40 | # NOTE(review): patchesStrategicMerge is deprecated in kustomize v5 in favour of `patches`; migrate once the patch files are confirmed compatible.
41 | patchesStrategicMerge:
42 | - ./dashboards/cloudwatch.yaml
43 | - ./dashboards/patch.yaml


--------------------------------------------------------------------------------
/k8s/monitoring/prometheus-rbacTemporal.yaml:
--------------------------------------------------------------------------------
 1 | # Read-only (get/list/watch) access to services, endpoints, pods and ingresses in the temporal namespace.
 2 | apiVersion: rbac.authorization.k8s.io/v1
 3 | kind: Role
 4 | metadata:
 5 |   name: prometheus-k8s
 6 |   namespace: temporal
 7 |   labels:
 8 |     app.kubernetes.io/name: prometheus
 9 |     app.kubernetes.io/instance: k8s
10 |     app.kubernetes.io/component: prometheus
11 |     app.kubernetes.io/part-of: kube-prometheus
12 |     app.kubernetes.io/version: "2.42.0"
13 | rules:
14 | - apiGroups: [""]
15 |   resources: [services, endpoints, pods]
16 |   verbs: [get, list, watch]
17 | - apiGroups: [extensions]
18 |   resources: [ingresses]
19 |   verbs: [get, list, watch]
20 | - apiGroups: [networking.k8s.io]
21 |   resources: [ingresses]
22 |   verbs: [get, list, watch]
23 | ---
24 | # Binds the prometheus-k8s Role in the temporal namespace to the prometheus-k8s service account of the monitoring namespace.
25 | apiVersion: rbac.authorization.k8s.io/v1
26 | kind: RoleBinding
27 | metadata:
28 |   name: prometheus-k8s
29 |   namespace: temporal
30 |   labels:
31 |     app.kubernetes.io/name: prometheus
32 |     app.kubernetes.io/instance: k8s
33 |     app.kubernetes.io/component: prometheus
34 |     app.kubernetes.io/part-of: kube-prometheus
35 |     app.kubernetes.io/version: "2.42.0"
36 | roleRef:
37 |   apiGroup: rbac.authorization.k8s.io
38 |   kind: Role
39 |   name: prometheus-k8s
40 | subjects:
41 | - kind: ServiceAccount
42 |   name: prometheus-k8s
43 |   namespace: monitoring
44 | 


--------------------------------------------------------------------------------
/k8s/monitoring/temporal-rules.yaml:
--------------------------------------------------------------------------------
 1 | # Alerts for the Temporal soak-test SLOs: each rule fires when a p95 latency stays above 0.150 (150ms) for 5 minutes.
 2 | apiVersion: monitoring.coreos.com/v1
 3 | kind: PrometheusRule
 4 | metadata:
 5 |   labels:
 6 |     prometheus: k8s
 7 |     role: alert-rules
 8 |   name: temporal-rules
 9 |   namespace: monitoring
10 | spec:
11 |   groups:
12 |   - name: temporal-metrics
13 |     rules:
14 |     # NOTE(review): the "Soak Test - SLO" dashboard panel queries temporal_request_latency_attempt_bucket instead; confirm which metric the SLO is measured on.
15 |     - alert: TemporalRequestLatencyHigh
16 |       annotations:
17 |         # humanizeDuration renders the value (in seconds) as a duration such as "200ms"; plain humanize would print "200m".
18 |         description: Temporal {{ $labels.operation }} request latency is currently {{ $value | humanizeDuration }}, outside of SLO 150ms.
19 |         summary: Temporal request latency is too high.
20 |       expr: |
21 |         histogram_quantile(0.95, sum by (le, operation) (rate(temporal_request_latency_bucket{job="benchmark-monitoring",operation="StartWorkflowExecution"}[5m])))
22 |         > 0.150
23 |       for: 5m
24 |       labels:
25 |         namespace: temporal
26 |         severity: critical
27 |     - alert: TemporalWorkflowTaskScheduleToStartLatencyHigh
28 |       annotations:
29 |         description: Temporal Workflow Task Schedule to Start latency is currently {{ $value | humanizeDuration }}, outside of SLO 150ms.
30 |         summary: Temporal Workflow Task Schedule to Start latency is too high.
31 |       expr: |
32 |         histogram_quantile(0.95, sum by (le) (rate(temporal_workflow_task_schedule_to_start_latency_bucket{namespace="default"}[5m])))
33 |         > 0.150
34 |       for: 5m
35 |       labels:
36 |         namespace: temporal
37 |         severity: critical
38 |     - alert: TemporalActivityScheduleToStartLatencyHigh
39 |       annotations:
40 |         description: Temporal Activity Schedule to Start latency is currently {{ $value | humanizeDuration }}, outside of SLO 150ms.
41 |         summary: Temporal Activity Schedule to Start latency is too high.
42 |       expr: |
43 |         histogram_quantile(0.95, sum by (le) (rate(temporal_activity_schedule_to_start_latency_bucket{namespace="default"}[5m])))
44 |         > 0.150
45 |       for: 5m
46 |       labels:
47 |         namespace: temporal
48 |         severity: critical
49 | 


--------------------------------------------------------------------------------
/k8s/temporal/frontend-deployment.yaml:
--------------------------------------------------------------------------------
 1 | # Deployment of the Temporal frontend service: two replicas of temporalio/server:1.20.3 started with SERVICES=frontend.
 2 | apiVersion: apps/v1
 3 | kind: Deployment
 4 | metadata:
 5 |   name: temporal-frontend
 6 |   labels:
 7 |     app.kubernetes.io/name: temporal
 8 |     app.kubernetes.io/instance: temporal
 9 |     app.kubernetes.io/component: frontend
10 |     app.kubernetes.io/part-of: temporal
11 |     app.kubernetes.io/version: "1.20.3"
12 | spec:
13 |   replicas: 2
14 |   revisionHistoryLimit: 10
15 |   progressDeadlineSeconds: 600
16 |   selector:
17 |     matchLabels:
18 |       app.kubernetes.io/name: temporal
19 |       app.kubernetes.io/instance: temporal
20 |       app.kubernetes.io/component: frontend
21 |   strategy:
22 |     type: RollingUpdate
23 |     rollingUpdate:
24 |       maxSurge: 25%
25 |       maxUnavailable: 25%
26 |   template:
27 |     metadata:
28 |       labels:
29 |         app.kubernetes.io/name: temporal
30 |         app.kubernetes.io/instance: temporal
31 |         app.kubernetes.io/component: frontend
32 |         app.kubernetes.io/part-of: temporal
33 |         app.kubernetes.io/version: "1.20.3"
34 |     spec:
35 |       restartPolicy: Always
36 |       dnsPolicy: ClusterFirst
37 |       schedulerName: default-scheduler
38 |       terminationGracePeriodSeconds: 30
39 |       securityContext: {}
40 |       containers:
41 |       - name: temporal
42 |         image: temporalio/server:1.20.3
43 |         imagePullPolicy: IfNotPresent
44 |         envFrom:
45 |         - configMapRef:
46 |             name: temporal-env
47 |         env:
48 |         # BIND_ON_IP is filled with the pod's own IP through the downward API.
49 |         - name: BIND_ON_IP
50 |           valueFrom:
51 |             fieldRef:
52 |               apiVersion: v1
53 |               fieldPath: status.podIP
54 |         - name: SERVICES
55 |           value: frontend
56 |         - name: PROMETHEUS_ENDPOINT
57 |           value: "0.0.0.0:8000"
58 |         - name: DYNAMIC_CONFIG_FILE_PATH
59 |           value: /etc/temporal/dynamic_config/dynamic_config.yaml
60 |         ports:
61 |         - name: rpc
62 |           containerPort: 7233
63 |           protocol: TCP
64 |         - name: metrics
65 |           containerPort: 8000
66 |           protocol: TCP
67 |         # TCP liveness check against the rpc port, first run 150s after the container starts.
68 |         livenessProbe:
69 |           tcpSocket:
70 |             port: rpc
71 |           initialDelaySeconds: 150
72 |           periodSeconds: 10
73 |           timeoutSeconds: 1
74 |           successThreshold: 1
75 |           failureThreshold: 3
76 |         terminationMessagePath: /dev/termination-log
77 |         terminationMessagePolicy: File
78 |         volumeMounts:
79 |         - name: dynamic-config
80 |           mountPath: /etc/temporal/dynamic_config
81 |       volumes:
82 |       # Projects only the dynamic_config.yaml key of the temporal-dynamic-config config map.
83 |       - name: dynamic-config
84 |         configMap:
85 |           name: temporal-dynamic-config
86 |           defaultMode: 420
87 |           items:
88 |           - key: dynamic_config.yaml
89 |             path: dynamic_config.yaml


--------------------------------------------------------------------------------
/k8s/temporal/frontend-service.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Service
 3 | metadata:
 4 |   name: temporal-frontend
 5 |   labels:
 6 |     app.kubernetes.io/component: frontend
 7 |     app.kubernetes.io/instance: temporal
 8 |     app.kubernetes.io/name: temporal
 9 |     app.kubernetes.io/part-of: temporal
10 |     app.kubernetes.io/version: 1.20.3  # matches the temporalio/server:1.20.3 image run by the frontend pods
11 | spec:
12 |   internalTrafficPolicy: Cluster
13 |   ports:
14 |   - name: grpc-rpc
15 |     port: 7233
16 |     protocol: TCP
17 |     targetPort: rpc  # named container port on the selected pods
18 |   selector:
19 |     app.kubernetes.io/component: frontend
20 |     app.kubernetes.io/instance: temporal
21 |     app.kubernetes.io/name: temporal
22 |   sessionAffinity: None
23 |   type: ClusterIP
24 |   clusterIP: None  # headless service: no virtual IP is allocated
25 | 


--------------------------------------------------------------------------------
/k8s/temporal/history-deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment
 3 | metadata:
 4 |   name: temporal-history
 5 |   labels:
 6 |     app.kubernetes.io/component: history
 7 |     app.kubernetes.io/instance: temporal
 8 |     app.kubernetes.io/name: temporal
 9 |     app.kubernetes.io/part-of: temporal
10 |     app.kubernetes.io/version: 1.20.3
11 | spec:
12 |   replicas: 2
13 |   revisionHistoryLimit: 10
14 |   selector:
15 |     matchLabels:
16 |       app.kubernetes.io/component: history
17 |       app.kubernetes.io/instance: temporal
18 |       app.kubernetes.io/name: temporal
19 |   strategy:
20 |     rollingUpdate:
21 |       maxSurge: 100%  # a rollout may create a complete extra set of pods at once
22 |       maxUnavailable: 100%  # and may take every old pod down at once (no availability kept during rollout)
23 |     type: RollingUpdate
24 |   template:
25 |     metadata:
26 |       labels:
27 |         app.kubernetes.io/component: history
28 |         app.kubernetes.io/instance: temporal
29 |         app.kubernetes.io/name: temporal
30 |         app.kubernetes.io/part-of: temporal
31 |         app.kubernetes.io/version: 1.20.3
32 |     spec:
33 |       containers:
34 |       - env:
35 |         - name: BIND_ON_IP
36 |           valueFrom:
37 |             fieldRef:
38 |               apiVersion: v1
39 |               fieldPath: status.podIP
40 |         - name: SERVICES
41 |           value: history
42 |         - name: PROMETHEUS_ENDPOINT
43 |           value: 0.0.0.0:8000
44 |         - name: DYNAMIC_CONFIG_FILE_PATH
45 |           value: /etc/temporal/dynamic_config/dynamic_config.yaml
46 |         - name: SQL_MAX_CONNS
47 |           value: "40"
48 |         envFrom:
49 |         - configMapRef:
50 |             name: temporal-env
51 |         image: temporalio/server:1.20.3
52 |         imagePullPolicy: IfNotPresent
53 |         livenessProbe:
54 |           failureThreshold: 3
55 |           initialDelaySeconds: 150
56 |           periodSeconds: 10
57 |           successThreshold: 1
58 |           tcpSocket:
59 |             port: rpc
60 |           timeoutSeconds: 1
61 |         name: temporal
62 |         ports:
63 |         - name: rpc
64 |           containerPort: 7234
65 |           protocol: TCP
66 |         - name: metrics
67 |           containerPort: 8000
68 |           protocol: TCP
69 |         terminationMessagePath: /dev/termination-log
70 |         terminationMessagePolicy: File
71 |         volumeMounts:
72 |         - mountPath: /etc/temporal/dynamic_config
73 |           name: dynamic-config
74 |       dnsPolicy: ClusterFirst
75 |       restartPolicy: Always
76 |       schedulerName: default-scheduler
77 |       securityContext: {}
78 |       terminationGracePeriodSeconds: 30
79 |       volumes:
80 |       - configMap:
81 |           defaultMode: 420
82 |           items:
83 |           - key: dynamic_config.yaml
84 |             path: dynamic_config.yaml
85 |           name: temporal-dynamic-config
86 |         name: dynamic-config


--------------------------------------------------------------------------------
/k8s/temporal/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:  # manifests bundled by this kustomization
2 | - frontend-service.yaml
3 | - frontend-deployment.yaml
4 | - worker-deployment.yaml
5 | - matching-deployment.yaml
6 | - history-deployment.yaml
7 | - monitoring-service.yaml
8 | - service-monitor.yaml
9 | namespace: temporal  # namespace kustomize sets on the resources listed above


--------------------------------------------------------------------------------
/k8s/temporal/matching-deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment
 3 | metadata:
 4 |   name: temporal-matching
 5 |   labels:
 6 |     app.kubernetes.io/component: matching
 7 |     app.kubernetes.io/instance: temporal
 8 |     app.kubernetes.io/name: temporal
 9 |     app.kubernetes.io/part-of: temporal
10 |     app.kubernetes.io/version: 1.20.3
11 | spec:
12 |   replicas: 2
13 |   revisionHistoryLimit: 10
14 |   selector:
15 |     matchLabels:
16 |       app.kubernetes.io/component: matching
17 |       app.kubernetes.io/instance: temporal
18 |       app.kubernetes.io/name: temporal
19 |   strategy:
20 |     rollingUpdate:
21 |       maxSurge: 100%  # all replacement pods may be started in one step
22 |       maxUnavailable: 100%  # all existing pods may be stopped in one step; rollout does not preserve capacity
23 |     type: RollingUpdate
24 |   template:
25 |     metadata:
26 |       labels:
27 |         app.kubernetes.io/component: matching
28 |         app.kubernetes.io/instance: temporal
29 |         app.kubernetes.io/name: temporal
30 |         app.kubernetes.io/part-of: temporal
31 |         app.kubernetes.io/version: 1.20.3
32 |     spec:
33 |       containers:
34 |       - env:
35 |         - name: BIND_ON_IP
36 |           valueFrom:
37 |             fieldRef:
38 |               apiVersion: v1
39 |               fieldPath: status.podIP
40 |         - name: SERVICES
41 |           value: matching
42 |         - name: PROMETHEUS_ENDPOINT
43 |           value: 0.0.0.0:8000
44 |         - name: DYNAMIC_CONFIG_FILE_PATH
45 |           value: /etc/temporal/dynamic_config/dynamic_config.yaml
46 |         envFrom:
47 |         - configMapRef:
48 |             name: temporal-env
49 |         image: temporalio/server:1.20.3
50 |         imagePullPolicy: IfNotPresent
51 |         livenessProbe:
52 |           failureThreshold: 3
53 |           initialDelaySeconds: 150
54 |           periodSeconds: 10
55 |           successThreshold: 1
56 |           tcpSocket:
57 |             port: rpc
58 |           timeoutSeconds: 1
59 |         name: temporal
60 |         ports:
61 |         - name: rpc
62 |           containerPort: 7235
63 |           protocol: TCP
64 |         - name: metrics
65 |           containerPort: 8000
66 |           protocol: TCP
67 |         terminationMessagePath: /dev/termination-log
68 |         terminationMessagePolicy: File
69 |         volumeMounts:
70 |         - mountPath: /etc/temporal/dynamic_config
71 |           name: dynamic-config
72 |       dnsPolicy: ClusterFirst
73 |       restartPolicy: Always
74 |       schedulerName: default-scheduler
75 |       securityContext: {}
76 |       terminationGracePeriodSeconds: 30
77 |       volumes:
78 |       - configMap:
79 |           defaultMode: 420
80 |           items:
81 |           - key: dynamic_config.yaml
82 |             path: dynamic_config.yaml
83 |           name: temporal-dynamic-config
84 |         name: dynamic-config


--------------------------------------------------------------------------------
/k8s/temporal/monitoring-service.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: Service
 3 | metadata:
 4 |   name: temporal-monitoring
 5 |   labels:
 6 |     app.kubernetes.io/instance: temporal
 7 |     app.kubernetes.io/name: temporal
 8 |     app.kubernetes.io/part-of: temporal
 9 |     app.kubernetes.io/version: 1.20.3  # keep in sync with the temporalio/server image tag used by the deployments
10 |     app.kubernetes.io/component: monitoring  # the ServiceMonitor selects this service by this label
11 | spec:
12 |   type: ClusterIP
13 |   clusterIP: None  # headless service: no virtual IP, endpoints are the pod IPs themselves
14 |   internalTrafficPolicy: Cluster
15 |   publishNotReadyAddresses: true  # pods are listed as endpoints even before they report ready
16 |   ports:
17 |   - name: metrics
18 |     port: 9090
19 |     targetPort: metrics  # container port named "metrics" (8000 in the Temporal deployments)
20 |     protocol: TCP
21 |   selector:  # no component label here, so pods of any component carrying these two labels match
22 |     app.kubernetes.io/instance: temporal
23 |     app.kubernetes.io/name: temporal
24 | 


--------------------------------------------------------------------------------
/k8s/temporal/service-monitor.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: monitoring.coreos.com/v1
 2 | kind: ServiceMonitor
 3 | metadata:
 4 |   name: temporal-monitor
 5 |   labels:
 6 |     app.kubernetes.io/name: temporal
 7 |     app.kubernetes.io/instance: temporal
 8 |     app.kubernetes.io/component: monitoring
 9 | spec:
10 |   endpoints:
11 |   - port: metrics  # service port name to scrape
12 |     interval: 10s
13 |   namespaceSelector:
14 |     matchNames:
15 |     - temporal
16 |   selector:  # matches services labelled as the Temporal monitoring component
17 |     matchLabels:
18 |       app.kubernetes.io/name: temporal
19 |       app.kubernetes.io/instance: temporal
20 |       app.kubernetes.io/component: monitoring
21 | 


--------------------------------------------------------------------------------
/k8s/temporal/worker-deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment
 3 | metadata:
 4 |   name: temporal-worker
 5 |   labels:
 6 |     app.kubernetes.io/component: worker
 7 |     app.kubernetes.io/instance: temporal
 8 |     app.kubernetes.io/name: temporal
 9 |     app.kubernetes.io/part-of: temporal
10 |     app.kubernetes.io/version: 1.20.3
11 | spec:
12 |   progressDeadlineSeconds: 600
13 |   replicas: 1
14 |   revisionHistoryLimit: 10
15 |   selector:
16 |     matchLabels:
17 |       app.kubernetes.io/component: worker
18 |       app.kubernetes.io/instance: temporal
19 |       app.kubernetes.io/name: temporal
20 |   strategy:
21 |     rollingUpdate:
22 |       maxSurge: 25%
23 |       maxUnavailable: 25%
24 |     type: RollingUpdate
25 |   template:
26 |     metadata:
27 |       labels:
28 |         app.kubernetes.io/component: worker
29 |         app.kubernetes.io/instance: temporal
30 |         app.kubernetes.io/name: temporal
31 |         app.kubernetes.io/part-of: temporal
32 |         app.kubernetes.io/version: 1.20.3
33 |     spec:
34 |       containers:
35 |       - env:
36 |         - name: BIND_ON_IP
37 |           valueFrom:
38 |             fieldRef:
39 |               apiVersion: v1
40 |               fieldPath: status.podIP
41 |         - name: SERVICES
42 |           value: worker
43 |         - name: PROMETHEUS_ENDPOINT
44 |           value: 0.0.0.0:8000
45 |         - name: DYNAMIC_CONFIG_FILE_PATH
46 |           value: /etc/temporal/dynamic_config/dynamic_config.yaml
47 |         envFrom:
48 |         - configMapRef:
49 |             name: temporal-env
50 |         image: temporalio/server:1.20.3
51 |         imagePullPolicy: IfNotPresent
52 |         name: temporal
53 |         ports:  # NOTE(review): unlike the history/matching deployments, no livenessProbe is defined for this container - confirm intentional
54 |         - name: rpc
55 |           containerPort: 7239
56 |           protocol: TCP
57 |         - name: metrics
58 |           containerPort: 8000  # same port as the PROMETHEUS_ENDPOINT env value
59 |           protocol: TCP
60 |         terminationMessagePath: /dev/termination-log
61 |         terminationMessagePolicy: File
62 |         volumeMounts:
63 |         - mountPath: /etc/temporal/dynamic_config
64 |           name: dynamic-config
65 |       dnsPolicy: ClusterFirst
66 |       restartPolicy: Always
67 |       schedulerName: default-scheduler
68 |       securityContext: {}
69 |       terminationGracePeriodSeconds: 30
70 |       volumes:
71 |       - configMap:
72 |           defaultMode: 420
73 |           items:
74 |           - key: dynamic_config.yaml
75 |             path: dynamic_config.yaml
76 |           name: temporal-dynamic-config
77 |         name: dynamic-config


--------------------------------------------------------------------------------
/stacks/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | 


--------------------------------------------------------------------------------
/stacks/Pulumi.eks-cassandra-medium.yaml:
--------------------------------------------------------------------------------
 1 | config:
 2 |   aws:assumeRole:
 3 |     roleArn: arn:aws:iam::912773994842:role/BenchmarkClusterAdmin
 4 |   aws:defaultTags:
 5 |     tags:
 6 |       Stack: eks-cassandra-medium  # matches this stack's name (Pulumi.eks-cassandra-medium.yaml)
 7 |   pulumi:template: kubernetes-go
 8 |   temporal-benchmarks-k8s:EnvironmentStackName: temporalio/temporal-benchmarks-aws-environment/main
 9 |   temporal-benchmarks-k8s:Cluster:
10 |     EKS:
11 |       NodeCount: 9
12 |       NodeType: m5.2xlarge
13 |   temporal-benchmarks-k8s:Persistence:
14 |     Cassandra:
15 |       NodeCount: 3
16 |       ReplicaCount: 3
17 |       NodeType: m5.xlarge
18 |     Visibility:
19 |       OpenSearch:
20 |         EngineVersion: OpenSearch_2.3
21 |         InstanceType: m5.large.search
22 |   temporal-benchmarks-k8s:Temporal:
23 |     Frontend:
24 |       Pods: 4
25 |       CPU:
26 |         request: 1.5
27 |         limit: 2
28 |     History:
29 |       Shards: 4096
30 |       Pods: 8
31 |       CPU:
32 |         request: 2
33 |         limit: 2.5
34 |       Memory:
35 |         request: 5Gi
36 |         limit: 6Gi
37 |     Matching:
38 |       Pods: 4
39 |       TaskQueuePartitions: 4
40 |       CPU:
41 |         request: 1
42 |         limit: 1.2
43 |       Memory:
44 |         request: 150Mi
45 |         limit: 200Mi
46 |     Workers:
47 |       Pods: 16
48 |       WorkflowPollers: 40
49 |       ActivityPollers: 100
50 |       CPU:
51 |         request: 250m
52 |         limit: 500m
53 |     SoakTest:
54 |       ConcurrentWorkflows: 200
55 |     DynamicConfig:
56 |       frontend.rps:
57 |       - value: 2400
58 |       matching.rps:
59 |       - value: 2400
60 | 


--------------------------------------------------------------------------------
/stacks/Pulumi.eks-rds-aurora-mysql-medium.yaml:
--------------------------------------------------------------------------------
 1 | config:
 2 |   aws:assumeRole:
 3 |     roleArn: arn:aws:iam::912773994842:role/BenchmarkClusterAdmin
 4 |   aws:defaultTags:
 5 |     tags:
 6 |       Stack: eks-rds-aurora-mysql-medium
 7 |   pulumi:template: kubernetes-go
 8 |   temporal-benchmarks-k8s:EnvironmentStackName: temporalio/temporal-benchmarks-aws-environment/main
 9 |   temporal-benchmarks-k8s:Cluster:
10 |     EKS:
11 |       NodeCount: 9
12 |       NodeType: m5.2xlarge
13 |   temporal-benchmarks-k8s:Persistence:
14 |     RDS:
15 |       Engine: aurora-mysql
16 |       EngineVersion: 5.7.mysql_aurora.2.11.1
17 |       InstanceType: db.r5.4xlarge
18 |     Visibility:
19 |       OpenSearch:
20 |         EngineVersion: OpenSearch_2.3
21 |         InstanceType: c6g.large.search
22 |   temporal-benchmarks-k8s:Temporal:
23 |     Frontend:
24 |       Pods: 4
25 |       CPU:
26 |         request: 1.5
27 |         limit: 2
28 |       Memory:
29 |         request: 256Mi
30 |         limit: 512Mi
31 |     History:
32 |       Shards: 4096
33 |       Pods: 8
34 |       CPU:
35 |         request: 3
36 |         limit: 3.5
37 |       Memory:
38 |         request: 5Gi
39 |         limit: 6Gi
40 |     Matching:
41 |       Pods: 4
42 |       TaskQueuePartitions: 4  # per Pulumi.yaml: used by the stack to scale matching, not passed to Temporal Server
43 |       CPU:
44 |         request: 1
45 |         limit: 1.5
46 |       Memory:
47 |         request: 150Mi
48 |         limit: 250Mi
49 |     Workers:
50 |       Pods: 16
51 |       WorkflowPollers: 40
52 |       ActivityPollers: 120
53 |       CPU:
54 |         request: 250m
55 |         limit: 400m
56 |       Memory:
57 |         request: 25Mi
58 |         limit: 50Mi
59 |     SoakTest:
60 |       ConcurrentWorkflows: 140
61 |     DynamicConfig:
62 |       frontend.rps:
63 |       - value: 2400
64 |       matching.rps:
65 |       - value: 2400
66 | 


--------------------------------------------------------------------------------
/stacks/Pulumi.eks-rds-aurora-postgres-medium.yaml:
--------------------------------------------------------------------------------
 1 | config:
 2 |   aws:assumeRole:
 3 |     roleArn: arn:aws:iam::912773994842:role/BenchmarkClusterAdmin
 4 |   aws:defaultTags:
 5 |     tags:
 6 |       Stack: eks-rds-aurora-postgres-medium
 7 |   pulumi:template: kubernetes-go
 8 |   temporal-benchmarks-k8s:EnvironmentStackName: temporalio/temporal-benchmarks-aws-environment/main
 9 |   temporal-benchmarks-k8s:Cluster:
10 |     EKS:
11 |       NodeCount: 5
12 |       NodeType: m5.2xlarge
13 |   temporal-benchmarks-k8s:Persistence:
14 |     RDS:
15 |       Engine: aurora-postgresql
16 |       EngineVersion: "14.4"  # quoted so the version stays a string instead of a YAML float
17 |       InstanceType: db.r5.8xlarge
18 |     Visibility:
19 |       OpenSearch:
20 |         EngineVersion: OpenSearch_2.3
21 |         InstanceType: m5.large.search
22 |   temporal-benchmarks-k8s:Temporal:
23 |     Frontend:
24 |       Pods: 4
25 |       CPU:
26 |         request: 1.5
27 |         limit: 2
28 |     History:
29 |       Shards: 4096
30 |       CPU:
31 |         request: 2
32 |         limit: 2.5
33 |       Memory:
34 |         request: 3Gi
35 |         limit: 4Gi
36 |     Matching:
37 |       TaskQueuePartitions: 4
38 |       CPU:
39 |         request: 750m
40 |         limit: 1000m
41 |     Workers:
42 |       Pods: 16
43 |       WorkflowPollers: 40
44 |       ActivityPollers: 80
45 |       CPU:
46 |         request: 250m
47 |         limit: 500m
48 |     SoakTest:
49 |       Pods: 2
50 |     DynamicConfig:
51 |       frontend.rps:
52 |       - value: 2400
53 |       matching.rps:
54 |       - value: 2400
55 | 


--------------------------------------------------------------------------------
/stacks/Pulumi.eks-rds-mysql-scaling-series.yaml:
--------------------------------------------------------------------------------
 1 | encryptionsalt: v1:2OehxWopbd0=:v1:YZaAahJaE8EWnmpw:Ygo7ewgyYj0zdFZlzOCsS4s5J5HjfQ==
 2 | config:
 3 |   aws:assumeRole:
 4 |     roleArn: arn:aws:iam::912773994842:role/BenchmarkClusterAdmin
 5 |   aws:defaultTags:
 6 |     tags:
 7 |       Stack: eks-rds-mysql-scaling-series  # matches this stack's name (Pulumi.eks-rds-mysql-scaling-series.yaml)
 8 |   pulumi:template: kubernetes-go
 9 |   temporal-benchmarks-k8s:AWS:
10 |     Region: us-west-2
11 |     AvailabilityZones: ["us-west-2a", "us-west-2b", "us-west-2c"]
12 |     PrivateSubnetIds: ["subnet-072e150708ec6a826", "subnet-0d8f7e9920a4f4cd0", "subnet-0ae931146b4d89ed9"]
13 |     PublicSubnetIds: ["subnet-0e09633802916e33f", "subnet-0a414606075f3c508", "subnet-0281dbc974eec544d"]
14 |     RdsSubnetGroupName: "temporal-benchmark-rds-2e8d5ab"
15 |     Role: BenchmarkClusterAdmin
16 |     VpcId: "vpc-0e66ce57785e3e745"
17 |   temporal-benchmarks-k8s:Benchmark:
18 |     Workers:
19 |       CPU:
20 |         Request: 2
21 |       Memory:
22 |         Request: 1Gi
23 |       Pods: 12
24 |       WorkflowPollers: 1200
25 |       ActivityPollers: 2000
26 |     SoakTest:
27 |       ConcurrentWorkflows: 300
28 |       Pods: 4
29 |       CPU:
30 |         Request: 1
31 |       Memory:
32 |         Request: 1Gi
33 |   temporal-benchmarks-k8s:Cluster:
34 |     EKS:
35 |       NodeCount: 10
36 |       TemporalNodeCount: 8
37 |       NodeType: m5.2xlarge
38 |   temporal-benchmarks-k8s:Persistence:
39 |     RDS:
40 |       Engine: mysql
41 |       EngineVersion: "8.0.32"
42 |       InstanceType: db.r5.16xlarge
43 |       IOPS: 20000
44 |   temporal-benchmarks-k8s:Temporal:
45 |     SetCPULimits: false
46 |     SetGoMaxProcs: true
47 |     Frontend:
48 |       Pods: 8
49 |       CPU:
50 |         Request: 2
51 |       Memory:
52 |         Request: 1Gi
53 |     History:
54 |       Pods: 4
55 |       CPU:
56 |         Request: 4
57 |       Memory:
58 |         Request: 8Gi
59 |       Shards: 1024
60 |     Matching:
61 |       Pods: 8
62 |       CPU:
63 |         Request: 2
64 |       Memory:
65 |         Request: 1Gi
66 |       TaskQueuePartitions: 8
67 |     Worker:
68 |       Pods: 2
69 |       CPU:
70 |         Request: 1
71 |       Memory:
72 |         Request: 1Gi
73 |     DynamicConfig:
74 |       frontend.rps:
75 |       - value: 6000
76 |       frontend.namespaceCount:
77 |       - value: 6000
78 |       matching.rps:
79 |       - value: 6000


--------------------------------------------------------------------------------
/stacks/Pulumi.eks-rds-postgres-medium.yaml:
--------------------------------------------------------------------------------
 1 | config:
 2 |   aws:assumeRole:
 3 |     roleArn: arn:aws:iam::912773994842:role/BenchmarkClusterAdmin
 4 |   aws:defaultTags:
 5 |     tags:
 6 |       Stack: eks-rds-postgres-medium
 7 |   pulumi:template: kubernetes-go
 8 |   temporal-benchmarks-k8s:EnvironmentStackName: temporalio/temporal-benchmarks-aws-environment/main
 9 |   temporal-benchmarks-k8s:Cluster:
10 |     EKS:
11 |       NodeCount: 3
12 |       NodeType: m5.2xlarge
13 |   temporal-benchmarks-k8s:Persistence:
14 |     RDS:
15 |       Engine: postgres
16 |       EngineVersion: "14.4"  # quoted so the version stays a string instead of a YAML float
17 |       InstanceType: db.r5.4xlarge
18 |     Visibility:
19 |       OpenSearch:
20 |         EngineVersion: OpenSearch_2.3
21 |         InstanceType: m5.large.search
22 |   temporal-benchmarks-k8s:Temporal:
23 |     Frontend:
24 |       Pods: 3
25 |       CPU:
26 |         request: 1.5
27 |         limit: 2
28 |     History:
29 |       Shards: 4096
30 |       CPU:
31 |         request: 1.5
32 |         limit: 2
33 |     Matching:
34 |       TaskQueuePartitions: 4
35 |       CPU:
36 |         request: 750m
37 |         limit: 1000m
38 |     Workers:
39 |       Pods: 16
40 |       WorkflowPollers: 40
41 |       ActivityPollers: 80
42 |       CPU:
43 |         request: 150m
44 |         limit: 200m
45 |     SoakTest:
46 |       Pods: 2
47 |     DynamicConfig:
48 |       frontend.rps:
49 |       - value: 2400
50 |       matching.rps:
51 |       - value: 2400
52 |       frontend.persistenceMaxQPS:
53 |       - value: 1200
54 |       history.persistenceMaxQPS:
55 |       - value: 1200
56 |       matching.persistenceMaxQPS:
57 |       - value: 1200
58 |       worker.persistenceMaxQPS:
59 |       - value: 1200
60 | 


--------------------------------------------------------------------------------
/stacks/Pulumi.eks-rds-postgres-micro.yaml:
--------------------------------------------------------------------------------
 1 | config:
 2 |   aws:assumeRole:
 3 |     roleArn: arn:aws:iam::912773994842:role/BenchmarkClusterAdmin
 4 |   aws:defaultTags:
 5 |     tags:
 6 |       Stack: eks-rds-postgres-micro
 7 |   pulumi:template: kubernetes-go
 8 |   temporal-benchmarks-k8s:EnvironmentStackName: temporalio/temporal-benchmarks-aws-environment/main
 9 |   temporal-benchmarks-k8s:Cluster:
10 |     EKS:
11 |       NodeCount: 1
12 |       NodeType: m5.2xlarge
13 |   temporal-benchmarks-k8s:Persistence:
14 |     RDS:
15 |       Engine: postgres
16 |       EngineVersion: "14.4"  # quoted so the version stays a string instead of a YAML float
17 |       InstanceType: db.r5.large
18 |       SingleAZ: true
19 |   temporal-benchmarks-k8s:Temporal:
20 |     Frontend:
21 |       Pods: 1
22 |       CPU:
23 |         request: 1.5
24 |         limit: 2
25 |     History:
26 |       Shards: 512
27 |       CPU:
28 |         request: 1.5
29 |         limit: 2
30 |     Matching:
31 |       TaskQueuePartitions: 4
32 |       CPU:
33 |         request: 750m
34 |         limit: 1000m
35 |     Workers:
36 |       Pods: 4
37 |       WorkflowPollers: 40
38 |       ActivityPollers: 80
39 |       CPU:
40 |         request: 150m
41 |         limit: 200m
42 |     SoakTest:
43 |       ConcurrentWorkflows: 15
44 | 


--------------------------------------------------------------------------------
/stacks/Pulumi.yaml:
--------------------------------------------------------------------------------
 1 | name: temporal-benchmarks-k8s
 2 | runtime: nodejs
 3 | description: Temporal benchmarks k8s clusters
 4 | template:
 5 |   config:
 6 |     temporal-benchmarks-k8s:EnvironmentStackName:
 7 |       description: AWS Environment stack to deploy into. See ../environments/aws.
 8 |       default: temporalio/temporal-benchmarks-aws-environment/main
 9 |     temporal-benchmarks-k8s:NodeType:
10 |       description: Node instance type to use for EKS
11 |       default: t2.medium
12 |     temporal-benchmarks-k8s:NodeCount:
13 |       description: Number of nodes to use for EKS
14 |       default: 6
15 |     temporal-benchmarks-k8s:PersistenceEngine:
16 |       description: RDS engine
17 |     temporal-benchmarks-k8s:PersistenceEngineVersion:
18 |       description: RDS engine version
19 |     temporal-benchmarks-k8s:PersistenceParameterGroupName:
20 |       description: RDS engine parameter group
21 |     temporal-benchmarks-k8s:PersistenceInstance:
22 |       description: RDS instance type
23 |     temporal-benchmarks-k8s:HistoryShards:
24 |       description: Number of history shards to use (https://docs.temporal.io/references/configuration#numhistoryshards)
25 |       default: 2048
26 |     temporal-benchmarks-k8s:TaskQueuePartitions:
27 |       description: Number of task queue partitions to use for the benchmark task queue. This value is only used by the stack to scale the matching system; it is not passed on as configuration for Temporal Server. Dynamic config should be set to actually apply the required settings for the server, see DynamicConfig.
28 |       default: 8
29 |     temporal-benchmarks-k8s:WorkerCount:
30 |       description: Number of benchmark worker processes to run
31 |       default: 16
32 |     temporal-benchmarks-k8s:WorkerWorkflowPollers:
33 |       description: Number of workflow task pollers for each benchmark worker (https://pkg.go.dev/go.temporal.io/sdk@v1.17.0/internal#WorkerOptions)
34 |       default: 32
35 |     temporal-benchmarks-k8s:WorkerActivityPollers:
36 |       description: Number of activity task pollers for each benchmark worker (https://pkg.go.dev/go.temporal.io/sdk@v1.17.0/internal#WorkerOptions)
37 |       default: 16
38 |     temporal-benchmarks-k8s:DynamicConfig:
39 |       description: "Temporal Server dynamic config. Note: this should be used to set task queue partitions as shown in the example below"
40 |       default: |
41 |         matching.numTaskqueueReadPartitions:
42 |         - value: 8
43 |         matching.numTaskqueueWritePartitions:
44 |         - value: 8
45 | 


--------------------------------------------------------------------------------
/stacks/README.md:
--------------------------------------------------------------------------------
 1 | # Benchmark stacks
 2 | 
 3 | These stacks build Temporal clusters ready for running benchmarks on. For this first stage only EKS clusters using RDS are supported.
 4 | 
 5 | ## Deployment
 6 | 
 7 | Before deploying a stack you will need to have an AWS environment stack deployed. Please see the [AWS Environment stack](../environments/aws/README.md).
 8 | 
 9 | Once the environment stack is deployed, unless you are a Temporal employee you will need to adjust the `EnvironmentStackName` config value in the stack you wish to deploy to point to your environment stack rather than `temporalio/temporal-benchmarks-aws-environment/main` that we use for our CI.
10 | 
11 | If you'd like to replicate one of our existing benchmarks, that is all you should need to adjust. You can then bring up the stack with `pulumi -s <stack name> up`.
12 | 
13 | For example, to bring up an EKS cluster with Temporal running against an RDS postgres db.r5.4xlarge instance you can use our existing stack configuration with:
14 | 
15 | ```shell
16 | $ pulumi -s eks-rds-postgres-medium up
17 | ```
18 | 


--------------------------------------------------------------------------------
/stacks/fetch-kubeconfig:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Fetch the kubeconfig of a Pulumi stack into a cache file under /tmp (only if
 4 | # it is not cached yet) and print an `export KUBECONFIG=...` line for the
 5 | # caller to evaluate.
 6 | #
 7 | # Usage: fetch-kubeconfig <stack name>
 8 | 
 9 | set -e
10 | 
11 | if [ -z "$1" ]; then
12 | 	echo "usage: $0 <stack name>" >&2
13 | 	exit 1
14 | fi
15 | 
16 | # Slashes in the stack name are replaced with dashes so the cache path is a
17 | # single file directly under /tmp.
18 | KUBECONFIG="/tmp/kubeconfig.${1//\//-}"
19 | 
20 | if [ ! -f "${KUBECONFIG}" ]; then
21 | 	# Write to a temporary file first: redirecting straight into the cache file
22 | 	# would leave an empty file behind when pulumi fails, and later runs would
23 | 	# then treat that empty file as a valid cached kubeconfig.
24 | 	pulumi stack -s "$1" output kubeconfig --json > "${KUBECONFIG}.tmp"
25 | 	mv "${KUBECONFIG}.tmp" "${KUBECONFIG}"
26 | fi
27 | 
28 | echo "export KUBECONFIG=${KUBECONFIG}"
29 | 


--------------------------------------------------------------------------------
/stacks/grafana-tunnel:
--------------------------------------------------------------------------------
1 | kubectl run grafana-tunnel \
2 |     --image ngrok/ngrok \
3 |     --image-pull-policy=Always \
4 |     --env NGROK_AUTHTOKEN=$NGROK_AUTHTOKEN \
5 |     -- http grafana.monitoring.svc.cluster.local:3000 --region=us --domain=temporal-benchmark.ngrok.io
6 | 


--------------------------------------------------------------------------------
/stacks/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "foo",
 3 |     "main": "index.ts",
 4 |     "devDependencies": {
 5 |         "@types/node": "^14"
 6 |     },
 7 |     "dependencies": {
 8 |         "@pulumi/aws": "^5.0.0",
 9 |         "@pulumi/awsx": "^0.40.0",
10 |         "@pulumi/eks": "^0.42.5",
11 |         "@pulumi/gcp": "^6.41.0",
12 |         "@pulumi/pulumi": "^3.0.0",
13 |         "@types/js-yaml": "^4.0.5"
14 |     }
15 | }
16 | 


--------------------------------------------------------------------------------
/stacks/port-forward-grafana:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Forward local port 3000 to the grafana service in the monitoring namespace.
3 | kubectl --namespace monitoring port-forward svc/grafana 3000


--------------------------------------------------------------------------------
/stacks/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "compilerOptions": {
 3 |         "strict": true,
 4 |         "outDir": "bin",
 5 |         "target": "es2016",
 6 |         "module": "commonjs",
 7 |         "moduleResolution": "node",
 8 |         "sourceMap": true,
 9 |         "experimentalDecorators": true,
10 |         "pretty": true,
11 |         "noFallthroughCasesInSwitch": true,
12 |         "noImplicitReturns": true,
13 |         "forceConsistentCasingInFileNames": true
14 |     },
15 |     "files": [
16 |         "index.ts"
17 |     ]
18 | }
19 | 


--------------------------------------------------------------------------------