├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .lint ├── Makefile ├── README.md ├── alerts └── alerts.libsonnet ├── config.libsonnet ├── dashboards ├── argo-cd-applications.libsonnet ├── argo-cd-notifications.libsonnet ├── argo-cd-operational.libsonnet └── dashboards.libsonnet ├── dashboards_out ├── argo-cd-application-overview.json ├── argo-cd-notifications-overview.json └── argo-cd-operational-overview.json ├── images ├── argo-cd-application-overview.png ├── argo-cd-notifications-overview.png └── argo-cd-operational-overview.png ├── jsonnetfile.json ├── lib ├── alerts.jsonnet └── dashboards.jsonnet ├── mixin.libsonnet ├── prometheus_alerts.yaml ├── scripts ├── go.mod ├── go.sum └── tools.go └── tests.yaml /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | branches: 8 | - main 9 | env: 10 | golang-version: "1.21" 11 | jobs: 12 | generate: 13 | runs-on: ubuntu-latest 14 | name: Generate yaml 15 | steps: 16 | - uses: actions/checkout@v2 17 | with: 18 | persist-credentials: false 19 | - uses: actions/setup-go@v2 20 | with: 21 | go-version: ${{ env.golang-version }} 22 | - run: make --always-make generate && git diff --exit-code 23 | 24 | jsonnet-lint: 25 | runs-on: ubuntu-latest 26 | name: Jsonnet linter 27 | steps: 28 | - uses: actions/checkout@v2 29 | with: 30 | persist-credentials: false 31 | - uses: actions/setup-go@v2 32 | with: 33 | go-version: ${{ env.golang-version }} 34 | - run: make --always-make jsonnet-lint 35 | 36 | dashboards-lint: 37 | runs-on: ubuntu-latest 38 | name: Grafana dashboard linter 39 | steps: 40 | - uses: actions/checkout@v2 41 | with: 42 | persist-credentials: false 43 | - uses: actions/setup-go@v2 44 | with: 45 | go-version: ${{ env.golang-version }} 46 | - run: make --always-make dashboards-lint 47 | 48 | alerts-lint: 49 | runs-on: ubuntu-latest 50 | name: Alerts linter 51 | steps: 52 | - 
uses: actions/checkout@v2 53 | with: 54 | persist-credentials: false 55 | - uses: actions/setup-go@v2 56 | with: 57 | go-version: ${{ env.golang-version }} 58 | - run: make --always-make alerts-lint 59 | 60 | fmt: 61 | runs-on: ubuntu-latest 62 | name: Jsonnet formatter 63 | steps: 64 | - uses: actions/checkout@v2 65 | with: 66 | persist-credentials: false 67 | - uses: actions/setup-go@v2 68 | with: 69 | go-version: ${{ env.golang-version }} 70 | - run: make --always-make fmt && git diff --exit-code 71 | 72 | unit-tests: 73 | runs-on: ubuntu-latest 74 | name: Unit tests 75 | steps: 76 | - uses: actions/checkout@v2 77 | with: 78 | persist-credentials: false 79 | - uses: actions/setup-go@v2 80 | with: 81 | go-version: ${{ env.golang-version }} 82 | - run: make --always-make test 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | vendor 2 | tmp 3 | jsonnetfile.lock.json 4 | dashboards_out/.lint 5 | -------------------------------------------------------------------------------- /.lint: -------------------------------------------------------------------------------- 1 | --- 2 | exclusions: 3 | template-job-rule: 4 | reason: Jobs are set to multi in our case. 5 | target-job-rule: 6 | reason: Jobs are set to multi in our case. 7 | template-instance-rule: 8 | reason: We don't use instances. 9 | panel-datasource-rule: 10 | reason: Using a datasource for each panel. 
11 | panel-title-description-rule: 12 | reason: TODO(adinhodovic) 13 | target-instance-rule: 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | BIN_DIR ?= $(shell pwd)/tmp/bin 2 | # Every tool below is built into $(BIN_DIR) by the $(TOOLING) rule. 3 | JSONNET_VENDOR=vendor 4 | GRAFANA_DASHBOARD_LINTER_BIN=$(BIN_DIR)/dashboard-linter 5 | JB_BIN=$(BIN_DIR)/jb 6 | JSONNET_BIN=$(BIN_DIR)/jsonnet 7 | JSONNETLINT_BIN=$(BIN_DIR)/jsonnet-lint 8 | JSONNETFMT_BIN=$(BIN_DIR)/jsonnetfmt 9 | PROMTOOL_BIN=$(BIN_DIR)/promtool 10 | TOOLING=$(JB_BIN) $(JSONNETLINT_BIN) $(JSONNET_BIN) $(JSONNETFMT_BIN) $(PROMTOOL_BIN) $(GRAFANA_DASHBOARD_LINTER_BIN) 11 | JSONNETFMT_ARGS=-n 2 --max-blank-lines 2 --string-style s --comment-style s 12 | SRC_DIR ?=dashboards 13 | OUT_DIR ?=dashboards_out 14 | 15 | .PHONY: all 16 | all: fmt generate lint test 17 | 18 | .PHONY: generate 19 | generate: prometheus_alerts.yaml $(OUT_DIR) # prometheus_rules.yaml 20 | 21 | $(JSONNET_VENDOR): $(JB_BIN) jsonnetfile.json 22 | $(JB_BIN) install 23 | 24 | .PHONY: fmt 25 | fmt: $(JSONNETFMT_BIN) 26 | find . -name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \ 27 | xargs -n 1 -- $(JSONNETFMT_BIN) $(JSONNETFMT_ARGS) -i 28 | # The alert rules read _config values, so config.libsonnet is a prerequisite as well. 29 | prometheus_alerts.yaml: $(JSONNET_BIN) mixin.libsonnet config.libsonnet lib/alerts.jsonnet alerts/*.libsonnet 30 | @$(JSONNET_BIN) -J vendor -S lib/alerts.jsonnet > $@ 31 | 32 | prometheus_rules.yaml: $(JSONNET_BIN) mixin.libsonnet lib/rules.jsonnet rules/*.libsonnet 33 | @$(JSONNET_BIN) -J vendor -S lib/rules.jsonnet > $@ 34 | # The dashboards read _config values too; jsonnet -m writes its output files into $(OUT_DIR). 35 | $(OUT_DIR): $(JSONNET_BIN) $(JSONNET_VENDOR) mixin.libsonnet config.libsonnet lib/dashboards.jsonnet $(SRC_DIR)/*.libsonnet 36 | @mkdir -p $(OUT_DIR) 37 | @$(JSONNET_BIN) -J vendor -m $(OUT_DIR) lib/dashboards.jsonnet 38 | 39 | .PHONY: lint 40 | lint: jsonnet-lint alerts-lint dashboards-lint 41 | 42 | .PHONY: jsonnet-lint 43 | jsonnet-lint: $(JSONNETLINT_BIN) $(JSONNET_VENDOR) 44 | @find . 
-name 'vendor' -prune -o -name '*.libsonnet' -print -o -name '*.jsonnet' -print | \ 45 | xargs -n 1 -- $(JSONNETLINT_BIN) -J vendor 46 | 47 | 48 | .PHONY: alerts-lint 49 | alerts-lint: $(PROMTOOL_BIN) prometheus_alerts.yaml # prometheus_rules.yaml 50 | # @$(PROMTOOL_BIN) check rules prometheus_rules.yaml 51 | @$(PROMTOOL_BIN) check rules prometheus_alerts.yaml 52 | 53 | $(OUT_DIR)/.lint: $(OUT_DIR) 54 | @cp .lint $@ 55 | 56 | .PHONY: dashboards-lint 57 | dashboards-lint: $(GRAFANA_DASHBOARD_LINTER_BIN) $(OUT_DIR)/.lint 58 | @find $(OUT_DIR) -name '*.json' -print0 | xargs -n 1 -0 $(GRAFANA_DASHBOARD_LINTER_BIN) lint --strict --config .lint 59 | 60 | 61 | .PHONY: clean 62 | clean: 63 | # Remove all files and directories ignored by git. 64 | git clean -Xfd . 65 | 66 | .PHONY: test 67 | test: $(PROMTOOL_BIN) prometheus_alerts.yaml # prometheus_rules.yaml 68 | @$(PROMTOOL_BIN) test rules tests.yaml 69 | 70 | $(BIN_DIR): 71 | mkdir -p $(BIN_DIR) 72 | # $(BIN_DIR) is order-only: its mtime changes whenever a binary is written into it, which would otherwise mark every tool stale. 73 | $(TOOLING): | $(BIN_DIR) 74 | @echo Installing tools from scripts/tools.go 75 | @cd scripts && go list -e -mod=mod -tags tools -f '{{ range .Imports }}{{ printf "%s\n" .}}{{end}}' ./ | xargs -tI % go build -mod=mod -o $(BIN_DIR) % 76 | 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Prometheus Monitoring Mixin for ArgoCD 2 | 3 | A set of Grafana dashboards and Prometheus alerts for ArgoCD. 4 | 5 | ## How to use 6 | 7 | This mixin is designed to be vendored into the repo with your infrastructure config. 8 | To do this, use [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler): 9 | 10 | You then have three options for deploying your dashboards 11 | 12 | 1. Generate the config files and deploy them yourself 13 | 2. Use jsonnet to deploy this mixin along with Prometheus and Grafana 14 | 3. 
Use prometheus-operator to deploy this mixin 15 | 16 | Or import the dashboards from the JSON files in `./dashboards_out`; alternatively, import them from the `Grafana.com` dashboard page. 17 | 18 | ## Generate config files 19 | 20 | You can manually generate the alerts, dashboards and rules files, but first you 21 | must install some tools: 22 | 23 | ```sh 24 | go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest 25 | brew install jsonnet 26 | ``` 27 | 28 | Then, grab the mixin and its dependencies: 29 | 30 | ```sh 31 | git clone https://github.com/adinhodovic/argo-cd-mixin 32 | cd argo-cd-mixin 33 | jb install 34 | ``` 35 | 36 | Finally, build the mixin: 37 | 38 | ```sh 39 | make prometheus_alerts.yaml 40 | make dashboards_out 41 | ``` 42 | 43 | The `prometheus_alerts.yaml` file then needs to be passed 44 | to your Prometheus server, and the files in `dashboards_out` need to be imported 45 | into your Grafana server. The exact details will depend on how you deploy your 46 | monitoring stack. 47 | 48 | ## Alerts 49 | 50 | The mixin follows the [monitoring-mixins guidelines](https://github.com/monitoring-mixins/docs#guidelines-for-alert-names-labels-and-annotations) for alerts. 
51 | 52 | ## Preview 53 | 54 | ### Operational Dashboard 55 | 56 | ![argo-cd-operational-overview](images/argo-cd-operational-overview.png) 57 | 58 | ### Application Dashboard 59 | 60 | ![argo-cd-application-overview](images/argo-cd-application-overview.png) 61 | 62 | ### Notification Dashboard 63 | 64 | ![argo-cd-notifications-overview](images/argo-cd-notifications-overview.png) 65 | -------------------------------------------------------------------------------- /alerts/alerts.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | local clusterVariableQueryString = if $._config.showMultiCluster then '&var-%(clusterLabel)s={{ $labels.%(clusterLabel)s}}' % $._config else '', 3 | prometheusAlerts+:: { 4 | groups+: [ 5 | { 6 | name: 'argo-cd', 7 | rules: std.prune([ 8 | { 9 | alert: 'ArgoCdAppSyncFailed', 10 | expr: ||| 11 | sum( 12 | round( 13 | increase( 14 | argocd_app_sync_total{ 15 | %(argoCdSelector)s, 16 | phase!="Succeeded" 17 | }[%(argoCdAppSyncInterval)s] 18 | ) 19 | ) 20 | ) by (%(clusterLabel)s, job, dest_server, project, name, phase) > 0 21 | ||| % $._config, 22 | labels: { 23 | severity: 'warning', 24 | }, 25 | 'for': '1m', 26 | annotations: { 27 | summary: 'An ArgoCD Application has Failed to Sync.', 28 | description: 'The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has failed to sync with the status {{ $labels.phase }} the past %s.' 
% $._config.argoCdAppSyncInterval, 29 | dashboard_url: $._config.applicationOverviewDashboardUrl + '?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}' + clusterVariableQueryString, 30 | }, 31 | }, 32 | if $._config.argoCdAppUnhealthyEnabled then { 33 | alert: 'ArgoCdAppUnhealthy', 34 | expr: ||| 35 | sum( 36 | argocd_app_info{ 37 | %(argoCdSelector)s, 38 | health_status!~"Healthy|Progressing" 39 | } 40 | ) by (%(clusterLabel)s, job, dest_server, project, name, health_status) 41 | > 0 42 | ||| % $._config, 43 | labels: { 44 | severity: 'warning', 45 | }, 46 | 'for': $._config.argoCdAppUnhealthyFor, 47 | annotations: { 48 | summary: 'An ArgoCD Application is Unhealthy.', 49 | description: 'The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is unhealthy with the health status {{ $labels.health_status }} for the past %s.' % $._config.argoCdAppUnhealthyFor, 50 | dashboard_url: $._config.applicationOverviewDashboardUrl + '?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}' + clusterVariableQueryString, 51 | }, 52 | }, 53 | if $._config.argoCdAppOutOfSyncEnabled then { 54 | alert: 'ArgoCdAppOutOfSync', 55 | expr: ||| 56 | sum( 57 | argocd_app_info{ 58 | %(argoCdSelector)s, 59 | sync_status!="Synced" 60 | } 61 | ) by (%(clusterLabel)s, job, dest_server, project, name, sync_status) 62 | > 0 63 | ||| % $._config, 64 | labels: { 65 | severity: 'warning', 66 | }, 67 | 'for': $._config.argoCdAppOutOfSyncFor, 68 | annotations: { 69 | summary: 'An ArgoCD Application is Out Of Sync.', 70 | description: 'The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is out of sync with the sync status {{ $labels.sync_status }} for the past %s.' 
% $._config.argoCdAppOutOfSyncFor, 71 | dashboard_url: $._config.applicationOverviewDashboardUrl + '?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}' + clusterVariableQueryString, 72 | }, 73 | }, 74 | if $._config.argoCdAppAutoSyncDisabledEnabled then { 75 | alert: 'ArgoCdAppAutoSyncDisabled', 76 | expr: ||| 77 | sum( 78 | argocd_app_info{ 79 | %(argoCdSelector)s, 80 | autosync_enabled!="true", 81 | name!~"%(argoAutoSyncDisabledIgnoredApps)s" 82 | } 83 | ) by (%(clusterLabel)s, job, dest_server, project, name, autosync_enabled) 84 | > 0 85 | ||| % $._config, 86 | labels: { 87 | severity: 'warning', 88 | }, 89 | 'for': $._config.argoCdAppAutoSyncDisabledFor, 90 | annotations: { 91 | summary: 'An ArgoCD Application has AutoSync Disabled.', 92 | description: 'The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has autosync disabled for the past %s.' % $._config.argoCdAppAutoSyncDisabledFor, 93 | dashboard_url: $._config.applicationOverviewDashboardUrl + '?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}' + clusterVariableQueryString, 94 | }, 95 | }, 96 | { 97 | alert: 'ArgoCdNotificationDeliveryFailed', 98 | expr: ||| 99 | sum( 100 | round( 101 | increase( 102 | argocd_notifications_deliveries_total{ 103 | %(argoCdSelector)s, 104 | succeeded!="true" 105 | }[%(argoCdNotificationDeliveryInterval)s] 106 | ) 107 | ) 108 | ) by (%(clusterLabel)s, job, exported_service, succeeded) > 0 109 | ||| % $._config, 110 | 'for': '1m', 111 | labels: { 112 | severity: 'warning', 113 | }, 114 | annotations: { 115 | summary: 'ArgoCD Notification Delivery Failed.', 116 | description: 'The notification job {{ $labels.job }} has failed to deliver to {{ $labels.exported_service }} for the past %s.' 
% $._config.argoCdNotificationDeliveryInterval, 117 | dashboard_url: $._config.notificationsOverviewDashboardUrl + '?var-job={{ $labels.job }}&var-exported_service={{ $labels.exported_service }}' + clusterVariableQueryString, 118 | }, 119 | }, 120 | ]), 121 | }, 122 | ], 123 | }, 124 | } 125 | -------------------------------------------------------------------------------- /config.libsonnet: -------------------------------------------------------------------------------- 1 | local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; 2 | local annotation = g.dashboard.annotation; 3 | 4 | { 5 | _config+:: { 6 | // Bypasses grafana.com/dashboards validator 7 | bypassDashboardValidation: { 8 | __inputs: [], 9 | __requires: [], 10 | }, 11 | 12 | argoCdSelector: 'job=~".*"', 13 | 14 | // Default datasource name 15 | datasourceName: 'default', 16 | 17 | // Opt-in to multiCluster dashboards by overriding this and the clusterLabel. 18 | showMultiCluster: false, 19 | clusterLabel: 'cluster', 20 | 21 | grafanaUrl: 'https://grafana.com', 22 | argoCdUrl: 'https://argocd.com', 23 | 24 | operationalOverviewDashboardUid: 'argo-cd-operational-overview-kask', 25 | applicationOverviewDashboardUid: 'argo-cd-application-overview-kask', 26 | notificationsOverviewDashboardUid: 'argo-cd-notifications-overview-kask', 27 | 28 | applicationOverviewDashboardUrl: '%s/d/%s/argocd-application-overview' % [self.grafanaUrl, self.applicationOverviewDashboardUid], 29 | notificationsOverviewDashboardUrl: '%s/d/%s/argocd-notifications-overview' % [self.grafanaUrl, self.notificationsOverviewDashboardUid], 30 | 31 | tags: ['ci/cd', 'argo-cd'], 32 | 33 | argoCdAppOutOfSyncEnabled: true, 34 | argoCdAppOutOfSyncFor: '15m', 35 | argoCdAppUnhealthyEnabled: true, 36 | argoCdAppUnhealthyFor: '15m', 37 | argoCdAppAutoSyncDisabledEnabled: true, 38 | argoCdAppAutoSyncDisabledFor: '2h', 39 | argoCdAppSyncInterval: '10m', 40 | argoCdNotificationDeliveryInterval: '10m', 41 | 42 | // List of 
applications to ignore in the auto sync disabled alert 43 | argoAutoSyncDisabledIgnoredApps: '', 44 | 45 | // Custom annotations to display in graphs 46 | annotation: { 47 | enabled: false, 48 | name: 'Custom Annotation', 49 | datasource: '-- Grafana --', 50 | iconColor: 'green', 51 | tags: [], 52 | }, 53 | 54 | // Render ArgoCD badges in the dashboards 55 | // []struct{} 56 | // [ 57 | // { 58 | // name: 'ArgoCD', 59 | // applicationName: 'ArgoCD', // or self.name 60 | // environment: 'Production', 61 | // argoCdUrl: "https://argo-cd.example.com" // or $._config.argoCdUrl 62 | // } 63 | // ] 64 | applications: [], 65 | 66 | customAnnotation:: if $._config.annotation.enabled then 67 | annotation.withName($._config.annotation.name) + 68 | annotation.withIconColor($._config.annotation.iconColor) + 69 | annotation.withHide(false) + 70 | annotation.datasource.withUid($._config.annotation.datasource) + 71 | annotation.target.withMatchAny(true) + 72 | annotation.target.withTags($._config.annotation.tags) + 73 | annotation.target.withType('tags') 74 | else {}, 75 | }, 76 | } 77 | -------------------------------------------------------------------------------- /dashboards/argo-cd-applications.libsonnet: -------------------------------------------------------------------------------- 1 | local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; 2 | local dashboard = g.dashboard; 3 | local row = g.panel.row; 4 | local grid = g.util.grid; 5 | 6 | local tablePanel = g.panel.table; 7 | local timeSeriesPanel = g.panel.timeSeries; 8 | local textPanel = g.panel.text; 9 | 10 | local variable = dashboard.variable; 11 | local datasource = variable.datasource; 12 | local query = variable.query; 13 | local prometheus = g.query.prometheus; 14 | 15 | // Timeseries 16 | local tsOptions = timeSeriesPanel.options; 17 | local tsStandardOptions = timeSeriesPanel.standardOptions; 18 | local tsQueryOptions = timeSeriesPanel.queryOptions; 19 | local tsFieldConfig = 
timeSeriesPanel.fieldConfig; 20 | local tsCustom = tsFieldConfig.defaults.custom; 21 | local tsLegend = tsOptions.legend; 22 | 23 | // Table 24 | local tbOptions = tablePanel.options; 25 | local tbStandardOptions = tablePanel.standardOptions; 26 | local tbPanelOptions = tablePanel.panelOptions; 27 | local tbQueryOptions = tablePanel.queryOptions; 28 | local tbFieldConfig = tablePanel.fieldConfig; 29 | local tbCustom = tbFieldConfig.defaults.custom; 30 | local tbOverride = tbStandardOptions.override; 31 | 32 | { 33 | grafanaDashboards+:: { 34 | 35 | local datasourceVariable = 36 | datasource.new( 37 | 'datasource', 38 | 'prometheus', 39 | ) + 40 | datasource.generalOptions.withLabel('Data source') + 41 | { 42 | current: { 43 | selected: true, 44 | text: $._config.datasourceName, 45 | value: $._config.datasourceName, 46 | }, 47 | }, 48 | // The variable is named after clusterLabel, so its values must be read from that same label rather than a hard-coded 'cluster'. 49 | local clusterVariable = 50 | query.new( 51 | $._config.clusterLabel, 52 | 'label_values(argocd_app_info{}, %(clusterLabel)s)' % $._config, 53 | ) + 54 | query.withDatasourceFromVariable(datasourceVariable) + 55 | query.withSort() + 56 | query.generalOptions.withLabel('Cluster') + 57 | query.refresh.onLoad() + 58 | query.refresh.onTime() + 59 | ( 60 | if $._config.showMultiCluster 61 | then query.generalOptions.showOnDashboard.withLabelAndValue() 62 | else query.generalOptions.showOnDashboard.withNothing() 63 | ), 64 | 65 | local namespaceVariable = 66 | query.new( 67 | 'namespace', 68 | 'label_values(argocd_app_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config, 69 | ) + 70 | query.withDatasourceFromVariable(datasourceVariable) + 71 | query.withSort(1) + 72 | query.generalOptions.withLabel('Namespace') + 73 | query.selectionOptions.withMulti(true) + 74 | query.selectionOptions.withIncludeAll(true) + 75 | query.refresh.onLoad() + 76 | query.refresh.onTime(), 77 | 78 | local jobVariable = 79 | query.new( 80 | 'job', 81 | 'label_values(argocd_app_info{%(clusterLabel)s="$cluster", namespace=~"$namespace"}, job)' % $._config, 82 | ) + 
83 | query.withDatasourceFromVariable(datasourceVariable) + 84 | query.withSort(1) + 85 | query.generalOptions.withLabel('Job') + 86 | query.selectionOptions.withMulti(true) + 87 | query.selectionOptions.withIncludeAll(true) + 88 | query.refresh.onLoad() + 89 | query.refresh.onTime(), 90 | 91 | local kubernetesClusterVariable = 92 | query.new( 93 | 'kubernetes_cluster', 94 | 'label_values(argocd_app_info{%(clusterLabel)s="$cluster", namespace=~"$namespace", job=~"$job"}, dest_server)' % $._config, 95 | ) + 96 | query.withDatasourceFromVariable(datasourceVariable) + 97 | query.withSort(1) + 98 | query.generalOptions.withLabel('Kubernetes Cluster') + 99 | query.selectionOptions.withMulti(true) + 100 | query.selectionOptions.withIncludeAll(true) + 101 | query.refresh.onLoad() + 102 | query.refresh.onTime(), 103 | 104 | local projectVariable = 105 | query.new( 106 | 'project', 107 | 'label_values(argocd_app_info{%(clusterLabel)s="$cluster", namespace=~"$namespace", job=~"$job", dest_server=~"$kubernetes_cluster"}, project)' % $._config 108 | ) + 109 | query.withDatasourceFromVariable(datasourceVariable) + 110 | query.withSort(1) + 111 | query.generalOptions.withLabel('Project') + 112 | query.selectionOptions.withMulti(true) + 113 | query.selectionOptions.withIncludeAll(true) + 114 | query.refresh.onLoad() + 115 | query.refresh.onTime(), 116 | 117 | local applicationVariable = 118 | query.new( 119 | 'application', 120 | 'label_values(argocd_app_info{%(clusterLabel)s="$cluster", namespace=~"$namespace", job=~"$job", dest_server=~"$kubernetes_cluster", project=~"$project"}, name)' % $._config 121 | ) + 122 | query.withDatasourceFromVariable(datasourceVariable) + 123 | query.withSort(1) + 124 | query.generalOptions.withLabel('Application') + 125 | query.selectionOptions.withMulti(true) + 126 | query.selectionOptions.withIncludeAll(false) + 127 | query.refresh.onLoad() + 128 | query.refresh.onTime(), 129 | 130 | local variables = [ 131 | datasourceVariable, 132 | 
clusterVariable, 133 | namespaceVariable, 134 | jobVariable, 135 | kubernetesClusterVariable, 136 | projectVariable, 137 | applicationVariable, 138 | ], 139 | 140 | local commonLabels = ||| 141 | %(clusterLabel)s="$cluster", 142 | namespace=~'$namespace', 143 | job=~'$job', 144 | dest_server=~'$kubernetes_cluster', 145 | project=~'$project', 146 | ||| % $._config, 147 | 148 | local appHealthStatusQuery = ||| 149 | sum( 150 | argocd_app_info{ 151 | %s 152 | } 153 | ) by (job, dest_server, project, health_status) 154 | ||| % commonLabels, 155 | 156 | local appHealthStatusTimeSeriesPanel = 157 | timeSeriesPanel.new( 158 | 'Application Health Status', 159 | ) + 160 | tsQueryOptions.withTargets( 161 | prometheus.new( 162 | '$datasource', 163 | appHealthStatusQuery, 164 | ) + 165 | prometheus.withLegendFormat( 166 | '{{ dest_server }}/{{ project }} - {{ health_status }}' 167 | ) 168 | ) + 169 | tsStandardOptions.withUnit('short') + 170 | tsOptions.tooltip.withMode('multi') + 171 | tsOptions.tooltip.withSort('desc') + 172 | tsLegend.withShowLegend(true) + 173 | tsLegend.withDisplayMode('table') + 174 | tsLegend.withPlacement('right') + 175 | tsLegend.withCalcs(['last', 'max']) + 176 | tsLegend.withSortBy('Last') + 177 | tsLegend.withSortDesc(true) + 178 | tsCustom.withFillOpacity(10), 179 | 180 | local appSyncStatusQuery = ||| 181 | sum( 182 | argocd_app_info{ 183 | %s 184 | } 185 | ) by (job, dest_server, project, sync_status) 186 | ||| % commonLabels, 187 | 188 | local appSyncStatusTimeSeriesPanel = 189 | timeSeriesPanel.new( 190 | 'Application Sync Status', 191 | ) + 192 | tsQueryOptions.withTargets( 193 | prometheus.new( 194 | '$datasource', 195 | appSyncStatusQuery, 196 | ) + 197 | prometheus.withLegendFormat( 198 | '{{ dest_server }}/{{ project }} - {{ sync_status }}', 199 | ) 200 | ) + 201 | tsStandardOptions.withUnit('short') + 202 | tsOptions.tooltip.withMode('multi') + 203 | tsOptions.tooltip.withSort('desc') + 204 | tsLegend.withShowLegend(true) + 205 | 
tsLegend.withDisplayMode('table') + 206 | tsLegend.withPlacement('right') + 207 | tsLegend.withCalcs(['last', 'max']) + 208 | tsLegend.withSortBy('Last') + 209 | tsLegend.withSortDesc(true) + 210 | tsCustom.withFillOpacity(10), 211 | 212 | local appSyncQuery = ||| 213 | sum( 214 | round( 215 | increase( 216 | argocd_app_sync_total{ 217 | %s 218 | }[$__rate_interval] 219 | ) 220 | ) 221 | ) by (job, dest_server, project, phase) 222 | ||| % commonLabels, 223 | 224 | local appSyncTimeSeriesPanel = 225 | timeSeriesPanel.new( 226 | 'Application Syncs', 227 | ) + 228 | tsQueryOptions.withTargets( 229 | prometheus.new( 230 | '$datasource', 231 | appSyncQuery, 232 | ) + 233 | prometheus.withLegendFormat( 234 | '{{ dest_server }}/{{ project }} - {{ phase }}', 235 | ) 236 | ) + 237 | tsStandardOptions.withUnit('short') + 238 | tsOptions.tooltip.withMode('multi') + 239 | tsOptions.tooltip.withSort('desc') + 240 | tsLegend.withShowLegend(true) + 241 | tsLegend.withDisplayMode('table') + 242 | tsLegend.withPlacement('right') + 243 | tsLegend.withCalcs(['last', 'max']) + 244 | tsLegend.withSortBy('Last') + 245 | tsLegend.withSortDesc(true) + 246 | tsCustom.withFillOpacity(10), 247 | 248 | local appAutoSyncStatusQuery = ||| 249 | sum( 250 | argocd_app_info{ 251 | %s 252 | } 253 | ) by (job, dest_server, project, autosync_enabled) 254 | ||| % commonLabels, 255 | 256 | local appAutoSyncStatusTimeSeriesPanel = 257 | timeSeriesPanel.new( 258 | 'Application Auto Sync Enabled', 259 | ) + 260 | tsQueryOptions.withTargets( 261 | prometheus.new( 262 | '$datasource', 263 | appAutoSyncStatusQuery, 264 | ) + 265 | prometheus.withLegendFormat( 266 | '{{ dest_server }}/{{ project }} - {{ autosync_enabled }}', 267 | ) 268 | ) + 269 | tsStandardOptions.withUnit('short') + 270 | tsOptions.tooltip.withMode('multi') + 271 | tsOptions.tooltip.withSort('desc') + 272 | tsLegend.withShowLegend(true) + 273 | tsLegend.withDisplayMode('table') + 274 | tsLegend.withPlacement('right') + 275 | 
tsLegend.withCalcs(['last', 'max']) + 276 | tsLegend.withSortBy('Last') + 277 | tsLegend.withSortDesc(true) + 278 | tsCustom.withFillOpacity(10), 279 | 280 | local appsDefined = std.length($._config.applications) != 0, 281 | local appBadgeContent = [ 282 | '| %(name)s | %(environment)s | [![App Status](%(baseUrl)s/api/badge?name=%(applicationName)s&revision=true)](%(baseUrl)s/applications/%(applicationName)s) |' % application { 283 | baseUrl: if std.objectHas(application, 'baseUrl') then application.baseUrl else $._config.argoCdUrl, 284 | applicationName: if std.objectHas(application, 'applicationName') then application.applicationName else application.name, 285 | } 286 | for application in $._config.applications 287 | ], 288 | local appBadgeTextPanel = 289 | textPanel.new( 290 | 'Application Badges', 291 | ) + 292 | textPanel.options.withMode('markdown') + 293 | textPanel.options.withContent( 294 | if appsDefined then ||| 295 | | Application | Environment | Status | 296 | | --- | --- | --- | 297 | %s 298 | ||| % std.join('\n', appBadgeContent) else 'No applications defined', 299 | ), 300 | 301 | local appUnhealthyQuery = ||| 302 | sum( 303 | argocd_app_info{ 304 | %s 305 | health_status!~"Healthy|Progressing" 306 | } 307 | ) by (job, dest_server, project, name, health_status) 308 | ||| % commonLabels, 309 | 310 | local appUnhealthyTablePanel = 311 | tablePanel.new( 312 | 'Applications Unhealthy', 313 | ) + 314 | tbStandardOptions.withUnit('short') + 315 | tbOptions.withSortBy( 316 | tbOptions.sortBy.withDisplayName('Application') 317 | ) + 318 | tbOptions.footer.withEnablePagination(true) + 319 | tbQueryOptions.withTargets( 320 | prometheus.new( 321 | '$datasource', 322 | appUnhealthyQuery, 323 | ) + 324 | prometheus.withFormat('table') + 325 | prometheus.withInstant(true) 326 | ) + 327 | tbQueryOptions.withTransformations([ 328 | tbQueryOptions.transformation.withId( 329 | 'organize' 330 | ) + 331 | tbQueryOptions.transformation.withOptions( 332 | { 333 | 
renameByName: { 334 | job: 'Job', 335 | dest_server: 'Cluster', 336 | project: 'Project', 337 | name: 'Application', 338 | health_status: 'Health Status', 339 | }, 340 | indexByName: { 341 | name: 0, 342 | project: 1, 343 | health_status: 2, 344 | }, 345 | excludeByName: { 346 | Time: true, 347 | job: true, 348 | dest_server: true, 349 | Value: true, 350 | }, 351 | } 352 | ), 353 | ]) + 354 | tbStandardOptions.withOverrides([ 355 | tbOverride.byName.new('name') + 356 | tbOverride.byName.withPropertiesFromOptions( 357 | tbStandardOptions.withLinks( 358 | tbPanelOptions.link.withTitle('Go To Application') + 359 | tbPanelOptions.link.withUrl( 360 | $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' 361 | ) + 362 | tbPanelOptions.link.withTargetBlank(true) 363 | ) 364 | ), 365 | tbOverride.byName.new('health_status') + 366 | tbOverride.byName.withPropertiesFromOptions( 367 | tbStandardOptions.color.withMode('fixed') + 368 | tbStandardOptions.color.withFixedColor('yellow') + 369 | tbCustom.withDisplayMode('color-background') 370 | ), 371 | ]), 372 | 373 | local appOutOfSyncQuery = ||| 374 | sum( 375 | argocd_app_info{ 376 | %s 377 | sync_status!="Synced" 378 | } 379 | ) by (job, dest_server, project, name, sync_status) > 0 380 | ||| % commonLabels, 381 | 382 | local appOutOfSyncTablePanel = 383 | tablePanel.new( 384 | 'Applications Out Of Sync', 385 | ) + 386 | tbStandardOptions.withUnit('short') + 387 | tbOptions.withSortBy( 388 | tbOptions.sortBy.withDisplayName('Application') 389 | ) + 390 | tbOptions.footer.withEnablePagination(true) + 391 | tbQueryOptions.withTargets( 392 | prometheus.new( 393 | '$datasource', 394 | appOutOfSyncQuery, 395 | ) + 396 | prometheus.withFormat('table') + 397 | prometheus.withInstant(true) 398 | ) + 399 | tbQueryOptions.withTransformations([ 400 | tbQueryOptions.transformation.withId( 401 | 'organize' 402 | ) + 403 | tbQueryOptions.transformation.withOptions( 404 | { 405 | renameByName: { 406 | job: 'Job', 407 
| dest_server: 'Cluster', 408 | project: 'Project', 409 | name: 'Application', 410 | sync_status: 'Sync Status', 411 | }, 412 | indexByName: { 413 | name: 0, 414 | project: 1, 415 | sync_status: 2, 416 | }, 417 | excludeByName: { 418 | Time: true, 419 | job: true, 420 | dest_server: true, 421 | Value: true, 422 | }, 423 | } 424 | ), 425 | ]) + 426 | tbStandardOptions.withOverrides([ 427 | tbOverride.byName.new('name') + 428 | tbOverride.byName.withPropertiesFromOptions( 429 | tbStandardOptions.withLinks( 430 | tbPanelOptions.link.withTitle('Go To Application') + 431 | tbPanelOptions.link.withUrl( 432 | $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' 433 | ) + 434 | tbPanelOptions.link.withTargetBlank(true) 435 | ) 436 | ), 437 | tbOverride.byName.new('sync_status') + 438 | tbOverride.byName.withPropertiesFromOptions( 439 | tbStandardOptions.color.withMode('fixed') + 440 | tbStandardOptions.color.withFixedColor('yellow') + 441 | tbCustom.withDisplayMode('color-background') 442 | ), 443 | ]), 444 | 445 | local appSync7dQuery = ||| 446 | sum( 447 | round( 448 | increase( 449 | argocd_app_sync_total{ 450 | %s 451 | phase!="Succeeded" 452 | }[7d] 453 | ) 454 | ) 455 | ) by (job, dest_server, project, name, phase) > 0 456 | ||| % commonLabels, 457 | 458 | local appSync7dTablePanel = 459 | tablePanel.new( 460 | 'Applications That Failed to Sync[7d]', 461 | ) + 462 | tbStandardOptions.withUnit('short') + 463 | tbOptions.withSortBy( 464 | tbOptions.sortBy.withDisplayName('Application') 465 | ) + 466 | tbOptions.footer.withEnablePagination(true) + 467 | tbQueryOptions.withTargets( 468 | prometheus.new( 469 | '$datasource', 470 | appSync7dQuery, 471 | ) + 472 | prometheus.withFormat('table') + 473 | prometheus.withInstant(true) 474 | ) + 475 | tbQueryOptions.withTransformations([ 476 | tbQueryOptions.transformation.withId( 477 | 'organize' 478 | ) + 479 | tbQueryOptions.transformation.withOptions( 480 | { 481 | renameByName: { 482 | job: 'Job', 
483 | dest_server: 'Cluster', 484 | project: 'Project', 485 | name: 'Application', 486 | phase: 'Phase', 487 | Value: 'Count', 488 | }, 489 | indexByName: { 490 | name: 0, 491 | project: 1, 492 | phase: 2, 493 | }, 494 | excludeByName: { 495 | Time: true, 496 | job: true, 497 | dest_server: true, 498 | }, 499 | } 500 | ), 501 | ]) + 502 | tbStandardOptions.withOverrides([ 503 | tbOverride.byName.new('name') + 504 | tbOverride.byName.withPropertiesFromOptions( 505 | tbStandardOptions.withLinks( 506 | tbPanelOptions.link.withTitle('Go To Application') + 507 | tbPanelOptions.link.withUrl( 508 | $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' 509 | ) + 510 | tbPanelOptions.link.withTargetBlank(true) 511 | ) 512 | ), 513 | tbOverride.byName.new('Value') + 514 | tbOverride.byName.withPropertiesFromOptions( 515 | tbStandardOptions.color.withMode('fixed') + 516 | tbStandardOptions.color.withFixedColor('yellow') + 517 | tbCustom.withDisplayMode('color-background') 518 | ), 519 | ]), 520 | 521 | local appAutoSyncDisabledQuery = ||| 522 | sum( 523 | argocd_app_info{ 524 | %s 525 | autosync_enabled!="true" 526 | } 527 | ) by (job, dest_server, project, name, autosync_enabled) > 0 528 | ||| % commonLabels, 529 | 530 | local appAutoSyncDisabledTablePanel = 531 | tablePanel.new( 532 | 'Applications With Auto Sync Disabled', 533 | ) + 534 | tbStandardOptions.withUnit('short') + 535 | tbOptions.withSortBy( 536 | tbOptions.sortBy.withDisplayName('Application') 537 | ) + 538 | tbOptions.footer.withEnablePagination(true) + 539 | tbQueryOptions.withTargets( 540 | prometheus.new( 541 | '$datasource', 542 | appAutoSyncDisabledQuery, 543 | ) + 544 | prometheus.withFormat('table') + 545 | prometheus.withInstant(true) 546 | ) + 547 | tbQueryOptions.withTransformations([ 548 | tbQueryOptions.transformation.withId( 549 | 'organize' 550 | ) + 551 | tbQueryOptions.transformation.withOptions( 552 | { 553 | renameByName: { 554 | job: 'Job', 555 | dest_server: 
'Kubernetes Cluster', 556 | project: 'Project', 557 | name: 'Application', 558 | autosync_enabled: 'Auto Sync Enabled', 559 | }, 560 | indexByName: { 561 | name: 0, 562 | project: 1, 563 | autosync_enabled: 2, 564 | }, 565 | excludeByName: { 566 | Time: true, 567 | job: true, 568 | dest_server: true, 569 | Value: true, 570 | }, 571 | } 572 | ), 573 | ]) + 574 | tbStandardOptions.withOverrides([ 575 | tbOverride.byName.new('name') + 576 | tbOverride.byName.withPropertiesFromOptions( 577 | tbStandardOptions.withLinks( 578 | tbPanelOptions.link.withTitle('Go To Application') + 579 | tbPanelOptions.link.withUrl( 580 | $._config.argoCdUrl + '/applications/${__data.fields.Project}/${__value.raw}' 581 | ) + 582 | tbPanelOptions.link.withTargetBlank(true) 583 | ) 584 | ), 585 | tbOverride.byName.new('autosync_enabled') + 586 | tbOverride.byName.withPropertiesFromOptions( 587 | tbStandardOptions.color.withMode('fixed') + 588 | tbStandardOptions.color.withFixedColor('yellow') + 589 | tbCustom.withDisplayMode('color-background') 590 | ), 591 | ]), 592 | 593 | local appHealthStatusByAppQuery = ||| 594 | sum( 595 | argocd_app_info{ 596 | %s 597 | name=~"$application", 598 | } 599 | ) by (namespace, job, dest_server, project, name, health_status) 600 | ||| % commonLabels, 601 | 602 | local appHealthStatusByAppTimeSeriesPanel = 603 | timeSeriesPanel.new( 604 | 'Application Health Status', 605 | ) + 606 | tsQueryOptions.withTargets( 607 | prometheus.new( 608 | '$datasource', 609 | appHealthStatusByAppQuery, 610 | ) + 611 | prometheus.withLegendFormat( 612 | '{{ dest_server }}/{{ project }}/{{ name }} - {{ health_status }}' 613 | ) 614 | ) + 615 | tsQueryOptions.withInterval('5m') + 616 | tsStandardOptions.withUnit('short') + 617 | tsOptions.tooltip.withMode('multi') + 618 | tsOptions.tooltip.withSort('desc') + 619 | tsLegend.withShowLegend(true) + 620 | tsLegend.withDisplayMode('table') + 621 | tsLegend.withCalcs(['last']) + 622 | tsLegend.withSortBy('Last') + 623 | 
tsLegend.withSortDesc(true) + 624 | tsCustom.withFillOpacity(10), 625 | 626 | local appSyncStatusByAppQuery = ||| 627 | sum( 628 | argocd_app_info{ 629 | %s 630 | name=~"$application", 631 | } 632 | ) by (namespace, job, dest_server, project, name, sync_status) 633 | ||| % commonLabels, 634 | 635 | local appSyncStatusByAppTimeSeriesPanel = 636 | timeSeriesPanel.new( 637 | 'Application Sync Status', 638 | ) + 639 | tsQueryOptions.withTargets( 640 | prometheus.new( 641 | '$datasource', 642 | appSyncStatusByAppQuery, 643 | ) + 644 | prometheus.withLegendFormat( 645 | '{{ dest_server }}/{{ project }}/{{ name }} - {{ sync_status }}' 646 | ) 647 | ) + 648 | tsQueryOptions.withInterval('5m') + 649 | tsStandardOptions.withUnit('short') + 650 | tsOptions.tooltip.withMode('multi') + 651 | tsOptions.tooltip.withSort('desc') + 652 | tsLegend.withShowLegend(true) + 653 | tsLegend.withDisplayMode('table') + 654 | tsLegend.withCalcs(['last']) + 655 | tsLegend.withSortBy('Last') + 656 | tsLegend.withSortDesc(true) + 657 | tsCustom.withFillOpacity(10), 658 | 659 | local appSyncByAppQuery = ||| 660 | sum( 661 | round( 662 | increase( 663 | argocd_app_sync_total{ 664 | %s 665 | name=~"$application", 666 | }[$__rate_interval] 667 | ) 668 | ) 669 | ) by (namespace, job, dest_server, project, name, phase) 670 | ||| % commonLabels, 671 | 672 | local appSyncByAppTimeSeriesPanel = 673 | timeSeriesPanel.new( 674 | 'Application Sync Result', 675 | ) + 676 | tsQueryOptions.withTargets( 677 | prometheus.new( 678 | '$datasource', 679 | appSyncByAppQuery, 680 | ) + 681 | prometheus.withLegendFormat( 682 | '{{ dest_server }}/{{ project }}/{{ name }} - {{ phase }}' 683 | ) 684 | ) + 685 | tsQueryOptions.withInterval('5m') + 686 | tsStandardOptions.withUnit('short') + 687 | tsOptions.tooltip.withMode('multi') + 688 | tsOptions.tooltip.withSort('desc') + 689 | tsLegend.withShowLegend(true) + 690 | tsLegend.withDisplayMode('table') + 691 | tsLegend.withCalcs(['last']) + 692 | 
tsLegend.withSortBy('Last') + 693 | tsLegend.withSortDesc(true) + 694 | tsCustom.withFillOpacity(10), 695 | 696 | local summaryRow = 697 | row.new( 698 | 'Summary by Kubernetes Cluster, Project' 699 | ), 700 | 701 | local appSummaryRow = 702 | row.new( 703 | 'Applications (Unhealthy/OutOfSync/AutoSyncDisabled) Summary', 704 | ), 705 | 706 | local appRow = 707 | row.new( 708 | 'Application ($application)', 709 | ), 710 | 711 | 'argo-cd-application-overview.json': 712 | $._config.bypassDashboardValidation + 713 | dashboard.new( 714 | 'ArgoCD / Application / Overview', 715 | ) + 716 | dashboard.withDescription('A dashboard that monitors ArgoCD with a focus on Application status. It is created using the [argo-cd-mixin](https://github.com/adinhodovic/argo-cd-mixin). Requires custom configuration to add application badges. Please refer to the mixin.') + 717 | dashboard.withUid($._config.applicationOverviewDashboardUid) + 718 | dashboard.withTags($._config.tags) + 719 | dashboard.withTimezone('utc') + 720 | dashboard.withEditable(true) + 721 | dashboard.time.withFrom('now-6h') + 722 | dashboard.time.withTo('now') + 723 | dashboard.withVariables(variables) + 724 | dashboard.withLinks( 725 | [ 726 | dashboard.link.dashboards.new('ArgoCD Dashboards', $._config.tags) + 727 | dashboard.link.link.options.withTargetBlank(true), 728 | ] 729 | ) + 730 | dashboard.withPanels( 731 | [ 732 | summaryRow + 733 | row.gridPos.withX(0) + 734 | row.gridPos.withY(0) + 735 | row.gridPos.withW(24) + 736 | row.gridPos.withH(1), 737 | appHealthStatusTimeSeriesPanel + 738 | timeSeriesPanel.gridPos.withX(0) + 739 | timeSeriesPanel.gridPos.withY(1) + 740 | timeSeriesPanel.gridPos.withW(9) + 741 | timeSeriesPanel.gridPos.withH(5), 742 | appSyncStatusTimeSeriesPanel + 743 | timeSeriesPanel.gridPos.withX(9) + 744 | timeSeriesPanel.gridPos.withY(1) + 745 | timeSeriesPanel.gridPos.withW(9) + 746 | timeSeriesPanel.gridPos.withH(5), 747 | appSyncTimeSeriesPanel + 748 | timeSeriesPanel.gridPos.withX(0) + 
749 | timeSeriesPanel.gridPos.withY(6) + 750 | timeSeriesPanel.gridPos.withW(9) + 751 | timeSeriesPanel.gridPos.withH(5), 752 | appAutoSyncStatusTimeSeriesPanel + 753 | timeSeriesPanel.gridPos.withX(9) + 754 | timeSeriesPanel.gridPos.withY(6) + 755 | timeSeriesPanel.gridPos.withW(9) + 756 | timeSeriesPanel.gridPos.withH(5), 757 | appBadgeTextPanel + 758 | textPanel.gridPos.withX(18) + 759 | textPanel.gridPos.withY(1) + 760 | textPanel.gridPos.withW(6) + 761 | textPanel.gridPos.withH(10), 762 | appSummaryRow + 763 | timeSeriesPanel.gridPos.withX(0) + 764 | timeSeriesPanel.gridPos.withY(11) + 765 | timeSeriesPanel.gridPos.withW(18) + 766 | timeSeriesPanel.gridPos.withH(1), 767 | ] + 768 | grid.makeGrid( 769 | [ 770 | appUnhealthyTablePanel, 771 | appOutOfSyncTablePanel, 772 | appSync7dTablePanel, 773 | appAutoSyncDisabledTablePanel, 774 | ], 775 | panelWidth=12, 776 | panelHeight=6, 777 | startY=12 778 | ) + 779 | [ 780 | appRow + 781 | row.gridPos.withX(0) + 782 | row.gridPos.withY(23) + 783 | row.gridPos.withW(24) + 784 | row.gridPos.withH(1), 785 | ] 786 | + 787 | grid.makeGrid( 788 | [ 789 | appHealthStatusByAppTimeSeriesPanel, 790 | appSyncStatusByAppTimeSeriesPanel, 791 | appSyncByAppTimeSeriesPanel, 792 | ], 793 | panelWidth=8, 794 | panelHeight=8, 795 | startY=24 796 | ) 797 | ) + 798 | if $._config.annotation.enabled then 799 | dashboard.withAnnotations($._config.customAnnotation) 800 | else {}, 801 | }, 802 | } 803 | -------------------------------------------------------------------------------- /dashboards/argo-cd-notifications.libsonnet: -------------------------------------------------------------------------------- 1 | local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; 2 | local dashboard = g.dashboard; 3 | local row = g.panel.row; 4 | local grid = g.util.grid; 5 | 6 | local timeSeriesPanel = g.panel.timeSeries; 7 | 8 | local variable = dashboard.variable; 9 | local datasource = variable.datasource; 10 | local query = 
variable.query;
local prometheus = g.query.prometheus;

// Timeseries
local tsOptions = timeSeriesPanel.options;
local tsStandardOptions = timeSeriesPanel.standardOptions;
local tsQueryOptions = timeSeriesPanel.queryOptions;
local tsFieldConfig = timeSeriesPanel.fieldConfig;
local tsCustom = tsFieldConfig.defaults.custom;
local tsLegend = tsOptions.legend;

{
  grafanaDashboards+:: {

    // Prometheus datasource picker; the current selection is pre-filled
    // from $._config.datasourceName.
    local datasourceVariable =
      datasource.new(
        'datasource',
        'prometheus',
      ) +
      datasource.generalOptions.withLabel('Data source') +
      {
        current: {
          selected: true,
          text: $._config.datasourceName,
          value: $._config.datasourceName,
        },
      },

    // Cluster selector. The variable is named after $._config.clusterLabel,
    // so the label whose values are listed must be that same configured
    // label rather than a hard-coded `cluster`.
    // Shown on the dashboard only when $._config.showMultiCluster is set.
    local clusterVariable =
      query.new(
        $._config.clusterLabel,
        'label_values(argocd_notifications_deliveries_total{}, %(clusterLabel)s)' % $._config,
      ) +
      query.withDatasourceFromVariable(datasourceVariable) +
      query.withSort() +
      query.generalOptions.withLabel('Cluster') +
      query.refresh.onLoad() +
      query.refresh.onTime() +
      (
        if $._config.showMultiCluster
        then query.generalOptions.showOnDashboard.withLabelAndValue()
        else query.generalOptions.showOnDashboard.withNothing()
      ),

    // Namespace selector (multi-select, with "All"), scoped to the
    // selected cluster.
    local namespaceVariable =
      query.new(
        'namespace',
        'label_values(argocd_notifications_deliveries_total{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
      ) +
      query.withDatasourceFromVariable(datasourceVariable) +
      query.withSort(1) +
      query.generalOptions.withLabel('Namespace') +
      query.selectionOptions.withMulti(true) +
      query.selectionOptions.withIncludeAll(true) +
      query.refresh.onLoad() +
      query.refresh.onTime(),

    // Job selector, scoped to the selected cluster and namespace(s).
    local jobVariable =
      query.new(
        'job',
        'label_values(argocd_notifications_deliveries_total{%(clusterLabel)s="$cluster", namespace=~"$namespace"}, job)' % $._config,
      ) +
query.withDatasourceFromVariable(datasourceVariable) + 73 | query.withSort(1) + 74 | query.generalOptions.withLabel('Job') + 75 | query.selectionOptions.withMulti(true) + 76 | query.selectionOptions.withIncludeAll(true, '.*') + 77 | query.refresh.onLoad() + 78 | query.refresh.onTime(), 79 | 80 | local exportedServiceVariable = 81 | query.new( 82 | 'exported_service', 83 | 'label_values(argocd_notifications_deliveries_total{%(clusterLabel)s="$cluster", namespace=~"$namespace", job=~"$job"}, exported_service)' % $._config, 84 | ) + 85 | query.withDatasourceFromVariable(datasourceVariable) + 86 | query.withSort(1) + 87 | query.generalOptions.withLabel('Notifications Service') + 88 | query.selectionOptions.withMulti(true) + 89 | query.selectionOptions.withIncludeAll(true) + 90 | query.refresh.onLoad() + 91 | query.refresh.onTime(), 92 | 93 | local variables = [ 94 | datasourceVariable, 95 | clusterVariable, 96 | namespaceVariable, 97 | jobVariable, 98 | exportedServiceVariable, 99 | ], 100 | 101 | local commonLabels = ||| 102 | %(clusterLabel)s="$cluster", 103 | namespace=~'$namespace', 104 | job=~'$job', 105 | ||| % $._config, 106 | 107 | local deliveriesQuery = ||| 108 | sum( 109 | round( 110 | increase( 111 | argocd_notifications_deliveries_total{ 112 | %s 113 | exported_service=~"$exported_service", 114 | }[$__rate_interval] 115 | ) 116 | ) 117 | ) by (job, exported_service, succeeded) 118 | ||| % commonLabels, 119 | 120 | local deliveriesTimeSeriesPanel = 121 | timeSeriesPanel.new( 122 | 'Notification Deliveries', 123 | ) + 124 | tsQueryOptions.withTargets( 125 | prometheus.new( 126 | '$datasource', 127 | deliveriesQuery, 128 | ) + 129 | prometheus.withLegendFormat( 130 | '{{ exported_service }} - Succeeded: {{ succeeded }}' 131 | ) 132 | ) + 133 | tsStandardOptions.withUnit('short') + 134 | tsOptions.tooltip.withMode('multi') + 135 | tsOptions.tooltip.withSort('desc') + 136 | tsLegend.withShowLegend(true) + 137 | tsLegend.withDisplayMode('table') + 138 | 
tsLegend.withPlacement('right') + 139 | tsLegend.withCalcs(['last', 'max']) + 140 | tsLegend.withSortBy('Last') + 141 | tsLegend.withSortDesc(true) + 142 | tsCustom.withFillOpacity(10), 143 | 144 | local triggerEvalQuery = ||| 145 | sum( 146 | round( 147 | increase( 148 | argocd_notifications_trigger_eval_total{ 149 | %s 150 | }[$__rate_interval] 151 | ) 152 | ) 153 | ) by (job, name, triggered) 154 | ||| % commonLabels, 155 | 156 | local triggerEvalTimeSeriesPanel = 157 | timeSeriesPanel.new( 158 | 'Trigger Evaluations', 159 | ) + 160 | tsQueryOptions.withTargets( 161 | prometheus.new( 162 | '$datasource', 163 | triggerEvalQuery, 164 | ) + 165 | prometheus.withLegendFormat( 166 | '{{ name }} - Triggered: {{ triggered }}', 167 | ) 168 | ) + 169 | tsStandardOptions.withUnit('short') + 170 | tsOptions.tooltip.withMode('multi') + 171 | tsOptions.tooltip.withSort('desc') + 172 | tsLegend.withShowLegend(true) + 173 | tsLegend.withDisplayMode('table') + 174 | tsLegend.withPlacement('right') + 175 | tsLegend.withCalcs(['last', 'max']) + 176 | tsLegend.withSortBy('Last') + 177 | tsLegend.withSortDesc(true) + 178 | tsCustom.withFillOpacity(10), 179 | 180 | local summaryRow = 181 | row.new( 182 | title='Summary' 183 | ), 184 | 185 | 'argo-cd-notifications-overview.json': 186 | $._config.bypassDashboardValidation + 187 | dashboard.new( 188 | 'ArgoCD / Notifications / Overview', 189 | ) + 190 | dashboard.withDescription('A dashboard that monitors ArgoCD notifications. 
It is created using the [argo-cd-mixin](https://github.com/adinhodovic/argo-cd-mixin).') + 191 | dashboard.withUid($._config.notificationsOverviewDashboardUid) + 192 | dashboard.withTags($._config.tags) + 193 | dashboard.withTimezone('utc') + 194 | dashboard.withEditable(true) + 195 | dashboard.time.withFrom('now-2d') + 196 | dashboard.time.withTo('now') + 197 | dashboard.withVariables(variables) + 198 | dashboard.withLinks( 199 | [ 200 | dashboard.link.dashboards.new('ArgoCD Dashboards', $._config.tags) + 201 | dashboard.link.link.options.withTargetBlank(true), 202 | ] 203 | ) + 204 | dashboard.withPanels( 205 | [ 206 | summaryRow + 207 | row.gridPos.withX(0) + 208 | row.gridPos.withY(0) + 209 | row.gridPos.withW(24) + 210 | row.gridPos.withH(1), 211 | ] + 212 | grid.makeGrid( 213 | [ 214 | deliveriesTimeSeriesPanel, 215 | triggerEvalTimeSeriesPanel, 216 | ], 217 | panelWidth=12, 218 | panelHeight=8, 219 | startY=1 220 | ) 221 | ) + 222 | if $._config.annotation.enabled then 223 | dashboard.withAnnotations($._config.customAnnotation) 224 | else {}, 225 | }, 226 | } 227 | -------------------------------------------------------------------------------- /dashboards/argo-cd-operational.libsonnet: -------------------------------------------------------------------------------- 1 | local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; 2 | local dashboard = g.dashboard; 3 | local row = g.panel.row; 4 | local grid = g.util.grid; 5 | 6 | local statPanel = g.panel.stat; 7 | local pieChartPanel = g.panel.pieChart; 8 | local tablePanel = g.panel.table; 9 | local timeSeriesPanel = g.panel.timeSeries; 10 | local heatmapPanel = g.panel.heatmap; 11 | 12 | local variable = dashboard.variable; 13 | local datasource = variable.datasource; 14 | local query = variable.query; 15 | local prometheus = g.query.prometheus; 16 | 17 | // Pie Chart 18 | local pcOptions = pieChartPanel.options; 19 | local pcStandardOptions = pieChartPanel.standardOptions; 20 | 
local pcOverride = pcStandardOptions.override;
local pcLegend = pcOptions.legend;

// Timeseries
local tsOptions = timeSeriesPanel.options;
local tsStandardOptions = timeSeriesPanel.standardOptions;
local tsQueryOptions = timeSeriesPanel.queryOptions;
local tsFieldConfig = timeSeriesPanel.fieldConfig;
local tsCustom = tsFieldConfig.defaults.custom;
local tsLegend = tsOptions.legend;

// Table
local tbOptions = tablePanel.options;
local tbStandardOptions = tablePanel.standardOptions;
local tbQueryOptions = tablePanel.queryOptions;
local tbPanelOptions = tablePanel.panelOptions;
local tbOverride = tbStandardOptions.override;

// HeatmapPanel
local hmStandardOptions = heatmapPanel.standardOptions;
local hmQueryOptions = heatmapPanel.queryOptions;

{
  grafanaDashboards+:: {

    // Prometheus datasource picker; the current selection is pre-filled
    // from $._config.datasourceName.
    local datasourceVariable =
      datasource.new(
        'datasource',
        'prometheus',
      ) +
      datasource.generalOptions.withLabel('Data source') +
      {
        current: {
          selected: true,
          text: $._config.datasourceName,
          value: $._config.datasourceName,
        },
      },

    // Cluster selector. The variable is named after $._config.clusterLabel,
    // so the label whose values are listed must be that same configured
    // label rather than a hard-coded `cluster`.
    // Shown on the dashboard only when $._config.showMultiCluster is set.
    local clusterVariable =
      query.new(
        $._config.clusterLabel,
        'label_values(argocd_app_info{}, %(clusterLabel)s)' % $._config,
      ) +
      query.withDatasourceFromVariable(datasourceVariable) +
      query.withSort() +
      query.generalOptions.withLabel('Cluster') +
      query.refresh.onLoad() +
      query.refresh.onTime() +
      (
        if $._config.showMultiCluster
        then query.generalOptions.showOnDashboard.withLabelAndValue()
        else query.generalOptions.showOnDashboard.withNothing()
      ),

    // Namespace selector, scoped to the selected cluster.
    local namespaceVariable =
      query.new(
        'namespace',
        'label_values(argocd_app_info{%(clusterLabel)s="$cluster"}, namespace)' % $._config,
      ) +
      query.withDatasourceFromVariable(datasourceVariable) +
      query.withSort(1) +
query.generalOptions.withLabel('Namespace') + 83 | query.selectionOptions.withMulti(true) + 84 | query.selectionOptions.withIncludeAll(true) + 85 | query.refresh.onLoad() + 86 | query.refresh.onTime(), 87 | 88 | // We use operational metrics from multiple Argo CD jobs, hence we need to use a regex. 89 | local jobVariable = 90 | query.new( 91 | 'job', 92 | 'label_values(job)', 93 | ) + 94 | query.withDatasourceFromVariable(datasourceVariable) + 95 | query.withSort(1) + 96 | query.withRegex('argo.*') + 97 | query.generalOptions.withLabel('Job') + 98 | query.selectionOptions.withMulti(true) + 99 | query.selectionOptions.withIncludeAll(true, '.*') + 100 | query.refresh.onLoad() + 101 | query.refresh.onTime(), 102 | 103 | local kubernetesClusterVariable = 104 | query.new( 105 | 'kubernetes_cluster', 106 | 'label_values(argocd_app_info{%(clusterLabel)s="$cluster", namespace=~"$namespace", job=~"$job"}, dest_server)' % $._config, 107 | ) + 108 | query.withDatasourceFromVariable(datasourceVariable) + 109 | query.withSort(1) + 110 | query.generalOptions.withLabel('Kubernetes Cluster') + 111 | query.selectionOptions.withMulti(true) + 112 | query.selectionOptions.withIncludeAll(true) + 113 | query.refresh.onLoad() + 114 | query.refresh.onTime(), 115 | 116 | local projectVariable = 117 | query.new( 118 | 'project', 119 | 'label_values(argocd_app_info{%(clusterLabel)s="$cluster", namespace=~"$namespace", job=~"$job", dest_server=~"$kubernetes_cluster"}, project)' % $._config 120 | ) + 121 | query.withDatasourceFromVariable(datasourceVariable) + 122 | query.withSort(1) + 123 | query.generalOptions.withLabel('Project') + 124 | query.selectionOptions.withMulti(true) + 125 | query.selectionOptions.withIncludeAll(true) + 126 | query.refresh.onLoad() + 127 | query.refresh.onTime(), 128 | 129 | local variables = [ 130 | datasourceVariable, 131 | clusterVariable, 132 | namespaceVariable, 133 | jobVariable, 134 | kubernetesClusterVariable, 135 | projectVariable, 136 | ], 137 | 138 | 
local commonLabels = ||| 139 | %(clusterLabel)s="$cluster", 140 | namespace=~'$namespace', 141 | job=~'$job', 142 | dest_server=~'$kubernetes_cluster', 143 | project=~'$project', 144 | ||| % $._config, 145 | 146 | local clustersCountQuery = ||| 147 | sum( 148 | argocd_cluster_info{ 149 | %(clusterLabel)s="$cluster", 150 | namespace=~'$namespace', 151 | job=~'$job' 152 | } 153 | ) 154 | ||| % $._config, 155 | 156 | local clustersCountStatPanel = 157 | statPanel.new( 158 | 'Clusters', 159 | ) + 160 | statPanel.standardOptions.withUnit('short') + 161 | statPanel.queryOptions.withTargets( 162 | prometheus.new( 163 | '$datasource', 164 | clustersCountQuery, 165 | ) 166 | ), 167 | 168 | local repositoriesCountQuery = ||| 169 | count( 170 | count( 171 | argocd_app_info{ 172 | %(clusterLabel)s="$cluster", 173 | namespace=~'$namespace', 174 | job=~'$job' 175 | } 176 | ) 177 | by (repo) 178 | ) 179 | ||| % $._config, 180 | 181 | local repositoriesCountStatPanel = 182 | statPanel.new( 183 | 'Repositories', 184 | ) + 185 | statPanel.standardOptions.withUnit('short') + 186 | statPanel.queryOptions.withTargets( 187 | prometheus.new( 188 | '$datasource', 189 | repositoriesCountQuery, 190 | ) 191 | ), 192 | 193 | local appsCountQuery = ||| 194 | sum( 195 | argocd_app_info{ 196 | %s 197 | } 198 | ) 199 | ||| % commonLabels, 200 | 201 | local appsCountStatPanel = 202 | statPanel.new( 203 | 'Applications', 204 | ) + 205 | statPanel.standardOptions.withUnit('short') + 206 | statPanel.queryOptions.withTargets( 207 | prometheus.new( 208 | '$datasource', 209 | appsCountQuery, 210 | ) 211 | ), 212 | 213 | local healthStatusQuery = ||| 214 | sum( 215 | argocd_app_info{ 216 | %s 217 | } 218 | ) by (health_status) 219 | ||| % commonLabels, 220 | 221 | local healthStatusPieChartPanel = 222 | pieChartPanel.new( 223 | 'Health Status', 224 | ) + 225 | pieChartPanel.queryOptions.withTargets( 226 | prometheus.new( 227 | '$datasource', 228 | healthStatusQuery, 229 | ) + 230 | 
prometheus.withInstant(true) + 231 | prometheus.withLegendFormat( 232 | '{{ health_status }}' 233 | ) 234 | ) + 235 | pcStandardOptions.withUnit('short') + 236 | pcOptions.tooltip.withMode('multi') + 237 | pcLegend.withShowLegend(true) + 238 | pcLegend.withDisplayMode('table') + 239 | pcLegend.withPlacement('right') + 240 | pcLegend.withValues(['value']) + 241 | pcStandardOptions.withOverrides([ 242 | pcOverride.byName.new('Healthy') + 243 | pcOverride.byName.withPropertiesFromOptions( 244 | pcStandardOptions.color.withMode('fixed') + 245 | pcStandardOptions.color.withFixedColor('green') 246 | ), 247 | pcOverride.byName.new('Degraded') + 248 | pcOverride.byName.withPropertiesFromOptions( 249 | pcStandardOptions.color.withMode('fixed') + 250 | pcStandardOptions.color.withFixedColor('red') 251 | ), 252 | pcOverride.byName.new('Progressing') + 253 | pcOverride.byName.withPropertiesFromOptions( 254 | pcStandardOptions.color.withMode('fixed') + 255 | pcStandardOptions.color.withFixedColor('yellow') 256 | ), 257 | ]), 258 | 259 | local syncStatusQuery = ||| 260 | sum( 261 | argocd_app_info{ 262 | %s 263 | } 264 | ) by (sync_status) 265 | ||| % commonLabels, 266 | 267 | local syncStatusPieChartPanel = 268 | pieChartPanel.new( 269 | 'Sync Status', 270 | ) + 271 | pieChartPanel.queryOptions.withTargets( 272 | prometheus.new( 273 | '$datasource', 274 | syncStatusQuery, 275 | ) + 276 | prometheus.withInstant(true) + 277 | prometheus.withLegendFormat( 278 | '{{ sync_status }}' 279 | ) 280 | ) + 281 | pcStandardOptions.withUnit('short') + 282 | pcOptions.tooltip.withMode('multi') + 283 | pcLegend.withShowLegend(true) + 284 | pcLegend.withDisplayMode('table') + 285 | pcLegend.withPlacement('right') + 286 | pcLegend.withValues(['value']) + 287 | pcStandardOptions.withOverrides([ 288 | pcOverride.byName.new('Synced') + 289 | pcOverride.byName.withPropertiesFromOptions( 290 | pcStandardOptions.color.withMode('fixed') + 291 | pcStandardOptions.color.withFixedColor('green') 292 | ), 
293 | pcOverride.byName.new('OutOfSync') + 294 | pcOverride.byName.withPropertiesFromOptions( 295 | pcStandardOptions.color.withMode('fixed') + 296 | pcStandardOptions.color.withFixedColor('red') 297 | ), 298 | pcOverride.byName.new('Unknown') + 299 | pcOverride.byName.withPropertiesFromOptions( 300 | pcStandardOptions.color.withMode('fixed') + 301 | pcStandardOptions.color.withFixedColor('yellow') 302 | ), 303 | ]), 304 | 305 | local appsQuery = ||| 306 | sum( 307 | argocd_app_info{ 308 | %s 309 | } 310 | ) by (job, dest_server, project, name, health_status, sync_status) 311 | ||| % commonLabels, 312 | 313 | local appsTablePanel = 314 | tablePanel.new( 315 | 'Applications', 316 | ) + 317 | tbStandardOptions.withUnit('short') + 318 | tbOptions.withSortBy( 319 | tbOptions.sortBy.withDisplayName('Application') 320 | ) + 321 | tbOptions.footer.withEnablePagination(true) + 322 | tbQueryOptions.withTargets( 323 | prometheus.new( 324 | '$datasource', 325 | appsQuery, 326 | ) + 327 | prometheus.withFormat('table') + 328 | prometheus.withInstant(true) 329 | ) + 330 | tbQueryOptions.withTransformations([ 331 | tbQueryOptions.transformation.withId( 332 | 'organize' 333 | ) + 334 | tbQueryOptions.transformation.withOptions( 335 | { 336 | renameByName: { 337 | job: 'Job', 338 | dest_server: 'Kubernetes Cluster', 339 | project: 'Project', 340 | name: 'Application', 341 | health_status: 'Health Status', 342 | sync_status: 'Sync Status', 343 | }, 344 | indexByName: { 345 | name: 0, 346 | project: 1, 347 | health_status: 2, 348 | sync_status: 3, 349 | }, 350 | excludeByName: { 351 | Time: true, 352 | job: true, 353 | dest_server: true, 354 | Value: true, 355 | }, 356 | } 357 | ), 358 | ]) + 359 | tbStandardOptions.withOverrides([ 360 | tbOverride.byName.new('name') + 361 | tbOverride.byName.withPropertiesFromOptions( 362 | tbStandardOptions.withLinks( 363 | tbPanelOptions.link.withTitle('Go To Application') + 364 | tbPanelOptions.link.withType('dashboard') + 365 | 
tbPanelOptions.link.withUrl(
                // The target is the application overview dashboard (its UID is
                // substituted below), so the slug names that dashboard; the
                // query string starts directly with the first variable.
                '/d/%s/argocd-application-overview?var-project=${__data.fields.Project}&var-application=${__value.raw}' % $._config.applicationOverviewDashboardUid
              ) +
              tbPanelOptions.link.withTargetBlank(true)
            )
          ),
      ]),

    // Rounded increase of argocd_app_sync_total over $__rate_interval,
    // summed per job / destination server / project / application name.
    local syncActivityQuery = |||
      sum(
        round(
          increase(
            argocd_app_sync_total{
              %s
            }[$__rate_interval]
          )
        )
      ) by (job, dest_server, project, name)
    ||| % commonLabels,

    // Time series of sync activity, one series per application, with a
    // table legend on the right sorted by mean (descending).
    local syncActivityTimeSeriesPanel =
      timeSeriesPanel.new(
        'Sync Activity',
      ) +
      tsQueryOptions.withTargets(
        prometheus.new(
          '$datasource',
          syncActivityQuery,
        ) +
        prometheus.withLegendFormat(
          '{{ dest_server }}/{{ project }}/{{ name }}'
        )
      ) +
      tsStandardOptions.withUnit('short') +
      tsOptions.tooltip.withMode('multi') +
      tsOptions.tooltip.withSort('desc') +
      tsLegend.withShowLegend(true) +
      tsLegend.withDisplayMode('table') +
      tsLegend.withPlacement('right') +
      tsLegend.withCalcs(['lastNotNull', 'mean', 'max']) +
      tsLegend.withSortBy('Mean') +
      tsLegend.withSortDesc(true) +
      tsCustom.withFillOpacity(10),

    // Same as syncActivityQuery but restricted to the Error/Failed phases
    // and additionally split by phase. The application is carried in the
    // `name` label (as in syncActivityQuery), so group by `name`; grouping
    // by a label the series does not carry would merge all applications.
    local syncFailuresQuery = |||
      sum(
        round(
          increase(
            argocd_app_sync_total{
              %s
              phase=~"Error|Failed"
            }[$__rate_interval]
          )
        )
      ) by (job, dest_server, project, name, phase)
    ||| % commonLabels,

    // Time series of failed syncs, one series per application and phase.
    local syncFailuresTimeSeriesPanel =
      timeSeriesPanel.new(
        'Sync Failures',
      ) +
      tsQueryOptions.withTargets(
        prometheus.new(
          '$datasource',
          syncFailuresQuery,
        ) +
        prometheus.withLegendFormat(
          // Legend labels must match the `by (...)` clause of the query.
          '{{ dest_server }}/{{ project }}/{{ name }} - {{ phase }}'
        )
      ) +
      tsStandardOptions.withUnit('short') +
      tsOptions.tooltip.withMode('multi') +
      tsOptions.tooltip.withSort('desc') +
tsLegend.withShowLegend(true) +
      tsLegend.withDisplayMode('table') +
      tsLegend.withPlacement('right') +
      tsLegend.withCalcs(['lastNotNull', 'mean', 'max']) +
      tsLegend.withSortBy('Mean') +
      tsLegend.withSortDesc(true) +
      tsCustom.withFillOpacity(10),

    // Rounded increase of argocd_app_reconcile_count over $__rate_interval,
    // summed per namespace / job / destination server. Filters on cluster,
    // namespace, job and destination server only.
    local reconcilationActivityQuery = |||
      sum(
        round(
          increase(
            argocd_app_reconcile_count{
              %(clusterLabel)s="$cluster",
              namespace=~'$namespace',
              job=~'$job',
              dest_server=~'$kubernetes_cluster'
            }[$__rate_interval]
          )
        )
      ) by (namespace, job, dest_server)
    ||| % $._config,

    // Time series of reconciliation activity per namespace and destination
    // server. The local's identifier keeps its existing spelling because it
    // is referenced elsewhere in the file; only the displayed title is
    // corrected.
    local reconcilationActivtyTimeSeriesPanel =
      timeSeriesPanel.new(
        'Reconciliation Activity',
      ) +
      tsQueryOptions.withTargets(
        prometheus.new(
          '$datasource',
          reconcilationActivityQuery,
        ) +
        prometheus.withLegendFormat(
          '{{ namespace }}/{{ dest_server }}'
        )
      ) +
      tsStandardOptions.withUnit('short') +
      tsOptions.tooltip.withMode('multi') +
      tsOptions.tooltip.withSort('desc') +
      tsLegend.withShowLegend(true) +
      tsLegend.withDisplayMode('table') +
      tsLegend.withPlacement('right') +
      tsLegend.withCalcs(['lastNotNull', 'mean', 'max']) +
      tsLegend.withSortBy('Mean') +
      tsLegend.withSortDesc(true) +
      tsCustom.withFillOpacity(10),

    // Increase of the argocd_app_reconcile histogram buckets over
    // $__rate_interval, summed per bucket boundary (`le`).
    local reconcilationPerformanceQuery = |||
      sum(
        increase(
          argocd_app_reconcile_bucket{
            %(clusterLabel)s="$cluster",
            namespace=~'$namespace',
            job=~'$job',
            dest_server=~'$kubernetes_cluster'
          }[$__rate_interval]
        )
      ) by (le)
    ||| % $._config,

    // Heatmap of the reconcile duration buckets.
    local reconcilationPerformanceHeatmapPanel =
      heatmapPanel.new(
        'Reconciliation Performance',
      ) +
      hmQueryOptions.withTargets(
        prometheus.new(
          '$datasource',
          reconcilationPerformanceQuery,
        ) +
prometheus.withLegendFormat(
          '{{ le }}'
        ) +
        prometheus.withFormat('heatmap')
      ) +
      hmStandardOptions.withUnit('short'),

    // Rounded increase of argocd_app_k8s_request_total over
    // $__rate_interval, summed per job / server / project / verb /
    // resource kind. Filters on cluster, namespace, job and project.
    local k8sApiActivityQuery = |||
      sum(
        round(
          increase(
            argocd_app_k8s_request_total{
              %(clusterLabel)s="$cluster",
              namespace=~'$namespace',
              job=~'$job',
              project=~'$project'
            }[$__rate_interval]
          )
        )
      ) by (job, server, project, verb, resource_kind)
    ||| % $._config,

    // Time series of Kubernetes API requests, one series per
    // server/project and verb/resource kind.
    local k8sApiActivityTimeSeriesPanel =
      timeSeriesPanel.new(
        'K8s API Activity',
      ) +
      tsQueryOptions.withTargets(
        prometheus.new(
          '$datasource',
          k8sApiActivityQuery,
        ) +
        prometheus.withLegendFormat(
          '{{ server }}/{{ project }} - {{ verb }}/{{ resource_kind }}'
        )
      ) +
      tsStandardOptions.withUnit('short') +
      tsOptions.tooltip.withMode('multi') +
      tsOptions.tooltip.withSort('desc') +
      tsLegend.withShowLegend(true) +
      tsLegend.withDisplayMode('table') +
      tsLegend.withPlacement('right') +
      tsLegend.withCalcs(['lastNotNull', 'mean', 'max']) +
      tsLegend.withSortBy('Mean') +
      tsLegend.withSortDesc(true) +
      tsCustom.withFillOpacity(10),

    // Sum of argocd_kubectl_exec_pending per job and command.
    local pendingKubectlRunQuery = |||
      sum(
        argocd_kubectl_exec_pending{
          %(clusterLabel)s="$cluster",
          namespace=~'$namespace',
          job=~'$job'
        }
      ) by (job, command)
    ||| % $._config,

    // Time series of pending kubectl executions, one series per job and
    // command.
    local pendingKubectlTimeSeriesPanel =
      timeSeriesPanel.new(
        'Pending Kubectl Runs',
      ) +
      tsQueryOptions.withTargets(
        prometheus.new(
          '$datasource',
          pendingKubectlRunQuery,
        ) +
        prometheus.withLegendFormat(
          // Only `job` and `command` survive the aggregation above, so the
          // legend may reference nothing else.
          '{{ job }} - {{ command }}'
        )
      ) +
      tsStandardOptions.withUnit('short') +
      tsOptions.tooltip.withMode('multi') +
      tsOptions.tooltip.withSort('desc') +
      tsLegend.withShowLegend(true) +
580 | tsLegend.withDisplayMode('table') + 581 | tsLegend.withPlacement('right') + 582 | tsLegend.withCalcs(['lastNotNull', 'mean', 'max']) + 583 | tsLegend.withSortBy('Mean') + 584 | tsLegend.withSortDesc(true) + 585 | tsCustom.withFillOpacity(10), 586 | 587 | local resourceObjectsQuery = ||| 588 | sum( 589 | argocd_cluster_api_resource_objects{ 590 | %(clusterLabel)s="$cluster", 591 | namespace=~'$namespace', 592 | job=~'$job', 593 | server=~'$kubernetes_cluster' 594 | } 595 | ) by (namespace, job, server) 596 | ||| % $._config, 597 | 598 | local resourceObjectsTimeSeriesPanel = 599 | timeSeriesPanel.new( 600 | 'Resource Objects', 601 | ) + 602 | tsQueryOptions.withTargets( 603 | prometheus.new( 604 | '$datasource', 605 | resourceObjectsQuery, 606 | ) + 607 | prometheus.withLegendFormat( 608 | '{{ server }}' 609 | ) 610 | ) + 611 | tsStandardOptions.withUnit('short') + 612 | tsOptions.tooltip.withMode('multi') + 613 | tsOptions.tooltip.withSort('desc') + 614 | tsLegend.withShowLegend(true) + 615 | tsLegend.withDisplayMode('table') + 616 | tsLegend.withPlacement('right') + 617 | tsLegend.withCalcs(['lastNotNull', 'mean', 'max']) + 618 | tsLegend.withSortBy('Mean') + 619 | tsLegend.withSortDesc(true) + 620 | tsCustom.withFillOpacity(10), 621 | 622 | local apiResourcesQuery = ||| 623 | sum( 624 | argocd_cluster_api_resources{ 625 | %(clusterLabel)s="$cluster", 626 | namespace=~'$namespace', 627 | job=~'$job', 628 | server=~'$kubernetes_cluster' 629 | } 630 | ) by (namespace, job, server) 631 | ||| % $._config, 632 | 633 | local apiResourcesTimeSeriesPanel = 634 | timeSeriesPanel.new( 635 | 'API Resources', 636 | ) + 637 | tsQueryOptions.withTargets( 638 | prometheus.new( 639 | '$datasource', 640 | apiResourcesQuery, 641 | ) + 642 | prometheus.withLegendFormat( 643 | '{{ server }}' 644 | ) 645 | ) + 646 | tsStandardOptions.withUnit('short') + 647 | tsOptions.tooltip.withMode('multi') + 648 | tsOptions.tooltip.withSort('desc') + 649 | tsLegend.withShowLegend(true) + 
650 | tsLegend.withDisplayMode('table') + 651 | tsLegend.withPlacement('right') + 652 | tsLegend.withCalcs(['lastNotNull', 'mean', 'max']) + 653 | tsLegend.withSortBy('Mean') + 654 | tsLegend.withSortDesc(true) + 655 | tsCustom.withFillOpacity(10), 656 | /* PromQL for the Cluster Events panel: increase of argocd_cluster_events_total over $__rate_interval, summed by namespace, job and server. */ 657 | local clusterEventsQuery = ||| 658 | sum( 659 | increase( 660 | argocd_cluster_events_total{ 661 | %(clusterLabel)s="$cluster", 662 | namespace=~'$namespace', 663 | job=~'$job', 664 | server=~'$kubernetes_cluster' 665 | }[$__rate_interval] 666 | ) 667 | ) by (namespace, job, server) 668 | ||| % $._config, 669 | /* Time series panel for clusterEventsQuery; the legend shows {{ server }}. */ 670 | local clusterEventsTimeSeriesPanel = 671 | timeSeriesPanel.new( 672 | 'Cluster Events', 673 | ) + 674 | tsQueryOptions.withTargets( 675 | prometheus.new( 676 | '$datasource', 677 | clusterEventsQuery, 678 | ) + 679 | prometheus.withLegendFormat( 680 | '{{ server }}' 681 | ) 682 | ) + 683 | tsStandardOptions.withUnit('short') + 684 | tsOptions.tooltip.withMode('multi') + 685 | tsOptions.tooltip.withSort('desc') + 686 | tsLegend.withShowLegend(true) + 687 | tsLegend.withDisplayMode('table') + 688 | tsLegend.withPlacement('right') + 689 | tsLegend.withCalcs(['lastNotNull', 'mean', 'max']) + 690 | tsLegend.withSortBy('Mean') + 691 | tsLegend.withSortDesc(true) + 692 | tsCustom.withFillOpacity(10), 693 | /* PromQL for the Git Requests (ls-remote) panel: increase of argocd_git_request_total with request_type="ls-remote" over $__rate_interval, summed by namespace, job and repo. */ 694 | local gitRequestsLsRemoteQuery = ||| 695 | sum( 696 | increase( 697 | argocd_git_request_total{ 698 | %(clusterLabel)s="$cluster", 699 | namespace=~'$namespace', 700 | job=~'$job', 701 | request_type="ls-remote" 702 | }[$__rate_interval] 703 | ) 704 | ) by (namespace, job, repo) 705 | ||| % $._config, 706 | /* Time series panel for gitRequestsLsRemoteQuery; the legend shows namespace and repo. */ 707 | local gitRequestsLsRemoteTimeSeriesPanel = 708 | timeSeriesPanel.new( 709 | 'Git Requests (ls-remote)', 710 | ) + 711 | tsQueryOptions.withTargets( 712 | prometheus.new( 713 | '$datasource', 714 | gitRequestsLsRemoteQuery, 715 | ) + 716 | prometheus.withLegendFormat( 717 | '{{ namespace }} - {{ repo }}' 718 | ) 719 | ) + 720 | tsStandardOptions.withUnit('short') + 721 |
tsOptions.tooltip.withMode('multi') + 722 | tsOptions.tooltip.withSort('desc') + 723 | tsLegend.withShowLegend(true) + 724 | tsLegend.withDisplayMode('table') + 725 | tsLegend.withPlacement('right') + 726 | tsLegend.withCalcs(['lastNotNull', 'mean', 'max']) + 727 | tsLegend.withSortBy('Mean') + 728 | tsLegend.withSortDesc(true) + 729 | tsCustom.withFillOpacity(10), 730 | /* PromQL for the Git Requests (checkout) panel: increase of argocd_git_request_total with request_type="fetch" over $__rate_interval, summed by namespace, job and repo. */ 731 | local gitRequestsCheckoutQuery = ||| 732 | sum( 733 | increase( 734 | argocd_git_request_total{ 735 | %(clusterLabel)s="$cluster", 736 | namespace=~'$namespace', 737 | job=~'$job', 738 | request_type="fetch" 739 | }[$__rate_interval] 740 | ) 741 | ) by (namespace, job, repo) 742 | ||| % $._config, 743 | /* Time series panel for gitRequestsCheckoutQuery. NOTE(review): the panel title says "checkout" while the query selects request_type="fetch" - confirm that this naming is intended. */ 744 | local gitRequestsCheckoutTimeSeriesPanel = 745 | timeSeriesPanel.new( 746 | 'Git Requests (checkout)', 747 | ) + 748 | tsQueryOptions.withTargets( 749 | prometheus.new( 750 | '$datasource', 751 | gitRequestsCheckoutQuery, 752 | ) + 753 | prometheus.withLegendFormat( 754 | '{{ namespace }} - {{ repo }}' 755 | ) 756 | ) + 757 | tsStandardOptions.withUnit('short') + 758 | tsOptions.tooltip.withMode('multi') + 759 | tsOptions.tooltip.withSort('desc') + 760 | tsLegend.withShowLegend(true) + 761 | tsLegend.withDisplayMode('table') + 762 | tsLegend.withPlacement('right') + 763 | tsLegend.withCalcs(['lastNotNull', 'mean', 'max']) + 764 | tsLegend.withSortBy('Mean') + 765 | tsLegend.withSortDesc(true) + 766 | tsCustom.withFillOpacity(10), 767 | /* PromQL for the Git Fetch Performance heatmap: increase of the argocd_git_request_duration_seconds_bucket histogram buckets with request_type="fetch" over $__rate_interval, summed by le. */ 768 | local gitFetchPerformanceQuery = ||| 769 | sum( 770 | increase( 771 | argocd_git_request_duration_seconds_bucket{ 772 | %(clusterLabel)s="$cluster", 773 | namespace=~'$namespace', 774 | job=~'$job', 775 | request_type="fetch" 776 | }[$__rate_interval] 777 | ) 778 | ) by (le) 779 | ||| % $._config, 780 | /* Heatmap panel for gitFetchPerformanceQuery; the target uses the 'heatmap' format and a {{ le }} legend. */ 781 | local gitFetchPerformanceHeatmapPanel = 782 | heatmapPanel.new( 783 | 'Git Fetch Performance', 784 | ) + 785 | hmQueryOptions.withTargets( 786 | prometheus.new( 787 | '$datasource', 788 | gitFetchPerformanceQuery, 789 | ) + 790 |
prometheus.withLegendFormat( 791 | '{{ le }}' 792 | ) + 793 | prometheus.withFormat('heatmap') 794 | ) + 795 | hmStandardOptions.withUnit('short'), 796 | /* PromQL for the Git Ls-remote Performance heatmap: increase of the argocd_git_request_duration_seconds_bucket histogram buckets with request_type="ls-remote" over $__rate_interval, summed by le. */ 797 | local gitLsRemotePerformanceQuery = ||| 798 | sum( 799 | increase( 800 | argocd_git_request_duration_seconds_bucket{ 801 | %(clusterLabel)s="$cluster", 802 | namespace=~'$namespace', 803 | job=~'$job', 804 | request_type="ls-remote" 805 | }[$__rate_interval] 806 | ) 807 | ) by (le) 808 | ||| % $._config, 809 | /* Heatmap panel for gitLsRemotePerformanceQuery; the target uses the 'heatmap' format and a {{ le }} legend. */ 810 | local gitLsRemotePerformanceHeatmapPanel = 811 | heatmapPanel.new( 812 | 'Git Ls-remote Performance', 813 | ) + 814 | hmQueryOptions.withTargets( 815 | prometheus.new( 816 | '$datasource', 817 | gitLsRemotePerformanceQuery, 818 | ) + 819 | prometheus.withLegendFormat( 820 | '{{ le }}' 821 | ) + 822 | prometheus.withFormat('heatmap') 823 | ) + 824 | hmStandardOptions.withUnit('short'), 825 | /* Row headers, one per dashboard section. Their grid positions are assigned where the panel list is assembled. */ 826 | local summaryRow = 827 | row.new( 828 | 'Summary' 829 | ), 830 | 831 | local syncStatsRow = 832 | row.new( 833 | 'Sync Stats' 834 | ), 835 | 836 | local controllerStatsRow = 837 | row.new( 838 | 'Controller Stats' 839 | ), 840 | 841 | local clusterStatsRow = 842 | row.new( 843 | 'Cluster Stats' 844 | ), 845 | 846 | local repoServerStatsRow = 847 | row.new( 848 | 'Repo Server Stats', 849 | ), 850 | /* Dashboard definition, stored under the key 'argo-cd-operational-overview.json'. It starts from $._config.bypassDashboardValidation and layers the grafonnet dashboard settings on top with +. */ 851 | 'argo-cd-operational-overview.json': 852 | $._config.bypassDashboardValidation + 853 | dashboard.new( 854 | 'ArgoCD / Operational / Overview', 855 | ) + 856 | dashboard.withDescription('A dashboard that monitors ArgoCD with a focus on the operational.
It is created using the [argo-cd-mixin](https://github.com/adinhodovic/argo-cd-mixin).') + 857 | dashboard.withUid($._config.operationalOverviewDashboardUid) + 858 | dashboard.withTags($._config.tags) + 859 | dashboard.withTimezone('utc') + 860 | dashboard.withEditable(true) + 861 | dashboard.time.withFrom('now-6h') + 862 | dashboard.time.withTo('now') + 863 | dashboard.withVariables(variables) + 864 | dashboard.withLinks( 865 | [ 866 | dashboard.link.dashboards.new('ArgoCD Dashboards', $._config.tags) + 867 | dashboard.link.link.options.withTargetBlank(true), 868 | ] 869 | ) + 870 | dashboard.withPanels( /* The panel list is built by concatenating arrays: explicitly positioned panels and row headers, plus the results of grid.makeGrid. Row headers are placed at x=0 with w=24 and h=1. */ 871 | [ /* Summary section: row header at y=0; three 4x4 panels at y=1, two 6x6 panels at y=5, and appsTablePanel at x=12, y=1 with w=12, h=10. NOTE(review): tablePanel.gridPos helpers are applied to the *StatPanel and *PieChartPanel locals as well; presumably gridPos is identical across panel types - confirm. */ 872 | summaryRow + 873 | row.gridPos.withX(0) + 874 | row.gridPos.withY(0) + 875 | row.gridPos.withW(24) + 876 | row.gridPos.withH(1), 877 | clustersCountStatPanel + 878 | tablePanel.gridPos.withX(0) + 879 | tablePanel.gridPos.withY(1) + 880 | tablePanel.gridPos.withW(4) + 881 | tablePanel.gridPos.withH(4), 882 | repositoriesCountStatPanel + 883 | tablePanel.gridPos.withX(4) + 884 | tablePanel.gridPos.withY(1) + 885 | tablePanel.gridPos.withW(4) + 886 | tablePanel.gridPos.withH(4), 887 | appsCountStatPanel + 888 | tablePanel.gridPos.withX(8) + 889 | tablePanel.gridPos.withY(1) + 890 | tablePanel.gridPos.withW(4) + 891 | tablePanel.gridPos.withH(4), 892 | healthStatusPieChartPanel + 893 | tablePanel.gridPos.withX(0) + 894 | tablePanel.gridPos.withY(5) + 895 | tablePanel.gridPos.withW(6) + 896 | tablePanel.gridPos.withH(6), 897 | syncStatusPieChartPanel + 898 | tablePanel.gridPos.withX(6) + 899 | tablePanel.gridPos.withY(5) + 900 | tablePanel.gridPos.withW(6) + 901 | tablePanel.gridPos.withH(6), 902 | appsTablePanel + 903 | tablePanel.gridPos.withX(12) + 904 | tablePanel.gridPos.withY(1) + 905 | tablePanel.gridPos.withW(12) + 906 | tablePanel.gridPos.withH(10), 907 | ] + /* Sync Stats section: row header at y=11, followed by a grid.makeGrid call for its panels. */ 908 | [ 909 | syncStatsRow + 910 | row.gridPos.withX(0) + 911 | row.gridPos.withY(11) + 912 | row.gridPos.withW(24) + 913 | row.gridPos.withH(1), 914 | ] + 915 | grid.makeGrid( 916 |
[syncActivityTimeSeriesPanel, syncFailuresTimeSeriesPanel], 917 | panelWidth=12, 918 | panelHeight=6, 919 | startY=12 920 | ) + /* Controller Stats section: row header at y=18, panels passed to grid.makeGrid with startY=19. */ 921 | [ 922 | controllerStatsRow + 923 | row.gridPos.withX(0) + 924 | row.gridPos.withY(18) + 925 | row.gridPos.withW(24) + 926 | row.gridPos.withH(1), 927 | ] + 928 | grid.makeGrid( 929 | [ 930 | reconcilationActivtyTimeSeriesPanel, 931 | reconcilationPerformanceHeatmapPanel, 932 | k8sApiActivityTimeSeriesPanel, 933 | pendingKubectlTimeSeriesPanel, 934 | ], 935 | panelWidth=12, 936 | panelHeight=6, 937 | startY=19 938 | ) + /* Cluster Stats section: row header at y=31, panels passed to grid.makeGrid with startY=32. */ 939 | [ 940 | clusterStatsRow + 941 | row.gridPos.withX(0) + 942 | row.gridPos.withY(31) + 943 | row.gridPos.withW(24) + 944 | row.gridPos.withH(1), 945 | ] + 946 | grid.makeGrid( 947 | [resourceObjectsTimeSeriesPanel, apiResourcesTimeSeriesPanel, clusterEventsTimeSeriesPanel], 948 | panelWidth=8, 949 | panelHeight=6, 950 | startY=32 951 | ) + /* Repo Server Stats section: row header at y=38, panels passed to grid.makeGrid with startY=39. */ 952 | [ 953 | repoServerStatsRow + 954 | row.gridPos.withX(0) + 955 | row.gridPos.withY(38) + 956 | row.gridPos.withW(24) + 957 | row.gridPos.withH(1), 958 | ] + 959 | grid.makeGrid( 960 | [ 961 | gitRequestsLsRemoteTimeSeriesPanel, 962 | gitRequestsCheckoutTimeSeriesPanel, 963 | gitFetchPerformanceHeatmapPanel, 964 | gitLsRemotePerformanceHeatmapPanel, 965 | ], 966 | panelWidth=12, 967 | panelHeight=6, 968 | startY=39 969 | ) 970 | ) + /* Annotations are added only when $._config.annotation.enabled is true; otherwise an empty object is merged, leaving the dashboard unchanged. */ 971 | if $._config.annotation.enabled then 972 | dashboard.withAnnotations($._config.customAnnotation) 973 | else {}, 974 | }, 975 | } 976 | -------------------------------------------------------------------------------- /dashboards/dashboards.libsonnet: -------------------------------------------------------------------------------- 1 | /* Combines the operational, applications and notifications dashboard files (and an empty object) with +. */ (import 'argo-cd-operational.libsonnet') + 2 | (import 'argo-cd-applications.libsonnet') + 3 | (import 'argo-cd-notifications.libsonnet') + 4 | {} 5 | -------------------------------------------------------------------------------- /dashboards_out/argo-cd-application-overview.json:
-------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ ], 3 | "__requires": [ ], 4 | "description": "A dashboard that monitors ArgoCD with a focus on Application status. It is created using the [argo-cd-mixin](https://github.com/adinhodovic/argo-cd-mixin). Requires custom configuration to add application badges. Please refer to the mixin.", 5 | "editable": true, 6 | "links": [ 7 | { 8 | "tags": [ 9 | "ci/cd", 10 | "argo-cd" 11 | ], 12 | "targetBlank": true, 13 | "title": "ArgoCD Dashboards", 14 | "type": "dashboards" 15 | } 16 | ], 17 | "panels": [ 18 | { 19 | "collapsed": false, 20 | "gridPos": { 21 | "h": 1, 22 | "w": 24, 23 | "x": 0, 24 | "y": 0 25 | }, 26 | "id": 1, 27 | "title": "Summary by Kubernetes Cluster, Project", 28 | "type": "row" 29 | }, 30 | { 31 | "datasource": { 32 | "type": "datasource", 33 | "uid": "-- Mixed --" 34 | }, 35 | "fieldConfig": { 36 | "defaults": { 37 | "custom": { 38 | "fillOpacity": 10 39 | }, 40 | "unit": "short" 41 | } 42 | }, 43 | "gridPos": { 44 | "h": 5, 45 | "w": 9, 46 | "x": 0, 47 | "y": 1 48 | }, 49 | "id": 2, 50 | "options": { 51 | "legend": { 52 | "calcs": [ 53 | "last", 54 | "max" 55 | ], 56 | "displayMode": "table", 57 | "placement": "right", 58 | "showLegend": true, 59 | "sortBy": "Last", 60 | "sortDesc": true 61 | }, 62 | "tooltip": { 63 | "mode": "multi", 64 | "sort": "desc" 65 | } 66 | }, 67 | "pluginVersion": "v11.4.0", 68 | "targets": [ 69 | { 70 | "datasource": { 71 | "type": "prometheus", 72 | "uid": "$datasource" 73 | }, 74 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n }\n) by (job, dest_server, project, health_status)\n", 75 | "legendFormat": "{{ dest_server }}/{{ project }} - {{ health_status }}" 76 | } 77 | ], 78 | "title": "Application Health Status", 79 | "type": "timeseries" 80 | }, 81 | { 82 | "datasource": { 83 | "type": "datasource", 84 | 
"uid": "-- Mixed --" 85 | }, 86 | "fieldConfig": { 87 | "defaults": { 88 | "custom": { 89 | "fillOpacity": 10 90 | }, 91 | "unit": "short" 92 | } 93 | }, 94 | "gridPos": { 95 | "h": 5, 96 | "w": 9, 97 | "x": 9, 98 | "y": 1 99 | }, 100 | "id": 3, 101 | "options": { 102 | "legend": { 103 | "calcs": [ 104 | "last", 105 | "max" 106 | ], 107 | "displayMode": "table", 108 | "placement": "right", 109 | "showLegend": true, 110 | "sortBy": "Last", 111 | "sortDesc": true 112 | }, 113 | "tooltip": { 114 | "mode": "multi", 115 | "sort": "desc" 116 | } 117 | }, 118 | "pluginVersion": "v11.4.0", 119 | "targets": [ 120 | { 121 | "datasource": { 122 | "type": "prometheus", 123 | "uid": "$datasource" 124 | }, 125 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n }\n) by (job, dest_server, project, sync_status)\n", 126 | "legendFormat": "{{ dest_server }}/{{ project }} - {{ sync_status }}" 127 | } 128 | ], 129 | "title": "Application Sync Status", 130 | "type": "timeseries" 131 | }, 132 | { 133 | "datasource": { 134 | "type": "datasource", 135 | "uid": "-- Mixed --" 136 | }, 137 | "fieldConfig": { 138 | "defaults": { 139 | "custom": { 140 | "fillOpacity": 10 141 | }, 142 | "unit": "short" 143 | } 144 | }, 145 | "gridPos": { 146 | "h": 5, 147 | "w": 9, 148 | "x": 0, 149 | "y": 6 150 | }, 151 | "id": 4, 152 | "options": { 153 | "legend": { 154 | "calcs": [ 155 | "last", 156 | "max" 157 | ], 158 | "displayMode": "table", 159 | "placement": "right", 160 | "showLegend": true, 161 | "sortBy": "Last", 162 | "sortDesc": true 163 | }, 164 | "tooltip": { 165 | "mode": "multi", 166 | "sort": "desc" 167 | } 168 | }, 169 | "pluginVersion": "v11.4.0", 170 | "targets": [ 171 | { 172 | "datasource": { 173 | "type": "prometheus", 174 | "uid": "$datasource" 175 | }, 176 | "expr": "sum(\n round(\n increase(\n argocd_app_sync_total{\n 
cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n }[$__rate_interval]\n )\n )\n) by (job, dest_server, project, phase)\n", 177 | "legendFormat": "{{ dest_server }}/{{ project }} - {{ phase }}" 178 | } 179 | ], 180 | "title": "Application Syncs", 181 | "type": "timeseries" 182 | }, 183 | { 184 | "datasource": { 185 | "type": "datasource", 186 | "uid": "-- Mixed --" 187 | }, 188 | "fieldConfig": { 189 | "defaults": { 190 | "custom": { 191 | "fillOpacity": 10 192 | }, 193 | "unit": "short" 194 | } 195 | }, 196 | "gridPos": { 197 | "h": 5, 198 | "w": 9, 199 | "x": 9, 200 | "y": 6 201 | }, 202 | "id": 5, 203 | "options": { 204 | "legend": { 205 | "calcs": [ 206 | "last", 207 | "max" 208 | ], 209 | "displayMode": "table", 210 | "placement": "right", 211 | "showLegend": true, 212 | "sortBy": "Last", 213 | "sortDesc": true 214 | }, 215 | "tooltip": { 216 | "mode": "multi", 217 | "sort": "desc" 218 | } 219 | }, 220 | "pluginVersion": "v11.4.0", 221 | "targets": [ 222 | { 223 | "datasource": { 224 | "type": "prometheus", 225 | "uid": "$datasource" 226 | }, 227 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n }\n) by (job, dest_server, project, autosync_enabled)\n", 228 | "legendFormat": "{{ dest_server }}/{{ project }} - {{ autosync_enabled }}" 229 | } 230 | ], 231 | "title": "Application Auto Sync Enabled", 232 | "type": "timeseries" 233 | }, 234 | { 235 | "datasource": { 236 | "type": "datasource", 237 | "uid": "-- Mixed --" 238 | }, 239 | "gridPos": { 240 | "h": 10, 241 | "w": 6, 242 | "x": 18, 243 | "y": 1 244 | }, 245 | "id": 6, 246 | "options": { 247 | "content": "No applications defined", 248 | "mode": "markdown" 249 | }, 250 | "pluginVersion": "v11.4.0", 251 | "title": "Application Badges", 252 | "type": "text" 253 | }, 254 | { 255 | "collapsed": false, 256 | "gridPos": { 257 | "h": 
1, 258 | "w": 18, 259 | "x": 0, 260 | "y": 11 261 | }, 262 | "id": 7, 263 | "title": "Applications (Unhealthy/OutOfSync/AutoSyncDisabled) Summary", 264 | "type": "row" 265 | }, 266 | { 267 | "datasource": { 268 | "type": "datasource", 269 | "uid": "-- Mixed --" 270 | }, 271 | "fieldConfig": { 272 | "defaults": { 273 | "unit": "short" 274 | }, 275 | "overrides": [ 276 | { 277 | "matcher": { 278 | "id": "byName", 279 | "options": "name" 280 | }, 281 | "properties": [ 282 | { 283 | "id": "links", 284 | "value": [ 285 | { 286 | "targetBlank": true, 287 | "title": "Go To Application", 288 | "url": "https://argocd.com/applications/${__data.fields.Project}/${__value.raw}" 289 | } 290 | ] 291 | } 292 | ] 293 | }, 294 | { 295 | "matcher": { 296 | "id": "byName", 297 | "options": "health_status" 298 | }, 299 | "properties": [ 300 | { 301 | "id": "color", 302 | "value": { 303 | "fixedColor": "yellow", 304 | "mode": "fixed" 305 | } 306 | }, 307 | { 308 | "id": "custom.displayMode", 309 | "value": "color-background" 310 | } 311 | ] 312 | } 313 | ] 314 | }, 315 | "gridPos": { 316 | "h": 6, 317 | "w": 12, 318 | "x": 0, 319 | "y": 12 320 | }, 321 | "id": 8, 322 | "options": { 323 | "footer": { 324 | "enablePagination": true 325 | }, 326 | "sortBy": [ 327 | { 328 | "displayName": "Application" 329 | } 330 | ] 331 | }, 332 | "pluginVersion": "v11.4.0", 333 | "targets": [ 334 | { 335 | "datasource": { 336 | "type": "prometheus", 337 | "uid": "$datasource" 338 | }, 339 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n health_status!~\"Healthy|Progressing\"\n }\n) by (job, dest_server, project, name, health_status)\n", 340 | "format": "table", 341 | "instant": true 342 | } 343 | ], 344 | "title": "Applications Unhealthy", 345 | "transformations": [ 346 | { 347 | "id": "organize", 348 | "options": { 349 | "excludeByName": { 350 | "Time": true, 351 | "Value": true, 352 | 
"dest_server": true, 353 | "job": true 354 | }, 355 | "indexByName": { 356 | "health_status": 2, 357 | "name": 0, 358 | "project": 1 359 | }, 360 | "renameByName": { 361 | "dest_server": "Cluster", 362 | "health_status": "Health Status", 363 | "job": "Job", 364 | "name": "Application", 365 | "project": "Project" 366 | } 367 | } 368 | } 369 | ], 370 | "type": "table" 371 | }, 372 | { 373 | "datasource": { 374 | "type": "datasource", 375 | "uid": "-- Mixed --" 376 | }, 377 | "fieldConfig": { 378 | "defaults": { 379 | "unit": "short" 380 | }, 381 | "overrides": [ 382 | { 383 | "matcher": { 384 | "id": "byName", 385 | "options": "name" 386 | }, 387 | "properties": [ 388 | { 389 | "id": "links", 390 | "value": [ 391 | { 392 | "targetBlank": true, 393 | "title": "Go To Application", 394 | "url": "https://argocd.com/applications/${__data.fields.Project}/${__value.raw}" 395 | } 396 | ] 397 | } 398 | ] 399 | }, 400 | { 401 | "matcher": { 402 | "id": "byName", 403 | "options": "sync_status" 404 | }, 405 | "properties": [ 406 | { 407 | "id": "color", 408 | "value": { 409 | "fixedColor": "yellow", 410 | "mode": "fixed" 411 | } 412 | }, 413 | { 414 | "id": "custom.displayMode", 415 | "value": "color-background" 416 | } 417 | ] 418 | } 419 | ] 420 | }, 421 | "gridPos": { 422 | "h": 6, 423 | "w": 12, 424 | "x": 12, 425 | "y": 12 426 | }, 427 | "id": 9, 428 | "options": { 429 | "footer": { 430 | "enablePagination": true 431 | }, 432 | "sortBy": [ 433 | { 434 | "displayName": "Application" 435 | } 436 | ] 437 | }, 438 | "pluginVersion": "v11.4.0", 439 | "targets": [ 440 | { 441 | "datasource": { 442 | "type": "prometheus", 443 | "uid": "$datasource" 444 | }, 445 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n sync_status!=\"Synced\"\n }\n) by (job, dest_server, project, name, sync_status) > 0\n", 446 | "format": "table", 447 | "instant": true 448 | } 449 | ], 450 | 
"title": "Applications Out Of Sync", 451 | "transformations": [ 452 | { 453 | "id": "organize", 454 | "options": { 455 | "excludeByName": { 456 | "Time": true, 457 | "Value": true, 458 | "dest_server": true, 459 | "job": true 460 | }, 461 | "indexByName": { 462 | "name": 0, 463 | "project": 1, 464 | "sync_status": 2 465 | }, 466 | "renameByName": { 467 | "dest_server": "Cluster", 468 | "job": "Job", 469 | "name": "Application", 470 | "project": "Project", 471 | "sync_status": "Sync Status" 472 | } 473 | } 474 | } 475 | ], 476 | "type": "table" 477 | }, 478 | { 479 | "datasource": { 480 | "type": "datasource", 481 | "uid": "-- Mixed --" 482 | }, 483 | "fieldConfig": { 484 | "defaults": { 485 | "unit": "short" 486 | }, 487 | "overrides": [ 488 | { 489 | "matcher": { 490 | "id": "byName", 491 | "options": "name" 492 | }, 493 | "properties": [ 494 | { 495 | "id": "links", 496 | "value": [ 497 | { 498 | "targetBlank": true, 499 | "title": "Go To Application", 500 | "url": "https://argocd.com/applications/${__data.fields.Project}/${__value.raw}" 501 | } 502 | ] 503 | } 504 | ] 505 | }, 506 | { 507 | "matcher": { 508 | "id": "byName", 509 | "options": "Value" 510 | }, 511 | "properties": [ 512 | { 513 | "id": "color", 514 | "value": { 515 | "fixedColor": "yellow", 516 | "mode": "fixed" 517 | } 518 | }, 519 | { 520 | "id": "custom.displayMode", 521 | "value": "color-background" 522 | } 523 | ] 524 | } 525 | ] 526 | }, 527 | "gridPos": { 528 | "h": 6, 529 | "w": 12, 530 | "x": 0, 531 | "y": 18 532 | }, 533 | "id": 10, 534 | "options": { 535 | "footer": { 536 | "enablePagination": true 537 | }, 538 | "sortBy": [ 539 | { 540 | "displayName": "Application" 541 | } 542 | ] 543 | }, 544 | "pluginVersion": "v11.4.0", 545 | "targets": [ 546 | { 547 | "datasource": { 548 | "type": "prometheus", 549 | "uid": "$datasource" 550 | }, 551 | "expr": "sum(\n round(\n increase(\n argocd_app_sync_total{\n 
cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n phase!=\"Succeeded\"\n }[7d]\n )\n )\n) by (job, dest_server, project, name, phase) > 0\n", 552 | "format": "table", 553 | "instant": true 554 | } 555 | ], 556 | "title": "Applications That Failed to Sync[7d]", 557 | "transformations": [ 558 | { 559 | "id": "organize", 560 | "options": { 561 | "excludeByName": { 562 | "Time": true, 563 | "dest_server": true, 564 | "job": true 565 | }, 566 | "indexByName": { 567 | "name": 0, 568 | "phase": 2, 569 | "project": 1 570 | }, 571 | "renameByName": { 572 | "Value": "Count", 573 | "dest_server": "Cluster", 574 | "job": "Job", 575 | "name": "Application", 576 | "phase": "Phase", 577 | "project": "Project" 578 | } 579 | } 580 | } 581 | ], 582 | "type": "table" 583 | }, 584 | { 585 | "datasource": { 586 | "type": "datasource", 587 | "uid": "-- Mixed --" 588 | }, 589 | "fieldConfig": { 590 | "defaults": { 591 | "unit": "short" 592 | }, 593 | "overrides": [ 594 | { 595 | "matcher": { 596 | "id": "byName", 597 | "options": "name" 598 | }, 599 | "properties": [ 600 | { 601 | "id": "links", 602 | "value": [ 603 | { 604 | "targetBlank": true, 605 | "title": "Go To Application", 606 | "url": "https://argocd.com/applications/${__data.fields.Project}/${__value.raw}" 607 | } 608 | ] 609 | } 610 | ] 611 | }, 612 | { 613 | "matcher": { 614 | "id": "byName", 615 | "options": "autosync_enabled" 616 | }, 617 | "properties": [ 618 | { 619 | "id": "color", 620 | "value": { 621 | "fixedColor": "yellow", 622 | "mode": "fixed" 623 | } 624 | }, 625 | { 626 | "id": "custom.displayMode", 627 | "value": "color-background" 628 | } 629 | ] 630 | } 631 | ] 632 | }, 633 | "gridPos": { 634 | "h": 6, 635 | "w": 12, 636 | "x": 12, 637 | "y": 18 638 | }, 639 | "id": 11, 640 | "options": { 641 | "footer": { 642 | "enablePagination": true 643 | }, 644 | "sortBy": [ 645 | { 646 | "displayName": "Application" 647 | } 648 | ] 649 | }, 650 | 
"pluginVersion": "v11.4.0", 651 | "targets": [ 652 | { 653 | "datasource": { 654 | "type": "prometheus", 655 | "uid": "$datasource" 656 | }, 657 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n autosync_enabled!=\"true\"\n }\n) by (job, dest_server, project, name, autosync_enabled) > 0\n", 658 | "format": "table", 659 | "instant": true 660 | } 661 | ], 662 | "title": "Applications With Auto Sync Disabled", 663 | "transformations": [ 664 | { 665 | "id": "organize", 666 | "options": { 667 | "excludeByName": { 668 | "Time": true, 669 | "Value": true, 670 | "dest_server": true, 671 | "job": true 672 | }, 673 | "indexByName": { 674 | "autosync_enabled": 2, 675 | "name": 0, 676 | "project": 1 677 | }, 678 | "renameByName": { 679 | "autosync_enabled": "Auto Sync Enabled", 680 | "dest_server": "Kubernetes Cluster", 681 | "job": "Job", 682 | "name": "Application", 683 | "project": "Project" 684 | } 685 | } 686 | } 687 | ], 688 | "type": "table" 689 | }, 690 | { 691 | "collapsed": false, 692 | "gridPos": { 693 | "h": 1, 694 | "w": 24, 695 | "x": 0, 696 | "y": 23 697 | }, 698 | "id": 12, 699 | "title": "Application ($application)", 700 | "type": "row" 701 | }, 702 | { 703 | "datasource": { 704 | "type": "datasource", 705 | "uid": "-- Mixed --" 706 | }, 707 | "fieldConfig": { 708 | "defaults": { 709 | "custom": { 710 | "fillOpacity": 10 711 | }, 712 | "unit": "short" 713 | } 714 | }, 715 | "gridPos": { 716 | "h": 8, 717 | "w": 8, 718 | "x": 0, 719 | "y": 24 720 | }, 721 | "id": 13, 722 | "interval": "5m", 723 | "options": { 724 | "legend": { 725 | "calcs": [ 726 | "last" 727 | ], 728 | "displayMode": "table", 729 | "showLegend": true, 730 | "sortBy": "Last", 731 | "sortDesc": true 732 | }, 733 | "tooltip": { 734 | "mode": "multi", 735 | "sort": "desc" 736 | } 737 | }, 738 | "pluginVersion": "v11.4.0", 739 | "targets": [ 740 | { 741 | "datasource": { 742 | "type": 
"prometheus", 743 | "uid": "$datasource" 744 | }, 745 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n name=~\"$application\",\n }\n) by (namespace, job, dest_server, project, name, health_status)\n", 746 | "legendFormat": "{{ dest_server }}/{{ project }}/{{ name }} - {{ health_status }}" 747 | } 748 | ], 749 | "title": "Application Health Status", 750 | "type": "timeseries" 751 | }, 752 | { 753 | "datasource": { 754 | "type": "datasource", 755 | "uid": "-- Mixed --" 756 | }, 757 | "fieldConfig": { 758 | "defaults": { 759 | "custom": { 760 | "fillOpacity": 10 761 | }, 762 | "unit": "short" 763 | } 764 | }, 765 | "gridPos": { 766 | "h": 8, 767 | "w": 8, 768 | "x": 8, 769 | "y": 24 770 | }, 771 | "id": 14, 772 | "interval": "5m", 773 | "options": { 774 | "legend": { 775 | "calcs": [ 776 | "last" 777 | ], 778 | "displayMode": "table", 779 | "showLegend": true, 780 | "sortBy": "Last", 781 | "sortDesc": true 782 | }, 783 | "tooltip": { 784 | "mode": "multi", 785 | "sort": "desc" 786 | } 787 | }, 788 | "pluginVersion": "v11.4.0", 789 | "targets": [ 790 | { 791 | "datasource": { 792 | "type": "prometheus", 793 | "uid": "$datasource" 794 | }, 795 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n name=~\"$application\",\n }\n) by (namespace, job, dest_server, project, name, sync_status)\n", 796 | "legendFormat": "{{ dest_server }}/{{ project }}/{{ name }} - {{ sync_status }}" 797 | } 798 | ], 799 | "title": "Application Sync Status", 800 | "type": "timeseries" 801 | }, 802 | { 803 | "datasource": { 804 | "type": "datasource", 805 | "uid": "-- Mixed --" 806 | }, 807 | "fieldConfig": { 808 | "defaults": { 809 | "custom": { 810 | "fillOpacity": 10 811 | }, 812 | "unit": "short" 813 | } 814 | }, 815 | "gridPos": { 816 | "h": 8, 817 | "w": 8, 818 | "x": 16, 
819 | "y": 24 820 | }, 821 | "id": 15, 822 | "interval": "5m", 823 | "options": { 824 | "legend": { 825 | "calcs": [ 826 | "last" 827 | ], 828 | "displayMode": "table", 829 | "showLegend": true, 830 | "sortBy": "Last", 831 | "sortDesc": true 832 | }, 833 | "tooltip": { 834 | "mode": "multi", 835 | "sort": "desc" 836 | } 837 | }, 838 | "pluginVersion": "v11.4.0", 839 | "targets": [ 840 | { 841 | "datasource": { 842 | "type": "prometheus", 843 | "uid": "$datasource" 844 | }, 845 | "expr": "sum(\n round(\n increase(\n argocd_app_sync_total{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n name=~\"$application\",\n }[$__rate_interval]\n )\n )\n) by (namespace, job, dest_server, project, name, phase)\n", 846 | "legendFormat": "{{ dest_server }}/{{ project }}/{{ name }} - {{ phase }}" 847 | } 848 | ], 849 | "title": "Application Sync Result", 850 | "type": "timeseries" 851 | } 852 | ], 853 | "schemaVersion": 39, 854 | "tags": [ 855 | "ci/cd", 856 | "argo-cd" 857 | ], 858 | "templating": { 859 | "list": [ 860 | { 861 | "current": { 862 | "selected": true, 863 | "text": "default", 864 | "value": "default" 865 | }, 866 | "label": "Data source", 867 | "name": "datasource", 868 | "query": "prometheus", 869 | "type": "datasource" 870 | }, 871 | { 872 | "datasource": { 873 | "type": "prometheus", 874 | "uid": "${datasource}" 875 | }, 876 | "hide": 2, 877 | "label": "Cluster", 878 | "name": "cluster", 879 | "query": "label_values(argocd_app_info{}, cluster)", 880 | "refresh": 2, 881 | "sort": 1, 882 | "type": "query" 883 | }, 884 | { 885 | "datasource": { 886 | "type": "prometheus", 887 | "uid": "${datasource}" 888 | }, 889 | "includeAll": true, 890 | "label": "Namespace", 891 | "multi": true, 892 | "name": "namespace", 893 | "query": "label_values(argocd_app_info{cluster=\"$cluster\"}, namespace)", 894 | "refresh": 2, 895 | "sort": 1, 896 | "type": "query" 897 | }, 898 | { 899 | "datasource": { 900 | 
"type": "prometheus", 901 | "uid": "${datasource}" 902 | }, 903 | "includeAll": true, 904 | "label": "Job", 905 | "multi": true, 906 | "name": "job", 907 | "query": "label_values(argocd_app_info{cluster=\"$cluster\", namespace=~\"$namespace\"}, job)", 908 | "refresh": 2, 909 | "sort": 1, 910 | "type": "query" 911 | }, 912 | { 913 | "datasource": { 914 | "type": "prometheus", 915 | "uid": "${datasource}" 916 | }, 917 | "includeAll": true, 918 | "label": "Kubernetes Cluster", 919 | "multi": true, 920 | "name": "kubernetes_cluster", 921 | "query": "label_values(argocd_app_info{cluster=\"$cluster\", namespace=~\"$namespace\", job=~\"$job\"}, dest_server)", 922 | "refresh": 2, 923 | "sort": 1, 924 | "type": "query" 925 | }, 926 | { 927 | "datasource": { 928 | "type": "prometheus", 929 | "uid": "${datasource}" 930 | }, 931 | "includeAll": true, 932 | "label": "Project", 933 | "multi": true, 934 | "name": "project", 935 | "query": "label_values(argocd_app_info{cluster=\"$cluster\", namespace=~\"$namespace\", job=~\"$job\", dest_server=~\"$kubernetes_cluster\"}, project)", 936 | "refresh": 2, 937 | "sort": 1, 938 | "type": "query" 939 | }, 940 | { 941 | "datasource": { 942 | "type": "prometheus", 943 | "uid": "${datasource}" 944 | }, 945 | "includeAll": false, 946 | "label": "Application", 947 | "multi": true, 948 | "name": "application", 949 | "query": "label_values(argocd_app_info{cluster=\"$cluster\", namespace=~\"$namespace\", job=~\"$job\", dest_server=~\"$kubernetes_cluster\", project=~\"$project\"}, name)", 950 | "refresh": 2, 951 | "sort": 1, 952 | "type": "query" 953 | } 954 | ] 955 | }, 956 | "time": { 957 | "from": "now-6h", 958 | "to": "now" 959 | }, 960 | "timezone": "utc", 961 | "title": "ArgoCD / Application / Overview", 962 | "uid": "argo-cd-application-overview-kask" 963 | } 964 | -------------------------------------------------------------------------------- /dashboards_out/argo-cd-notifications-overview.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ ], 3 | "__requires": [ ], 4 | "description": "A dashboard that monitors ArgoCD notifications. It is created using the [argo-cd-mixin](https://github.com/adinhodovic/argo-cd-mixin).", 5 | "editable": true, 6 | "links": [ 7 | { 8 | "tags": [ 9 | "ci/cd", 10 | "argo-cd" 11 | ], 12 | "targetBlank": true, 13 | "title": "ArgoCD Dashboards", 14 | "type": "dashboards" 15 | } 16 | ], 17 | "panels": [ 18 | { 19 | "collapsed": false, 20 | "gridPos": { 21 | "h": 1, 22 | "w": 24, 23 | "x": 0, 24 | "y": 0 25 | }, 26 | "id": 1, 27 | "title": "Summary", 28 | "type": "row" 29 | }, 30 | { 31 | "datasource": { 32 | "type": "datasource", 33 | "uid": "-- Mixed --" 34 | }, 35 | "fieldConfig": { 36 | "defaults": { 37 | "custom": { 38 | "fillOpacity": 10 39 | }, 40 | "unit": "short" 41 | } 42 | }, 43 | "gridPos": { 44 | "h": 8, 45 | "w": 12, 46 | "x": 0, 47 | "y": 1 48 | }, 49 | "id": 2, 50 | "options": { 51 | "legend": { 52 | "calcs": [ 53 | "last", 54 | "max" 55 | ], 56 | "displayMode": "table", 57 | "placement": "right", 58 | "showLegend": true, 59 | "sortBy": "Last", 60 | "sortDesc": true 61 | }, 62 | "tooltip": { 63 | "mode": "multi", 64 | "sort": "desc" 65 | } 66 | }, 67 | "pluginVersion": "v11.4.0", 68 | "targets": [ 69 | { 70 | "datasource": { 71 | "type": "prometheus", 72 | "uid": "$datasource" 73 | }, 74 | "expr": "sum(\n round(\n increase(\n argocd_notifications_deliveries_total{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\n\n exported_service=~\"$exported_service\",\n }[$__rate_interval]\n )\n )\n) by (job, exported_service, succeeded)\n", 75 | "legendFormat": "{{ exported_service }} - Succeeded: {{ succeeded }}" 76 | } 77 | ], 78 | "title": "Notification Deliveries", 79 | "type": "timeseries" 80 | }, 81 | { 82 | "datasource": { 83 | "type": "datasource", 84 | "uid": "-- Mixed --" 85 | }, 86 | "fieldConfig": { 87 | "defaults": { 88 | "custom": { 89 | 
"fillOpacity": 10 90 | }, 91 | "unit": "short" 92 | } 93 | }, 94 | "gridPos": { 95 | "h": 8, 96 | "w": 12, 97 | "x": 12, 98 | "y": 1 99 | }, 100 | "id": 3, 101 | "options": { 102 | "legend": { 103 | "calcs": [ 104 | "last", 105 | "max" 106 | ], 107 | "displayMode": "table", 108 | "placement": "right", 109 | "showLegend": true, 110 | "sortBy": "Last", 111 | "sortDesc": true 112 | }, 113 | "tooltip": { 114 | "mode": "multi", 115 | "sort": "desc" 116 | } 117 | }, 118 | "pluginVersion": "v11.4.0", 119 | "targets": [ 120 | { 121 | "datasource": { 122 | "type": "prometheus", 123 | "uid": "$datasource" 124 | }, 125 | "expr": "sum(\n round(\n increase(\n argocd_notifications_trigger_eval_total{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\n\n }[$__rate_interval]\n )\n )\n) by (job, name, triggered)\n", 126 | "legendFormat": "{{ name }} - Triggered: {{ triggered }}" 127 | } 128 | ], 129 | "title": "Trigger Evaluations", 130 | "type": "timeseries" 131 | } 132 | ], 133 | "schemaVersion": 39, 134 | "tags": [ 135 | "ci/cd", 136 | "argo-cd" 137 | ], 138 | "templating": { 139 | "list": [ 140 | { 141 | "current": { 142 | "selected": true, 143 | "text": "default", 144 | "value": "default" 145 | }, 146 | "label": "Data source", 147 | "name": "datasource", 148 | "query": "prometheus", 149 | "type": "datasource" 150 | }, 151 | { 152 | "datasource": { 153 | "type": "prometheus", 154 | "uid": "${datasource}" 155 | }, 156 | "hide": 2, 157 | "label": "Cluster", 158 | "name": "cluster", 159 | "query": "label_values(argocd_notifications_deliveries_total{}, cluster)", 160 | "refresh": 2, 161 | "sort": 1, 162 | "type": "query" 163 | }, 164 | { 165 | "datasource": { 166 | "type": "prometheus", 167 | "uid": "${datasource}" 168 | }, 169 | "includeAll": true, 170 | "label": "Namespace", 171 | "multi": true, 172 | "name": "namespace", 173 | "query": "label_values(argocd_notifications_deliveries_total{cluster=\"$cluster\"}, namespace)", 174 | "refresh": 2, 175 | "sort": 1, 176 | 
"type": "query" 177 | }, 178 | { 179 | "allValue": ".*", 180 | "datasource": { 181 | "type": "prometheus", 182 | "uid": "${datasource}" 183 | }, 184 | "includeAll": true, 185 | "label": "Job", 186 | "multi": true, 187 | "name": "job", 188 | "query": "label_values(argocd_notifications_deliveries_total{cluster=\"$cluster\", namespace=~\"$namespace\"}, job)", 189 | "refresh": 2, 190 | "sort": 1, 191 | "type": "query" 192 | }, 193 | { 194 | "datasource": { 195 | "type": "prometheus", 196 | "uid": "${datasource}" 197 | }, 198 | "includeAll": true, 199 | "label": "Notifications Service", 200 | "multi": true, 201 | "name": "exported_service", 202 | "query": "label_values(argocd_notifications_deliveries_total{cluster=\"$cluster\", namespace=~\"$namespace\", job=~\"$job\"}, exported_service)", 203 | "refresh": 2, 204 | "sort": 1, 205 | "type": "query" 206 | } 207 | ] 208 | }, 209 | "time": { 210 | "from": "now-2d", 211 | "to": "now" 212 | }, 213 | "timezone": "utc", 214 | "title": "ArgoCD / Notifications / Overview", 215 | "uid": "argo-cd-notifications-overview-kask" 216 | } 217 | -------------------------------------------------------------------------------- /dashboards_out/argo-cd-operational-overview.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ ], 3 | "__requires": [ ], 4 | "description": "A dashboard that monitors ArgoCD with a focus on the operational. 
It is created using the [argo-cd-mixin](https://github.com/adinhodovic/argo-cd-mixin).", 5 | "editable": true, 6 | "links": [ 7 | { 8 | "tags": [ 9 | "ci/cd", 10 | "argo-cd" 11 | ], 12 | "targetBlank": true, 13 | "title": "ArgoCD Dashboards", 14 | "type": "dashboards" 15 | } 16 | ], 17 | "panels": [ 18 | { 19 | "collapsed": false, 20 | "gridPos": { 21 | "h": 1, 22 | "w": 24, 23 | "x": 0, 24 | "y": 0 25 | }, 26 | "id": 1, 27 | "title": "Summary", 28 | "type": "row" 29 | }, 30 | { 31 | "datasource": { 32 | "type": "datasource", 33 | "uid": "-- Mixed --" 34 | }, 35 | "fieldConfig": { 36 | "defaults": { 37 | "unit": "short" 38 | } 39 | }, 40 | "gridPos": { 41 | "h": 4, 42 | "w": 4, 43 | "x": 0, 44 | "y": 1 45 | }, 46 | "id": 2, 47 | "pluginVersion": "v11.4.0", 48 | "targets": [ 49 | { 50 | "datasource": { 51 | "type": "prometheus", 52 | "uid": "$datasource" 53 | }, 54 | "expr": "sum(\n argocd_cluster_info{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job'\n }\n)\n" 55 | } 56 | ], 57 | "title": "Clusters", 58 | "type": "stat" 59 | }, 60 | { 61 | "datasource": { 62 | "type": "datasource", 63 | "uid": "-- Mixed --" 64 | }, 65 | "fieldConfig": { 66 | "defaults": { 67 | "unit": "short" 68 | } 69 | }, 70 | "gridPos": { 71 | "h": 4, 72 | "w": 4, 73 | "x": 4, 74 | "y": 1 75 | }, 76 | "id": 3, 77 | "pluginVersion": "v11.4.0", 78 | "targets": [ 79 | { 80 | "datasource": { 81 | "type": "prometheus", 82 | "uid": "$datasource" 83 | }, 84 | "expr": "count(\n count(\n argocd_app_info{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job'\n }\n )\n by (repo)\n)\n" 85 | } 86 | ], 87 | "title": "Repositories", 88 | "type": "stat" 89 | }, 90 | { 91 | "datasource": { 92 | "type": "datasource", 93 | "uid": "-- Mixed --" 94 | }, 95 | "fieldConfig": { 96 | "defaults": { 97 | "unit": "short" 98 | } 99 | }, 100 | "gridPos": { 101 | "h": 4, 102 | "w": 4, 103 | "x": 8, 104 | "y": 1 105 | }, 106 | "id": 4, 107 | "pluginVersion": "v11.4.0", 108 | "targets": [ 109 | { 110 
| "datasource": { 111 | "type": "prometheus", 112 | "uid": "$datasource" 113 | }, 114 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n }\n)\n" 115 | } 116 | ], 117 | "title": "Applications", 118 | "type": "stat" 119 | }, 120 | { 121 | "datasource": { 122 | "type": "datasource", 123 | "uid": "-- Mixed --" 124 | }, 125 | "fieldConfig": { 126 | "defaults": { 127 | "unit": "short" 128 | }, 129 | "overrides": [ 130 | { 131 | "matcher": { 132 | "id": "byName", 133 | "options": "Healthy" 134 | }, 135 | "properties": [ 136 | { 137 | "id": "color", 138 | "value": { 139 | "fixedColor": "green", 140 | "mode": "fixed" 141 | } 142 | } 143 | ] 144 | }, 145 | { 146 | "matcher": { 147 | "id": "byName", 148 | "options": "Degraded" 149 | }, 150 | "properties": [ 151 | { 152 | "id": "color", 153 | "value": { 154 | "fixedColor": "red", 155 | "mode": "fixed" 156 | } 157 | } 158 | ] 159 | }, 160 | { 161 | "matcher": { 162 | "id": "byName", 163 | "options": "Progressing" 164 | }, 165 | "properties": [ 166 | { 167 | "id": "color", 168 | "value": { 169 | "fixedColor": "yellow", 170 | "mode": "fixed" 171 | } 172 | } 173 | ] 174 | } 175 | ] 176 | }, 177 | "gridPos": { 178 | "h": 6, 179 | "w": 6, 180 | "x": 0, 181 | "y": 5 182 | }, 183 | "id": 5, 184 | "options": { 185 | "legend": { 186 | "displayMode": "table", 187 | "placement": "right", 188 | "showLegend": true, 189 | "values": [ 190 | "value" 191 | ] 192 | }, 193 | "tooltip": { 194 | "mode": "multi" 195 | } 196 | }, 197 | "pluginVersion": "v11.4.0", 198 | "targets": [ 199 | { 200 | "datasource": { 201 | "type": "prometheus", 202 | "uid": "$datasource" 203 | }, 204 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n }\n) by (health_status)\n", 205 | "instant": true, 206 | "legendFormat": "{{ health_status }}" 207 | } 208 
| ], 209 | "title": "Health Status", 210 | "type": "piechart" 211 | }, 212 | { 213 | "datasource": { 214 | "type": "datasource", 215 | "uid": "-- Mixed --" 216 | }, 217 | "fieldConfig": { 218 | "defaults": { 219 | "unit": "short" 220 | }, 221 | "overrides": [ 222 | { 223 | "matcher": { 224 | "id": "byName", 225 | "options": "Synced" 226 | }, 227 | "properties": [ 228 | { 229 | "id": "color", 230 | "value": { 231 | "fixedColor": "green", 232 | "mode": "fixed" 233 | } 234 | } 235 | ] 236 | }, 237 | { 238 | "matcher": { 239 | "id": "byName", 240 | "options": "OutOfSync" 241 | }, 242 | "properties": [ 243 | { 244 | "id": "color", 245 | "value": { 246 | "fixedColor": "red", 247 | "mode": "fixed" 248 | } 249 | } 250 | ] 251 | }, 252 | { 253 | "matcher": { 254 | "id": "byName", 255 | "options": "Unknown" 256 | }, 257 | "properties": [ 258 | { 259 | "id": "color", 260 | "value": { 261 | "fixedColor": "yellow", 262 | "mode": "fixed" 263 | } 264 | } 265 | ] 266 | } 267 | ] 268 | }, 269 | "gridPos": { 270 | "h": 6, 271 | "w": 6, 272 | "x": 6, 273 | "y": 5 274 | }, 275 | "id": 6, 276 | "options": { 277 | "legend": { 278 | "displayMode": "table", 279 | "placement": "right", 280 | "showLegend": true, 281 | "values": [ 282 | "value" 283 | ] 284 | }, 285 | "tooltip": { 286 | "mode": "multi" 287 | } 288 | }, 289 | "pluginVersion": "v11.4.0", 290 | "targets": [ 291 | { 292 | "datasource": { 293 | "type": "prometheus", 294 | "uid": "$datasource" 295 | }, 296 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n }\n) by (sync_status)\n", 297 | "instant": true, 298 | "legendFormat": "{{ sync_status }}" 299 | } 300 | ], 301 | "title": "Sync Status", 302 | "type": "piechart" 303 | }, 304 | { 305 | "datasource": { 306 | "type": "datasource", 307 | "uid": "-- Mixed --" 308 | }, 309 | "fieldConfig": { 310 | "defaults": { 311 | "unit": "short" 312 | }, 313 | "overrides": [ 314 | { 315 
| "matcher": { 316 | "id": "byName", 317 | "options": "name" 318 | }, 319 | "properties": [ 320 | { 321 | "id": "links", 322 | "value": [ 323 | { 324 | "targetBlank": true, 325 | "title": "Go To Application", 326 | "type": "dashboard", 327 | "url": "/d/argo-cd-application-overview-kask/argocd-notifications-overview?&var-project=${__data.fields.Project}&var-application=${__value.raw}" 328 | } 329 | ] 330 | } 331 | ] 332 | } 333 | ] 334 | }, 335 | "gridPos": { 336 | "h": 10, 337 | "w": 12, 338 | "x": 12, 339 | "y": 1 340 | }, 341 | "id": 7, 342 | "options": { 343 | "footer": { 344 | "enablePagination": true 345 | }, 346 | "sortBy": [ 347 | { 348 | "displayName": "Application" 349 | } 350 | ] 351 | }, 352 | "pluginVersion": "v11.4.0", 353 | "targets": [ 354 | { 355 | "datasource": { 356 | "type": "prometheus", 357 | "uid": "$datasource" 358 | }, 359 | "expr": "sum(\n argocd_app_info{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n }\n) by (job, dest_server, project, name, health_status, sync_status)\n", 360 | "format": "table", 361 | "instant": true 362 | } 363 | ], 364 | "title": "Applications", 365 | "transformations": [ 366 | { 367 | "id": "organize", 368 | "options": { 369 | "excludeByName": { 370 | "Time": true, 371 | "Value": true, 372 | "dest_server": true, 373 | "job": true 374 | }, 375 | "indexByName": { 376 | "health_status": 2, 377 | "name": 0, 378 | "project": 1, 379 | "sync_status": 3 380 | }, 381 | "renameByName": { 382 | "dest_server": "Kubernetes Cluster", 383 | "health_status": "Health Status", 384 | "job": "Job", 385 | "name": "Application", 386 | "project": "Project", 387 | "sync_status": "Sync Status" 388 | } 389 | } 390 | } 391 | ], 392 | "type": "table" 393 | }, 394 | { 395 | "collapsed": false, 396 | "gridPos": { 397 | "h": 1, 398 | "w": 24, 399 | "x": 0, 400 | "y": 11 401 | }, 402 | "id": 8, 403 | "title": "Sync Stats", 404 | "type": "row" 405 | }, 406 | { 407 | 
"datasource": { 408 | "type": "datasource", 409 | "uid": "-- Mixed --" 410 | }, 411 | "fieldConfig": { 412 | "defaults": { 413 | "custom": { 414 | "fillOpacity": 10 415 | }, 416 | "unit": "short" 417 | } 418 | }, 419 | "gridPos": { 420 | "h": 6, 421 | "w": 12, 422 | "x": 0, 423 | "y": 12 424 | }, 425 | "id": 9, 426 | "options": { 427 | "legend": { 428 | "calcs": [ 429 | "lastNotNull", 430 | "mean", 431 | "max" 432 | ], 433 | "displayMode": "table", 434 | "placement": "right", 435 | "showLegend": true, 436 | "sortBy": "Mean", 437 | "sortDesc": true 438 | }, 439 | "tooltip": { 440 | "mode": "multi", 441 | "sort": "desc" 442 | } 443 | }, 444 | "pluginVersion": "v11.4.0", 445 | "targets": [ 446 | { 447 | "datasource": { 448 | "type": "prometheus", 449 | "uid": "$datasource" 450 | }, 451 | "expr": "sum(\n round(\n increase(\n argocd_app_sync_total{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n }[$__rate_interval]\n )\n )\n) by (job, dest_server, project, name)\n", 452 | "legendFormat": "{{ dest_server }}/{{ project }}/{{ name }}" 453 | } 454 | ], 455 | "title": "Sync Activity", 456 | "type": "timeseries" 457 | }, 458 | { 459 | "datasource": { 460 | "type": "datasource", 461 | "uid": "-- Mixed --" 462 | }, 463 | "fieldConfig": { 464 | "defaults": { 465 | "custom": { 466 | "fillOpacity": 10 467 | }, 468 | "unit": "short" 469 | } 470 | }, 471 | "gridPos": { 472 | "h": 6, 473 | "w": 12, 474 | "x": 12, 475 | "y": 12 476 | }, 477 | "id": 10, 478 | "options": { 479 | "legend": { 480 | "calcs": [ 481 | "lastNotNull", 482 | "mean", 483 | "max" 484 | ], 485 | "displayMode": "table", 486 | "placement": "right", 487 | "showLegend": true, 488 | "sortBy": "Mean", 489 | "sortDesc": true 490 | }, 491 | "tooltip": { 492 | "mode": "multi", 493 | "sort": "desc" 494 | } 495 | }, 496 | "pluginVersion": "v11.4.0", 497 | "targets": [ 498 | { 499 | "datasource": { 500 | "type": "prometheus", 501 | "uid": 
"$datasource" 502 | }, 503 | "expr": "sum(\n round(\n increase(\n argocd_app_sync_total{\n cluster=\"$cluster\",\nnamespace=~'$namespace',\njob=~'$job',\ndest_server=~'$kubernetes_cluster',\nproject=~'$project',\n\n phase=~\"Error|Failed\"\n }[$__rate_interval]\n )\n )\n) by (job, dest_server, project, application, phase)\n", 504 | "legendFormat": "{{ dest_server }}/{{ project }}/{{ application }} - {{ phase }}" 505 | } 506 | ], 507 | "title": "Sync Failures", 508 | "type": "timeseries" 509 | }, 510 | { 511 | "collapsed": false, 512 | "gridPos": { 513 | "h": 1, 514 | "w": 24, 515 | "x": 0, 516 | "y": 18 517 | }, 518 | "id": 11, 519 | "title": "Controller Stats", 520 | "type": "row" 521 | }, 522 | { 523 | "datasource": { 524 | "type": "datasource", 525 | "uid": "-- Mixed --" 526 | }, 527 | "fieldConfig": { 528 | "defaults": { 529 | "custom": { 530 | "fillOpacity": 10 531 | }, 532 | "unit": "short" 533 | } 534 | }, 535 | "gridPos": { 536 | "h": 6, 537 | "w": 12, 538 | "x": 0, 539 | "y": 19 540 | }, 541 | "id": 12, 542 | "options": { 543 | "legend": { 544 | "calcs": [ 545 | "lastNotNull", 546 | "mean", 547 | "max" 548 | ], 549 | "displayMode": "table", 550 | "placement": "right", 551 | "showLegend": true, 552 | "sortBy": "Mean", 553 | "sortDesc": true 554 | }, 555 | "tooltip": { 556 | "mode": "multi", 557 | "sort": "desc" 558 | } 559 | }, 560 | "pluginVersion": "v11.4.0", 561 | "targets": [ 562 | { 563 | "datasource": { 564 | "type": "prometheus", 565 | "uid": "$datasource" 566 | }, 567 | "expr": "sum(\n round(\n increase(\n argocd_app_reconcile_count{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job',\n dest_server=~'$kubernetes_cluster'\n }[$__rate_interval]\n )\n )\n) by (namespace, job, dest_server)\n", 568 | "legendFormat": "{{ namespace }}/{{ dest_server }}" 569 | } 570 | ], 571 | "title": "Recociliation Activity", 572 | "type": "timeseries" 573 | }, 574 | { 575 | "datasource": { 576 | "type": "datasource", 577 | "uid": "-- Mixed --" 578 | }, 579 
| "fieldConfig": { 580 | "defaults": { 581 | "unit": "short" 582 | } 583 | }, 584 | "gridPos": { 585 | "h": 6, 586 | "w": 12, 587 | "x": 12, 588 | "y": 19 589 | }, 590 | "id": 13, 591 | "pluginVersion": "v11.4.0", 592 | "targets": [ 593 | { 594 | "datasource": { 595 | "type": "prometheus", 596 | "uid": "$datasource" 597 | }, 598 | "expr": "sum(\n increase(\n argocd_app_reconcile_bucket{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job',\n dest_server=~'$kubernetes_cluster'\n }[$__rate_interval]\n )\n) by (le)\n", 599 | "format": "heatmap", 600 | "legendFormat": "{{ le }}" 601 | } 602 | ], 603 | "title": "Reconciliation Performance", 604 | "type": "heatmap" 605 | }, 606 | { 607 | "datasource": { 608 | "type": "datasource", 609 | "uid": "-- Mixed --" 610 | }, 611 | "fieldConfig": { 612 | "defaults": { 613 | "custom": { 614 | "fillOpacity": 10 615 | }, 616 | "unit": "short" 617 | } 618 | }, 619 | "gridPos": { 620 | "h": 6, 621 | "w": 12, 622 | "x": 0, 623 | "y": 25 624 | }, 625 | "id": 14, 626 | "options": { 627 | "legend": { 628 | "calcs": [ 629 | "lastNotNull", 630 | "mean", 631 | "max" 632 | ], 633 | "displayMode": "table", 634 | "placement": "right", 635 | "showLegend": true, 636 | "sortBy": "Mean", 637 | "sortDesc": true 638 | }, 639 | "tooltip": { 640 | "mode": "multi", 641 | "sort": "desc" 642 | } 643 | }, 644 | "pluginVersion": "v11.4.0", 645 | "targets": [ 646 | { 647 | "datasource": { 648 | "type": "prometheus", 649 | "uid": "$datasource" 650 | }, 651 | "expr": "sum(\n round(\n increase(\n argocd_app_k8s_request_total{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job',\n project=~'$project'\n }[$__rate_interval]\n )\n )\n) by (job, server, project, verb, resource_kind)\n", 652 | "legendFormat": "{{ server }}/{{ project }} - {{ verb }}/{{ resource_kind }}" 653 | } 654 | ], 655 | "title": "K8s API Activity", 656 | "type": "timeseries" 657 | }, 658 | { 659 | "datasource": { 660 | "type": "datasource", 661 | "uid": "-- Mixed --" 662 
| }, 663 | "fieldConfig": { 664 | "defaults": { 665 | "custom": { 666 | "fillOpacity": 10 667 | }, 668 | "unit": "short" 669 | } 670 | }, 671 | "gridPos": { 672 | "h": 6, 673 | "w": 12, 674 | "x": 12, 675 | "y": 25 676 | }, 677 | "id": 15, 678 | "options": { 679 | "legend": { 680 | "calcs": [ 681 | "lastNotNull", 682 | "mean", 683 | "max" 684 | ], 685 | "displayMode": "table", 686 | "placement": "right", 687 | "showLegend": true, 688 | "sortBy": "Mean", 689 | "sortDesc": true 690 | }, 691 | "tooltip": { 692 | "mode": "multi", 693 | "sort": "desc" 694 | } 695 | }, 696 | "pluginVersion": "v11.4.0", 697 | "targets": [ 698 | { 699 | "datasource": { 700 | "type": "prometheus", 701 | "uid": "$datasource" 702 | }, 703 | "expr": "sum(\n argocd_kubectl_exec_pending{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job'\n }\n) by (job, command)\n", 704 | "legendFormat": "{{ dest_server }} - {{ command }}" 705 | } 706 | ], 707 | "title": "Pending Kubectl Runs", 708 | "type": "timeseries" 709 | }, 710 | { 711 | "collapsed": false, 712 | "gridPos": { 713 | "h": 1, 714 | "w": 24, 715 | "x": 0, 716 | "y": 31 717 | }, 718 | "id": 16, 719 | "title": "Cluster Stats", 720 | "type": "row" 721 | }, 722 | { 723 | "datasource": { 724 | "type": "datasource", 725 | "uid": "-- Mixed --" 726 | }, 727 | "fieldConfig": { 728 | "defaults": { 729 | "custom": { 730 | "fillOpacity": 10 731 | }, 732 | "unit": "short" 733 | } 734 | }, 735 | "gridPos": { 736 | "h": 6, 737 | "w": 8, 738 | "x": 0, 739 | "y": 32 740 | }, 741 | "id": 17, 742 | "options": { 743 | "legend": { 744 | "calcs": [ 745 | "lastNotNull", 746 | "mean", 747 | "max" 748 | ], 749 | "displayMode": "table", 750 | "placement": "right", 751 | "showLegend": true, 752 | "sortBy": "Mean", 753 | "sortDesc": true 754 | }, 755 | "tooltip": { 756 | "mode": "multi", 757 | "sort": "desc" 758 | } 759 | }, 760 | "pluginVersion": "v11.4.0", 761 | "targets": [ 762 | { 763 | "datasource": { 764 | "type": "prometheus", 765 | "uid": 
"$datasource" 766 | }, 767 | "expr": "sum(\n argocd_cluster_api_resource_objects{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job',\n server=~'$kubernetes_cluster'\n }\n) by (namespace, job, server)\n", 768 | "legendFormat": "{{ server }}" 769 | } 770 | ], 771 | "title": "Resource Objects", 772 | "type": "timeseries" 773 | }, 774 | { 775 | "datasource": { 776 | "type": "datasource", 777 | "uid": "-- Mixed --" 778 | }, 779 | "fieldConfig": { 780 | "defaults": { 781 | "custom": { 782 | "fillOpacity": 10 783 | }, 784 | "unit": "short" 785 | } 786 | }, 787 | "gridPos": { 788 | "h": 6, 789 | "w": 8, 790 | "x": 8, 791 | "y": 32 792 | }, 793 | "id": 18, 794 | "options": { 795 | "legend": { 796 | "calcs": [ 797 | "lastNotNull", 798 | "mean", 799 | "max" 800 | ], 801 | "displayMode": "table", 802 | "placement": "right", 803 | "showLegend": true, 804 | "sortBy": "Mean", 805 | "sortDesc": true 806 | }, 807 | "tooltip": { 808 | "mode": "multi", 809 | "sort": "desc" 810 | } 811 | }, 812 | "pluginVersion": "v11.4.0", 813 | "targets": [ 814 | { 815 | "datasource": { 816 | "type": "prometheus", 817 | "uid": "$datasource" 818 | }, 819 | "expr": "sum(\n argocd_cluster_api_resources{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job',\n server=~'$kubernetes_cluster'\n }\n) by (namespace, job, server)\n", 820 | "legendFormat": "{{ server }}" 821 | } 822 | ], 823 | "title": "API Resources", 824 | "type": "timeseries" 825 | }, 826 | { 827 | "datasource": { 828 | "type": "datasource", 829 | "uid": "-- Mixed --" 830 | }, 831 | "fieldConfig": { 832 | "defaults": { 833 | "custom": { 834 | "fillOpacity": 10 835 | }, 836 | "unit": "short" 837 | } 838 | }, 839 | "gridPos": { 840 | "h": 6, 841 | "w": 8, 842 | "x": 16, 843 | "y": 32 844 | }, 845 | "id": 19, 846 | "options": { 847 | "legend": { 848 | "calcs": [ 849 | "lastNotNull", 850 | "mean", 851 | "max" 852 | ], 853 | "displayMode": "table", 854 | "placement": "right", 855 | "showLegend": true, 856 | "sortBy": 
"Mean", 857 | "sortDesc": true 858 | }, 859 | "tooltip": { 860 | "mode": "multi", 861 | "sort": "desc" 862 | } 863 | }, 864 | "pluginVersion": "v11.4.0", 865 | "targets": [ 866 | { 867 | "datasource": { 868 | "type": "prometheus", 869 | "uid": "$datasource" 870 | }, 871 | "expr": "sum(\n increase(\n argocd_cluster_events_total{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job',\n server=~'$kubernetes_cluster'\n }[$__rate_interval]\n )\n) by (namespace, job, server)\n", 872 | "legendFormat": "{{ server }}" 873 | } 874 | ], 875 | "title": "Cluster Events", 876 | "type": "timeseries" 877 | }, 878 | { 879 | "collapsed": false, 880 | "gridPos": { 881 | "h": 1, 882 | "w": 24, 883 | "x": 0, 884 | "y": 38 885 | }, 886 | "id": 20, 887 | "title": "Repo Server Stats", 888 | "type": "row" 889 | }, 890 | { 891 | "datasource": { 892 | "type": "datasource", 893 | "uid": "-- Mixed --" 894 | }, 895 | "fieldConfig": { 896 | "defaults": { 897 | "custom": { 898 | "fillOpacity": 10 899 | }, 900 | "unit": "short" 901 | } 902 | }, 903 | "gridPos": { 904 | "h": 6, 905 | "w": 12, 906 | "x": 0, 907 | "y": 39 908 | }, 909 | "id": 21, 910 | "options": { 911 | "legend": { 912 | "calcs": [ 913 | "lastNotNull", 914 | "mean", 915 | "max" 916 | ], 917 | "displayMode": "table", 918 | "placement": "right", 919 | "showLegend": true, 920 | "sortBy": "Mean", 921 | "sortDesc": true 922 | }, 923 | "tooltip": { 924 | "mode": "multi", 925 | "sort": "desc" 926 | } 927 | }, 928 | "pluginVersion": "v11.4.0", 929 | "targets": [ 930 | { 931 | "datasource": { 932 | "type": "prometheus", 933 | "uid": "$datasource" 934 | }, 935 | "expr": "sum(\n increase(\n argocd_git_request_total{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job',\n request_type=\"ls-remote\"\n }[$__rate_interval]\n )\n) by (namespace, job, repo)\n", 936 | "legendFormat": "{{ namespace }} - {{ repo }}" 937 | } 938 | ], 939 | "title": "Git Requests (ls-remote)", 940 | "type": "timeseries" 941 | }, 942 | { 943 | 
"datasource": { 944 | "type": "datasource", 945 | "uid": "-- Mixed --" 946 | }, 947 | "fieldConfig": { 948 | "defaults": { 949 | "custom": { 950 | "fillOpacity": 10 951 | }, 952 | "unit": "short" 953 | } 954 | }, 955 | "gridPos": { 956 | "h": 6, 957 | "w": 12, 958 | "x": 12, 959 | "y": 39 960 | }, 961 | "id": 22, 962 | "options": { 963 | "legend": { 964 | "calcs": [ 965 | "lastNotNull", 966 | "mean", 967 | "max" 968 | ], 969 | "displayMode": "table", 970 | "placement": "right", 971 | "showLegend": true, 972 | "sortBy": "Mean", 973 | "sortDesc": true 974 | }, 975 | "tooltip": { 976 | "mode": "multi", 977 | "sort": "desc" 978 | } 979 | }, 980 | "pluginVersion": "v11.4.0", 981 | "targets": [ 982 | { 983 | "datasource": { 984 | "type": "prometheus", 985 | "uid": "$datasource" 986 | }, 987 | "expr": "sum(\n increase(\n argocd_git_request_total{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job',\n request_type=\"fetch\"\n }[$__rate_interval]\n )\n) by (namespace, job, repo)\n", 988 | "legendFormat": "{{ namespace }} - {{ repo }}" 989 | } 990 | ], 991 | "title": "Git Requests (checkout)", 992 | "type": "timeseries" 993 | }, 994 | { 995 | "datasource": { 996 | "type": "datasource", 997 | "uid": "-- Mixed --" 998 | }, 999 | "fieldConfig": { 1000 | "defaults": { 1001 | "unit": "short" 1002 | } 1003 | }, 1004 | "gridPos": { 1005 | "h": 6, 1006 | "w": 12, 1007 | "x": 0, 1008 | "y": 45 1009 | }, 1010 | "id": 23, 1011 | "pluginVersion": "v11.4.0", 1012 | "targets": [ 1013 | { 1014 | "datasource": { 1015 | "type": "prometheus", 1016 | "uid": "$datasource" 1017 | }, 1018 | "expr": "sum(\n increase(\n argocd_git_request_duration_seconds_bucket{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job',\n request_type=\"fetch\"\n }[$__rate_interval]\n )\n) by (le)\n", 1019 | "format": "heatmap", 1020 | "legendFormat": "{{ le }}" 1021 | } 1022 | ], 1023 | "title": "Git Fetch Performance", 1024 | "type": "heatmap" 1025 | }, 1026 | { 1027 | "datasource": { 1028 | 
"type": "datasource", 1029 | "uid": "-- Mixed --" 1030 | }, 1031 | "fieldConfig": { 1032 | "defaults": { 1033 | "unit": "short" 1034 | } 1035 | }, 1036 | "gridPos": { 1037 | "h": 6, 1038 | "w": 12, 1039 | "x": 12, 1040 | "y": 45 1041 | }, 1042 | "id": 24, 1043 | "pluginVersion": "v11.4.0", 1044 | "targets": [ 1045 | { 1046 | "datasource": { 1047 | "type": "prometheus", 1048 | "uid": "$datasource" 1049 | }, 1050 | "expr": "sum(\n increase(\n argocd_git_request_duration_seconds_bucket{\n cluster=\"$cluster\",\n namespace=~'$namespace',\n job=~'$job',\n request_type=\"ls-remote\"\n }[$__rate_interval]\n )\n) by (le)\n", 1051 | "format": "heatmap", 1052 | "legendFormat": "{{ le }}" 1053 | } 1054 | ], 1055 | "title": "Git Ls-remote Performance", 1056 | "type": "heatmap" 1057 | } 1058 | ], 1059 | "schemaVersion": 39, 1060 | "tags": [ 1061 | "ci/cd", 1062 | "argo-cd" 1063 | ], 1064 | "templating": { 1065 | "list": [ 1066 | { 1067 | "current": { 1068 | "selected": true, 1069 | "text": "default", 1070 | "value": "default" 1071 | }, 1072 | "label": "Data source", 1073 | "name": "datasource", 1074 | "query": "prometheus", 1075 | "type": "datasource" 1076 | }, 1077 | { 1078 | "datasource": { 1079 | "type": "prometheus", 1080 | "uid": "${datasource}" 1081 | }, 1082 | "hide": 2, 1083 | "label": "Cluster", 1084 | "name": "cluster", 1085 | "query": "label_values(argocd_app_info{}, cluster)", 1086 | "refresh": 2, 1087 | "sort": 1, 1088 | "type": "query" 1089 | }, 1090 | { 1091 | "datasource": { 1092 | "type": "prometheus", 1093 | "uid": "${datasource}" 1094 | }, 1095 | "includeAll": true, 1096 | "label": "Namespace", 1097 | "multi": true, 1098 | "name": "namespace", 1099 | "query": "label_values(argocd_app_info{cluster=\"$cluster\"}, namespace)", 1100 | "refresh": 2, 1101 | "sort": 1, 1102 | "type": "query" 1103 | }, 1104 | { 1105 | "allValue": ".*", 1106 | "datasource": { 1107 | "type": "prometheus", 1108 | "uid": "${datasource}" 1109 | }, 1110 | "includeAll": true, 1111 | 
"label": "Job", 1112 | "multi": true, 1113 | "name": "job", 1114 | "query": "label_values(job)", 1115 | "refresh": 2, 1116 | "regex": "argo.*", 1117 | "sort": 1, 1118 | "type": "query" 1119 | }, 1120 | { 1121 | "datasource": { 1122 | "type": "prometheus", 1123 | "uid": "${datasource}" 1124 | }, 1125 | "includeAll": true, 1126 | "label": "Kubernetes Cluster", 1127 | "multi": true, 1128 | "name": "kubernetes_cluster", 1129 | "query": "label_values(argocd_app_info{cluster=\"$cluster\", namespace=~\"$namespace\", job=~\"$job\"}, dest_server)", 1130 | "refresh": 2, 1131 | "sort": 1, 1132 | "type": "query" 1133 | }, 1134 | { 1135 | "datasource": { 1136 | "type": "prometheus", 1137 | "uid": "${datasource}" 1138 | }, 1139 | "includeAll": true, 1140 | "label": "Project", 1141 | "multi": true, 1142 | "name": "project", 1143 | "query": "label_values(argocd_app_info{cluster=\"$cluster\", namespace=~\"$namespace\", job=~\"$job\", dest_server=~\"$kubernetes_cluster\"}, project)", 1144 | "refresh": 2, 1145 | "sort": 1, 1146 | "type": "query" 1147 | } 1148 | ] 1149 | }, 1150 | "time": { 1151 | "from": "now-6h", 1152 | "to": "now" 1153 | }, 1154 | "timezone": "utc", 1155 | "title": "ArgoCD / Operational / Overview", 1156 | "uid": "argo-cd-operational-overview-kask" 1157 | } 1158 | -------------------------------------------------------------------------------- /images/argo-cd-application-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adinhodovic/argo-cd-mixin/d22df9f6385996d56eb547cfc2d88278be0702d9/images/argo-cd-application-overview.png -------------------------------------------------------------------------------- /images/argo-cd-notifications-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adinhodovic/argo-cd-mixin/d22df9f6385996d56eb547cfc2d88278be0702d9/images/argo-cd-notifications-overview.png 
-------------------------------------------------------------------------------- /images/argo-cd-operational-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/adinhodovic/argo-cd-mixin/d22df9f6385996d56eb547cfc2d88278be0702d9/images/argo-cd-operational-overview.png -------------------------------------------------------------------------------- /jsonnetfile.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "dependencies": [ 4 | { 5 | "source": { 6 | "git": { 7 | "remote": "https://github.com/grafana/grafonnet.git", 8 | "subdir": "gen/grafonnet-latest" 9 | } 10 | }, 11 | "version": "main" 12 | }, 13 | { 14 | "source": { 15 | "git": { 16 | "remote": "https://github.com/jsonnet-libs/docsonnet.git", 17 | "subdir": "doc-util" 18 | } 19 | }, 20 | "version": "master" 21 | }, 22 | { 23 | "source": { 24 | "git": { 25 | "remote": "https://github.com/jsonnet-libs/xtd.git", 26 | "subdir": "" 27 | } 28 | }, 29 | "version": "master" 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /lib/alerts.jsonnet: -------------------------------------------------------------------------------- 1 | std.manifestYamlDoc((import '../mixin.libsonnet').prometheusAlerts) 2 | -------------------------------------------------------------------------------- /lib/dashboards.jsonnet: -------------------------------------------------------------------------------- 1 | local dashboards = (import '../mixin.libsonnet').grafanaDashboards; 2 | 3 | { 4 | [name]: dashboards[name] 5 | for name in std.objectFields(dashboards) 6 | } 7 | -------------------------------------------------------------------------------- /mixin.libsonnet: -------------------------------------------------------------------------------- 1 | (import 'alerts/alerts.libsonnet') + 2 | (import 'dashboards/dashboards.libsonnet') + 3 | (import 
'config.libsonnet') 4 | -------------------------------------------------------------------------------- /prometheus_alerts.yaml: -------------------------------------------------------------------------------- 1 | "groups": 2 | - "name": "argo-cd" 3 | "rules": 4 | - "alert": "ArgoCdAppSyncFailed" 5 | "annotations": 6 | "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" 7 | "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has failed to sync with the status {{ $labels.phase }} the past 10m." 8 | "summary": "An ArgoCD Application has Failed to Sync." 9 | "expr": | 10 | sum( 11 | round( 12 | increase( 13 | argocd_app_sync_total{ 14 | job=~".*", 15 | phase!="Succeeded" 16 | }[10m] 17 | ) 18 | ) 19 | ) by (cluster, job, dest_server, project, name, phase) > 0 20 | "for": "1m" 21 | "labels": 22 | "severity": "warning" 23 | - "alert": "ArgoCdAppUnhealthy" 24 | "annotations": 25 | "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" 26 | "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is unhealthy with the health status {{ $labels.health_status }} for the past 15m." 27 | "summary": "An ArgoCD Application is Unhealthy." 
28 | "expr": | 29 | sum( 30 | argocd_app_info{ 31 | job=~".*", 32 | health_status!~"Healthy|Progressing" 33 | } 34 | ) by (cluster, job, dest_server, project, name, health_status) 35 | > 0 36 | "for": "15m" 37 | "labels": 38 | "severity": "warning" 39 | - "alert": "ArgoCdAppOutOfSync" 40 | "annotations": 41 | "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" 42 | "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} is out of sync with the sync status {{ $labels.sync_status }} for the past 15m." 43 | "summary": "An ArgoCD Application is Out Of Sync." 44 | "expr": | 45 | sum( 46 | argocd_app_info{ 47 | job=~".*", 48 | sync_status!="Synced" 49 | } 50 | ) by (cluster, job, dest_server, project, name, sync_status) 51 | > 0 52 | "for": "15m" 53 | "labels": 54 | "severity": "warning" 55 | - "alert": "ArgoCdAppAutoSyncDisabled" 56 | "annotations": 57 | "dashboard_url": "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server={{ $labels.dest_server }}&var-project={{ $labels.project }}&var-application={{ $labels.name }}" 58 | "description": "The application {{ $labels.dest_server }}/{{ $labels.project }}/{{ $labels.name }} has autosync disabled for the past 2h." 59 | "summary": "An ArgoCD Application has AutoSync Disabled." 
60 | "expr": | 61 | sum( 62 | argocd_app_info{ 63 | job=~".*", 64 | autosync_enabled!="true", 65 | name!~"" 66 | } 67 | ) by (cluster, job, dest_server, project, name, autosync_enabled) 68 | > 0 69 | "for": "2h" 70 | "labels": 71 | "severity": "warning" 72 | - "alert": "ArgoCdNotificationDeliveryFailed" 73 | "annotations": 74 | "dashboard_url": "https://grafana.com/d/argo-cd-notifications-overview-kask/argocd-notifications-overview?var-job={{ $labels.job }}&var-exported_service={{ $labels.exported_service }}" 75 | "description": "The notification job {{ $labels.job }} has failed to deliver to {{ $labels.exported_service }} for the past 10m." 76 | "summary": "ArgoCD Notification Delivery Failed." 77 | "expr": | 78 | sum( 79 | round( 80 | increase( 81 | argocd_notifications_deliveries_total{ 82 | job=~".*", 83 | succeeded!="true" 84 | }[10m] 85 | ) 86 | ) 87 | ) by (cluster, job, exported_service, succeeded) > 0 88 | "for": "1m" 89 | "labels": 90 | "severity": "warning" 91 | -------------------------------------------------------------------------------- /scripts/go.mod: -------------------------------------------------------------------------------- 1 | module _ 2 | 3 | go 1.17 4 | 5 | require ( 6 | github.com/google/go-jsonnet v0.17.1-0.20210520122306-7373f5b60678 7 | github.com/grafana/dashboard-linter v0.0.0-20231114210226-c458893a5731 8 | github.com/jsonnet-bundler/jsonnet-bundler v0.4.0 9 | github.com/prometheus/prometheus v0.48.1-0.20240109134750-3db4596965dc 10 | ) 11 | 12 | require ( 13 | cloud.google.com/go v0.110.10 // indirect 14 | cloud.google.com/go/compute v1.23.3 // indirect 15 | cloud.google.com/go/compute/metadata v0.2.3 // indirect 16 | cloud.google.com/go/iam v1.1.5 // indirect 17 | cloud.google.com/go/storage v1.30.1 // indirect 18 | github.com/Azure/azure-sdk-for-go v57.1.0+incompatible // indirect 19 | github.com/Azure/azure-sdk-for-go/sdk/azcore v1.9.0 // indirect 20 | github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.4.0 // indirect 21 | 
github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.0 // indirect 22 | github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.3.0 // indirect 23 | github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4 v4.3.0 // indirect 24 | github.com/Azure/go-autorest/autorest v0.11.20 // indirect 25 | github.com/Azure/go-autorest/autorest/adal v0.9.15 // indirect 26 | github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect 27 | github.com/Azure/go-autorest/autorest/to v0.4.0 // indirect 28 | github.com/Azure/go-autorest/autorest/validation v0.3.1 // indirect 29 | github.com/Azure/go-autorest/logger v0.2.1 // indirect 30 | github.com/Azure/go-autorest/tracing v0.6.0 // indirect 31 | github.com/AzureAD/microsoft-authentication-library-for-go v1.1.1 // indirect 32 | github.com/BurntSushi/toml v0.3.1 // indirect 33 | github.com/Code-Hex/go-generics-cache v1.3.1 // indirect 34 | github.com/alecthomas/kingpin/v2 v2.4.0 // indirect 35 | github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect 36 | github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect 37 | github.com/armon/go-metrics v0.4.1 // indirect 38 | github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect 39 | github.com/aws/aws-sdk-go v1.48.14 // indirect 40 | github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 // indirect 41 | github.com/beorn7/perks v1.0.1 // indirect 42 | github.com/cespare/xxhash/v2 v2.2.0 // indirect 43 | github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 // indirect 44 | github.com/containerd/containerd v1.5.4 // indirect 45 | github.com/coreos/go-systemd/v22 v22.5.0 // indirect 46 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 47 | github.com/dennwc/varint v1.0.0 // indirect 48 | github.com/digitalocean/godo v1.106.0 // indirect 49 | github.com/docker/distribution v2.8.2+incompatible // indirect 50 | github.com/docker/docker 
v24.0.7+incompatible // indirect 51 | github.com/docker/go-connections v0.4.0 // indirect 52 | github.com/docker/go-units v0.5.0 // indirect 53 | github.com/edsrzf/mmap-go v1.1.0 // indirect 54 | github.com/emicklei/go-restful/v3 v3.10.2 // indirect 55 | github.com/envoyproxy/go-control-plane v0.11.1 // indirect 56 | github.com/envoyproxy/protoc-gen-validate v1.0.2 // indirect 57 | github.com/fatih/color v1.15.0 // indirect 58 | github.com/felixge/httpsnoop v1.0.4 // indirect 59 | github.com/fsnotify/fsnotify v1.7.0 // indirect 60 | github.com/ghodss/yaml v1.0.0 // indirect 61 | github.com/go-kit/log v0.2.1 // indirect 62 | github.com/go-logfmt/logfmt v0.6.0 // indirect 63 | github.com/go-logr/logr v1.3.0 // indirect 64 | github.com/go-logr/stdr v1.2.2 // indirect 65 | github.com/go-openapi/analysis v0.21.4 // indirect 66 | github.com/go-openapi/errors v0.20.4 // indirect 67 | github.com/go-openapi/jsonpointer v0.20.0 // indirect 68 | github.com/go-openapi/jsonreference v0.20.2 // indirect 69 | github.com/go-openapi/loads v0.21.2 // indirect 70 | github.com/go-openapi/spec v0.20.9 // indirect 71 | github.com/go-openapi/strfmt v0.21.9 // indirect 72 | github.com/go-openapi/swag v0.22.4 // indirect 73 | github.com/go-openapi/validate v0.22.1 // indirect 74 | github.com/go-resty/resty/v2 v2.10.0 // indirect 75 | github.com/go-zookeeper/zk v1.0.3 // indirect 76 | github.com/gogo/protobuf v1.3.2 // indirect 77 | github.com/golang-jwt/jwt/v4 v4.0.0 // indirect 78 | github.com/golang-jwt/jwt/v5 v5.0.0 // indirect 79 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 80 | github.com/golang/protobuf v1.5.3 // indirect 81 | github.com/golang/snappy v0.0.4 // indirect 82 | github.com/google/gnostic-models v0.6.8 // indirect 83 | github.com/google/go-cmp v0.6.0 // indirect 84 | github.com/google/go-querystring v1.1.0 // indirect 85 | github.com/google/gofuzz v1.2.0 // indirect 86 | github.com/google/pprof v0.0.0-20231205033806-a5a03c77bf08 // 
indirect 87 | github.com/google/s2a-go v0.1.7 // indirect 88 | github.com/google/uuid v1.4.0 // indirect 89 | github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect 90 | github.com/googleapis/gax-go/v2 v2.12.0 // indirect 91 | github.com/googleapis/gnostic v0.5.5 // indirect 92 | github.com/gophercloud/gophercloud v1.8.0 // indirect 93 | github.com/gorilla/websocket v1.5.0 // indirect 94 | github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd // indirect 95 | github.com/hashicorp/consul/api v1.26.1 // indirect 96 | github.com/hashicorp/cronexpr v1.1.2 // indirect 97 | github.com/hashicorp/errwrap v1.1.0 // indirect 98 | github.com/hashicorp/go-cleanhttp v0.5.2 // indirect 99 | github.com/hashicorp/go-hclog v1.5.0 // indirect 100 | github.com/hashicorp/go-immutable-radix v1.3.1 // indirect 101 | github.com/hashicorp/go-multierror v1.1.1 // indirect 102 | github.com/hashicorp/go-retryablehttp v0.7.4 // indirect 103 | github.com/hashicorp/go-rootcerts v1.0.2 // indirect 104 | github.com/hashicorp/go-version v1.6.0 // indirect 105 | github.com/hashicorp/golang-lru v0.6.0 // indirect 106 | github.com/hashicorp/hcl v1.0.0 // indirect 107 | github.com/hashicorp/nomad/api v0.0.0-20230721134942-515895c7690c // indirect 108 | github.com/hashicorp/serf v0.10.1 // indirect 109 | github.com/hetznercloud/hcloud-go v1.32.0 // indirect 110 | github.com/hetznercloud/hcloud-go/v2 v2.4.0 // indirect 111 | github.com/imdario/mergo v0.3.16 // indirect 112 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 113 | github.com/ionos-cloud/sdk-go/v6 v6.1.10 // indirect 114 | github.com/jmespath/go-jmespath v0.4.0 // indirect 115 | github.com/josharian/intern v1.0.0 // indirect 116 | github.com/jpillora/backoff v1.0.0 // indirect 117 | github.com/json-iterator/go v1.1.12 // indirect 118 | github.com/klauspost/compress v1.17.4 // indirect 119 | github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b // indirect 120 | github.com/kylelemons/godebug v1.1.0 // 
indirect 121 | github.com/linode/linodego v1.25.0 // indirect 122 | github.com/magiconair/properties v1.8.7 // indirect 123 | github.com/mailru/easyjson v0.7.7 // indirect 124 | github.com/mattn/go-colorable v0.1.13 // indirect 125 | github.com/mattn/go-isatty v0.0.19 // indirect 126 | github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect 127 | github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect 128 | github.com/miekg/dns v1.1.57 // indirect 129 | github.com/mitchellh/go-homedir v1.1.0 // indirect 130 | github.com/mitchellh/mapstructure v1.5.0 // indirect 131 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 132 | github.com/modern-go/reflect2 v1.0.2 // indirect 133 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 134 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect 135 | github.com/oklog/ulid v1.3.1 // indirect 136 | github.com/opencontainers/go-digest v1.0.0 // indirect 137 | github.com/opencontainers/image-spec v1.0.2 // indirect 138 | github.com/opentracing/opentracing-go v1.2.0 // indirect 139 | github.com/ovh/go-ovh v1.4.3 // indirect 140 | github.com/pelletier/go-toml v1.9.4 // indirect 141 | github.com/pelletier/go-toml/v2 v2.0.8 // indirect 142 | github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect 143 | github.com/pkg/errors v0.9.1 // indirect 144 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 145 | github.com/prometheus/alertmanager v0.26.0 // indirect 146 | github.com/prometheus/client_golang v1.18.0 // indirect 147 | github.com/prometheus/client_model v0.5.0 // indirect 148 | github.com/prometheus/common v0.45.1-0.20231122191551-832cd6e99f99 // indirect 149 | github.com/prometheus/common/sigv4 v0.1.0 // indirect 150 | github.com/prometheus/exporter-toolkit v0.10.0 // indirect 151 | github.com/prometheus/procfs v0.12.0 // indirect 152 | github.com/scaleway/scaleway-sdk-go 
v1.0.0-beta.21 // indirect 153 | github.com/sirupsen/logrus v1.7.0 // indirect 154 | github.com/spf13/afero v1.9.5 // indirect 155 | github.com/spf13/cast v1.5.1 // indirect 156 | github.com/spf13/cobra v1.7.0 // indirect 157 | github.com/spf13/jwalterweatherman v1.1.0 // indirect 158 | github.com/spf13/pflag v1.0.5 // indirect 159 | github.com/spf13/viper v1.16.0 // indirect 160 | github.com/stretchr/testify v1.8.4 // indirect 161 | github.com/subosito/gotenv v1.4.2 // indirect 162 | github.com/uber/jaeger-client-go v2.29.1+incompatible // indirect 163 | github.com/uber/jaeger-lib v2.4.1+incompatible // indirect 164 | github.com/vultr/govultr/v2 v2.17.2 // indirect 165 | github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect 166 | github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect 167 | github.com/xeipuuv/gojsonschema v1.2.0 // indirect 168 | github.com/xhit/go-str2duration/v2 v2.1.0 // indirect 169 | github.com/zeitlinger/conflate v0.0.0-20230622100834-279724abda8c // indirect 170 | go.mongodb.org/mongo-driver v1.13.1 // indirect 171 | go.opencensus.io v0.24.0 // indirect 172 | go.opentelemetry.io/collector/featuregate v1.0.0 // indirect 173 | go.opentelemetry.io/collector/pdata v1.0.0 // indirect 174 | go.opentelemetry.io/collector/semconv v0.90.1 // indirect 175 | go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 // indirect 176 | go.opentelemetry.io/otel v1.21.0 // indirect 177 | go.opentelemetry.io/otel/metric v1.21.0 // indirect 178 | go.opentelemetry.io/otel/trace v1.21.0 // indirect 179 | go.uber.org/atomic v1.11.0 // indirect 180 | go.uber.org/goleak v1.3.0 // indirect 181 | go.uber.org/multierr v1.11.0 // indirect 182 | golang.org/x/crypto v0.16.0 // indirect 183 | golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb // indirect 184 | golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect 185 | golang.org/x/mod v0.14.0 // indirect 186 | golang.org/x/net v0.19.0 // 
indirect 187 | golang.org/x/oauth2 v0.15.0 // indirect 188 | golang.org/x/sync v0.5.0 // indirect 189 | golang.org/x/sys v0.15.0 // indirect 190 | golang.org/x/term v0.15.0 // indirect 191 | golang.org/x/text v0.14.0 // indirect 192 | golang.org/x/time v0.5.0 // indirect 193 | golang.org/x/tools v0.16.0 // indirect 194 | golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect 195 | google.golang.org/api v0.153.0 // indirect 196 | google.golang.org/appengine v1.6.7 // indirect 197 | google.golang.org/genproto v0.0.0-20231120223509-83a465c0220f // indirect 198 | google.golang.org/genproto/googleapis/api v0.0.0-20231127180814-3a041ad873d4 // indirect 199 | google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f // indirect 200 | google.golang.org/grpc v1.59.0 // indirect 201 | google.golang.org/protobuf v1.31.0 // indirect 202 | gopkg.in/alecthomas/kingpin.v2 v2.2.6 // indirect 203 | gopkg.in/fsnotify/fsnotify.v1 v1.4.7 // indirect 204 | gopkg.in/inf.v0 v0.9.1 // indirect 205 | gopkg.in/ini.v1 v1.67.0 // indirect 206 | gopkg.in/yaml.v2 v2.4.0 // indirect 207 | gopkg.in/yaml.v3 v3.0.1 // indirect 208 | k8s.io/api v0.28.4 // indirect 209 | k8s.io/apimachinery v0.28.4 // indirect 210 | k8s.io/client-go v0.28.4 // indirect 211 | k8s.io/klog/v2 v2.110.1 // indirect 212 | k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 // indirect 213 | k8s.io/utils v0.0.0-20230711102312-30195339c3c7 // indirect 214 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 215 | sigs.k8s.io/structured-merge-diff/v4 v4.3.0 // indirect 216 | sigs.k8s.io/yaml v1.3.0 // indirect 217 | ) 218 | -------------------------------------------------------------------------------- /scripts/tools.go: -------------------------------------------------------------------------------- 1 | //go:build tools 2 | // +build tools 3 | 4 | // Package tools tracks dependencies for tools that are used in the build process.
5 | // See https://github.com/golang/go/issues/25922 6 | package tools 7 | 8 | import ( 9 | _ "github.com/google/go-jsonnet/cmd/jsonnet" 10 | _ "github.com/google/go-jsonnet/cmd/jsonnet-lint" 11 | _ "github.com/google/go-jsonnet/cmd/jsonnetfmt" 12 | _ "github.com/grafana/dashboard-linter" 13 | _ "github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb" 14 | _ "github.com/prometheus/prometheus/cmd/promtool" 15 | ) 16 | -------------------------------------------------------------------------------- /tests.yaml: -------------------------------------------------------------------------------- 1 | # yamllint disable rule:line-length 2 | --- 3 | rule_files: 4 | - prometheus_alerts.yaml 5 | 6 | tests: 7 | - interval: 5m 8 | input_series: 9 | - series: 'argocd_app_info{autosync_enabled="false", dest_server="https://kubernetes.default.svc", health_status="Healthy", job="argo-cd-argocd-application-controller-metrics", name="ci-cd", namespace="ci-cd", project="ops", sync_status="OutOfSync"}' 10 | values: "1+0x4" 11 | - series: 'argocd_app_info{autosync_enabled="false", dest_server="https://kubernetes.default.svc", health_status="Healthy", job="argo-cd-argocd-application-controller-metrics", name="ci-cd-synced", namespace="ci-cd", project="ops", sync_status="Synced"}' 12 | values: "1+0x4" 13 | alert_rule_test: 14 | - eval_time: 20m 15 | alertname: ArgoCdAppOutOfSync 16 | exp_alerts: 17 | - exp_labels: 18 | severity: warning 19 | job: argo-cd-argocd-application-controller-metrics 20 | dest_server: https://kubernetes.default.svc 21 | project: ops 22 | name: ci-cd 23 | sync_status: OutOfSync 24 | exp_annotations: 25 | summary: "An ArgoCD Application is Out Of Sync." 26 | description: "The application https://kubernetes.default.svc/ops/ci-cd is out of sync with the sync status OutOfSync for the past 15m." 
27 | dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server=https://kubernetes.default.svc&var-project=ops&var-application=ci-cd" 28 | - interval: 5m 29 | input_series: 30 | - series: 'argocd_app_info{autosync_enabled="true", dest_server="https://kubernetes.default.svc", health_status="Degraded", job="argo-cd-argocd-application-controller-metrics", name="ci-cd", namespace="ci-cd", project="ops", sync_status="Synced"}' 31 | values: "1+0x4" 32 | - series: 'argocd_app_info{autosync_enabled="true", dest_server="https://kubernetes.default.svc", health_status="Healthy", job="argo-cd-argocd-application-controller-metrics", name="ci-cd-healthy", namespace="ci-cd", project="ops", sync_status="Synced"}' 33 | values: "1+0x4" 34 | alert_rule_test: 35 | - eval_time: 20m 36 | alertname: ArgoCdAppUnhealthy 37 | exp_alerts: 38 | - exp_labels: 39 | severity: warning 40 | job: argo-cd-argocd-application-controller-metrics 41 | dest_server: https://kubernetes.default.svc 42 | project: ops 43 | name: ci-cd 44 | health_status: Degraded 45 | exp_annotations: 46 | summary: "An ArgoCD Application is Unhealthy." 47 | description: "The application https://kubernetes.default.svc/ops/ci-cd is unhealthy with the health status Degraded for the past 15m." 
48 | dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server=https://kubernetes.default.svc&var-project=ops&var-application=ci-cd" 49 | - interval: 5m 50 | input_series: 51 | - series: 'argocd_app_info{autosync_enabled="false", dest_server="https://kubernetes.default.svc", health_status="Healthy", job="argo-cd-argocd-application-controller-metrics", name="ci-cd", namespace="ci-cd", project="ops", sync_status="Synced"}' 52 | values: "1+0x40" 53 | - series: 'argocd_app_info{autosync_enabled="true", dest_server="https://kubernetes.default.svc", health_status="Healthy", job="argo-cd-argocd-application-controller-metrics", name="ci-cd-sync-enabled", namespace="ci-cd", project="ops", sync_status="Synced"}' 54 | values: "1+0x40" 55 | alert_rule_test: 56 | - eval_time: 3h 57 | alertname: ArgoCdAppAutoSyncDisabled 58 | exp_alerts: 59 | - exp_labels: 60 | severity: warning 61 | job: argo-cd-argocd-application-controller-metrics 62 | dest_server: https://kubernetes.default.svc 63 | project: ops 64 | name: ci-cd 65 | autosync_enabled: false 66 | exp_annotations: 67 | summary: "An ArgoCD Application has AutoSync Disabled." 68 | description: "The application https://kubernetes.default.svc/ops/ci-cd has autosync disabled for the past 2h." 
69 | dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server=https://kubernetes.default.svc&var-project=ops&var-application=ci-cd" 70 | - interval: 5m 71 | input_series: 72 | - series: 'argocd_app_sync_total{dest_server="https://kubernetes.default.svc", job="argo-cd-argocd-application-controller-metrics", name="ci-cd", namespace="ci-cd", phase="Failed", project="ops", service="argo-cd-argocd-application-controller-metrics"}' 73 | values: "1+1x4" 74 | - series: 'argocd_app_sync_total{dest_server="https://kubernetes.default.svc", job="argo-cd-argocd-application-controller-metrics", name="ci-cd-succeeded", namespace="ci-cd", phase="Succeeded", project="ops", service="argo-cd-argocd-application-controller-metrics"}' 75 | values: "1+1x4" 76 | alert_rule_test: 77 | - eval_time: 20m 78 | alertname: ArgoCdAppSyncFailed 79 | exp_alerts: 80 | - exp_labels: 81 | severity: warning 82 | job: argo-cd-argocd-application-controller-metrics 83 | dest_server: https://kubernetes.default.svc 84 | project: ops 85 | name: ci-cd 86 | phase: "Failed" 87 | exp_annotations: 88 | summary: "An ArgoCD Application has Failed to Sync." 89 | description: "The application https://kubernetes.default.svc/ops/ci-cd has failed to sync with the status Failed the past 10m." 
90 | dashboard_url: "https://grafana.com/d/argo-cd-application-overview-kask/argocd-application-overview?var-dest_server=https://kubernetes.default.svc&var-project=ops&var-application=ci-cd" 91 | - interval: 5m 92 | input_series: 93 | - series: 'argocd_notifications_deliveries_total{exported_service="grafana", job="argo-cd-argocd-notifications-controller-metrics", namespace="ci-cd", succeeded="false", trigger="on-deployed"}' 94 | values: "1+1x4" 95 | - series: 'argocd_notifications_deliveries_total{exported_service="grafana", job="argo-cd-argocd-notifications-controller-metrics", namespace="ci-cd", succeeded="true", trigger="on-deployed"}' 96 | values: "1+1x4" 97 | alert_rule_test: 98 | - eval_time: 20m 99 | alertname: ArgoCdNotificationDeliveryFailed 100 | exp_alerts: 101 | - exp_labels: 102 | severity: warning 103 | job: argo-cd-argocd-notifications-controller-metrics 104 | exported_service: grafana 105 | succeeded: false 106 | exp_annotations: 107 | summary: "ArgoCD Notification Delivery Failed." 108 | description: "The notification job argo-cd-argocd-notifications-controller-metrics has failed to deliver to grafana for the past 10m." 109 | dashboard_url: "https://grafana.com/d/argo-cd-notifications-overview-kask/argocd-notifications-overview?var-job=argo-cd-argocd-notifications-controller-metrics&var-exported_service=grafana" 110 | --------------------------------------------------------------------------------