├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── PRIVACY.md ├── README.md ├── SECURITY.md ├── docs └── image │ ├── dataflow.png │ ├── screenshot-dashboard-application.png │ └── screenshot-grant-permission-srbac.png ├── helm ├── charts │ └── index.yaml └── synapse-prometheus-operator │ ├── .helmignore │ ├── Chart.yaml │ ├── grafana_dashboards │ ├── Synapse_Workspace_Spark_Application.json │ ├── Synapse_Workspace_Sparkpools.json │ └── Synapse_Workspace_Workspace.json │ ├── templates │ ├── NOTES.txt │ ├── _helpers.tpl │ ├── configmap-grafana-dashboards.yaml │ ├── secret-discovery-config.yaml │ └── secret.yaml │ └── values.yaml └── synapse-prometheus-connector ├── .dockerignore ├── Dockerfile └── src ├── access_token.py ├── config.py ├── config └── config.example.yaml ├── main.py ├── metrics.py ├── model.py ├── requirements.txt └── spark_pools.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | synapse-prometheus-connector/src/output/ 133 | synapse-prometheus-connector/src/config/config.yaml 134 | helm/out/ 135 | synapse-prometheus-connector/src/backup/ 136 | helm/synapse-prometheus-operator/charts/*.tgz 137 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /PRIVACY.md: -------------------------------------------------------------------------------- 1 | ## Data Collection 2 | 3 | The software may collect information about you and your use of the software and send it to Microsoft. Microsoft may use this information to provide services and improve our products and services. You may turn off the telemetry as described in the repository. There are also some features in the software that may enable you and Microsoft to collect data from users of your applications. If you use these features, you must comply with applicable law, including providing appropriate notices to users of your applications together with a copy of Microsoft's privacy statement. Our privacy statement is located at https://go.microsoft.com/fwlink/?LinkID=824704. 
You can learn more about data collection and use in the help documentation and our privacy statement. Your use of the software operates as your consent to these practices. 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Azure Synapse Spark Metrics 2 | 3 | ## Introduction 4 | 5 | This project mainly aims to provide: 6 | - **Azure Synapse Apache Spark metrics** monitoring for Azure Synapse Spark applications by leveraging Prometheus, Grafana and Azure APIs. 7 | - **Azure Synapse Prometheus connector** for connecting the on-premises Prometheus server to Azure Synapse Analytics workspace metrics API. 8 | - **Grafana dashboards** for Synapse Spark metrics visualization. 9 | - **Helm chart** for Prometheus and Grafana deployment on AKS, including the connector, Prometheus servers and Grafana dashboards for metrics users. 10 | 11 | The dataflow: 12 | 13 | ![Dataflow Chart](docs/image/dataflow.png) 14 | 15 | Grafana dashboard screenshot: 16 | 17 | ![Grafana dashboard](docs/image/screenshot-dashboard-application.png) 18 | 19 | ## Prerequisites 20 | 21 | 1. [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest) 22 | 2. [Helm 3.30+](https://github.com/helm/helm/releases) 23 | 3. [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) 24 | 25 | Or just use the out-of-the-box [Azure Cloud Shell](https://shell.azure.com/), which includes all of the above tools. 26 | 27 | ## Getting Started 28 | 29 | 1. Create an Azure Kubernetes Service (AKS) cluster (1.16+, or use Minikube instead) 30 | 31 | ```bash 32 | az login 33 | az account set --subscription "<subscription_id>" 34 | az aks create --name <aks_name> --resource-group <resource_group_name> --location eastus --node-vm-size Standard_D2s_v3 35 | az aks get-credentials --name <aks_name> --resource-group <resource_group_name> 36 | ``` 37 | 38 | 2. 
Create a service principal and grant it permission to the Synapse workspace 39 | 40 | ```bash 41 | az ad sp create-for-rbac --name <service_principal_name> 42 | ``` 43 | 44 | The result should look like: 45 | 46 | ```json 47 | { 48 | "appId": "abcdef...", 49 | "displayName": "<service_principal_name>", 50 | "name": "http://<service_principal_name>", 51 | "password": "abc....", 52 | "tenant": "<tenant_id>" 53 | } 54 | ``` 55 | 56 | Note down the appId, password, and tenant id. 57 | 58 | 1. Log in to your [Azure Synapse Analytics workspace](https://web.azuresynapse.net/) as a Synapse Administrator 59 | 2. In Synapse Studio, on the left-side pane, select **Manage** > **Access control** 60 | 3. Click the **Add** button on the upper left to add a role assignment 61 | 4. For **Scope** choose **Workspace** 62 | 5. For **Role** choose **Synapse Compute Operator** 63 | 6. For **Select user** enter your service principal name and click your service principal 64 | 7. Click **Apply** 65 | 66 | Wait 3 minutes for the permission to take effect. 67 | 68 | ![screenshot-grant-permission-srbac](docs/image/screenshot-grant-permission-srbac.png) 69 | 70 | > Note: Make sure your service principal has at least a "Reader" role in your Synapse workspace. Go to the **Access Control (IAM)** tab of the Azure portal and check the permission settings. 71 | 72 | 3. 
Install Synapse Prometheus Operator 73 | 74 | Add the synapse-prometheus-operator repo to the Helm client 75 | 76 | ```bash 77 | helm repo add synapse-charts https://github.com/microsoft/azure-synapse-spark-metrics/releases/download/helm-chart 78 | ``` 79 | 80 | Install with the Helm client: 81 | 82 | ```bash 83 | helm install spo synapse-charts/synapse-prometheus-operator --create-namespace --namespace spo \ 84 | --set synapse.workspaces[0].workspace_name="<workspace_name>" \ 85 | --set synapse.workspaces[0].tenant_id="<tenant_id>" \ 86 | --set synapse.workspaces[0].service_principal_name="<service_principal_name>" \ 87 | --set synapse.workspaces[0].service_principal_password="<service_principal_password>" \ 88 | --set synapse.workspaces[0].subscription_id="<subscription_id>" \ 89 | --set synapse.workspaces[0].resource_group="<resource_group>" 90 | ``` 91 | 92 | - workspace_name: Synapse workspace name. 93 | - subscription_id: Synapse workspace subscription id. 94 | - resource_group: Synapse workspace resource group name. 95 | - tenant_id: Synapse workspace tenant id. 96 | - service_principal_name: The service principal name (also known as the "appId"). 97 | - service_principal_password: The service principal password you just created. 98 | 99 | For more details, please refer to [config.example.yaml](https://github.com/microsoft/azure-synapse-spark-metrics/blob/main/synapse-prometheus-connector/src/config/config.example.yaml) 100 | 101 | 4. Open Grafana and enjoy! 102 | 103 | ```bash 104 | # Get password 105 | kubectl get secret --namespace spo spo-grafana -o jsonpath="{.data.admin-password}" | base64 --decode ; echo 106 | # Get the service IP, copy & paste the external IP into your browser, and log in with username 'admin' and the password. 107 | kubectl -n spo get svc spo-grafana 108 | ``` 109 | 110 | Find the Synapse dashboards in the upper left corner of the Grafana page (Home -> Synapse Workspace / Synapse Application), 111 | then run some example code in a Synapse Studio notebook and wait a few seconds for the metrics to be pulled. 112 | 113 | ## Uninstall 114 | 115 | Remove the operators. 
116 | 117 | ```bash 118 | # helm delete <release_name> -n <namespace> 119 | helm delete spo -n spo 120 | ``` 121 | 122 | Remove the Kubernetes cluster. 123 | 124 | ```bash 125 | az aks delete --name <aks_name> --resource-group <resource_group_name> 126 | ``` 127 | 128 | ## Install Helm Chart Locally 129 | 130 | ``` 131 | helm install spo ./synapse-prometheus-operator --create-namespace --namespace spo \ 132 | --set synapse.workspaces[0].workspace_name="<workspace_name>" \ 133 | --set synapse.workspaces[0].tenant_id="<tenant_id>" \ 134 | --set synapse.workspaces[0].service_principal_name="<service_principal_name>" \ 135 | --set synapse.workspaces[0].service_principal_password="<service_principal_password>" \ 136 | --set synapse.workspaces[0].subscription_id="<subscription_id>" \ 137 | --set synapse.workspaces[0].resource_group="<resource_group>" 138 | ``` 139 | 140 | ## Build Docker Image 141 | 142 | ```bash 143 | cd synapse-prometheus-connector 144 | docker build -t "synapse-prometheus-connector:${Version}" -f Dockerfile . 145 | ``` 146 | 147 | ## Contributing 148 | 149 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 150 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 151 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 152 | 153 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 154 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 155 | provided by the bot. You will only need to do this once across all repos using our CLA. 156 | 157 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 158 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 159 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 
160 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. 
buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 
40 | 41 | -------------------------------------------------------------------------------- /docs/image/dataflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/azure-synapse-spark-metrics/13b107ce89505e2fdc4771225d425b1343ea1e75/docs/image/dataflow.png -------------------------------------------------------------------------------- /docs/image/screenshot-dashboard-application.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/azure-synapse-spark-metrics/13b107ce89505e2fdc4771225d425b1343ea1e75/docs/image/screenshot-dashboard-application.png -------------------------------------------------------------------------------- /docs/image/screenshot-grant-permission-srbac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/azure-synapse-spark-metrics/13b107ce89505e2fdc4771225d425b1343ea1e75/docs/image/screenshot-grant-permission-srbac.png -------------------------------------------------------------------------------- /helm/charts/index.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | entries: 3 | synapse-prometheus-operator: 4 | - apiVersion: v1 5 | appVersion: "1.0" 6 | created: "2021-11-15T11:21:02.2274873+08:00" 7 | dependencies: 8 | - import-values: 9 | - child: grafana 10 | parent: grafana 11 | - child: prometheus 12 | parent: prometheus 13 | name: prometheus-operator 14 | repository: https://charts.helm.sh/stable 15 | version: 9.3.* 16 | description: A Helm chart for Synapse monitoring, which including prometheus-operator, synapse-prometheus-connector 17 | digest: 78c507653be704defe8a24810de283980df9d5832fa27079b5f57b8751778244 18 | maintainers: 19 | - email: kaizho@microsoft.com 20 | name: kaizho 21 | - email: zhwe@microsoft.com 22 | name: zhwe 23 | name: 
synapse-prometheus-operator 24 | urls: 25 | - https://github.com/microsoft/azure-synapse-spark-metrics/releases/download/helm-chart/synapse-prometheus-operator-0.1.1.tgz 26 | version: 0.1.1 27 | - apiVersion: v1 28 | appVersion: "1.0" 29 | created: "2021-02-22T13:24:08.9650898+08:00" 30 | dependencies: 31 | - import-values: 32 | - child: grafana 33 | parent: grafana 34 | - child: prometheus 35 | parent: prometheus 36 | name: prometheus-operator 37 | repository: https://charts.helm.sh/stable 38 | version: 9.3.* 39 | description: A Helm chart for Synapse monitoring, which including prometheus-operator, synapse-prometheus-connector 40 | digest: 3f934a1ad3f733076d66a34544eb7050c5a1f005f4995320374f0b878a546907 41 | maintainers: 42 | - email: kaizho@microsoft.com 43 | name: kaizho 44 | - email: zhwe@microsoft.com 45 | name: zhwe 46 | name: synapse-prometheus-operator 47 | urls: 48 | - https://github.com/microsoft/azure-synapse-spark-metrics/releases/download/helm-chart/synapse-prometheus-operator-0.1.0.tgz 49 | version: 0.1.0 50 | generated: "2021-11-15T11:21:02.1894892+08:00" 51 | -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | .vscode/ 23 | -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | appVersion: "1.2" 3 | description: A Helm chart for Synapse monitoring, which including prometheus-operator, synapse-prometheus-connector 4 | name: synapse-prometheus-operator 5 | version: 0.1.2 6 | maintainers: 7 | - name: kaizho 8 | email: kaizho@microsoft.com 9 | - name: zhwe 10 | email: zhwe@microsoft.com 11 | dependencies: 12 | - name: kube-prometheus-stack 13 | version: 36.2.* 14 | repository: https://prometheus-community.github.io/helm-charts 15 | import-values: 16 | - child: grafana 17 | parent: grafana 18 | -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/grafana_dashboards/Synapse_Workspace_Spark_Application.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "editable": false, 16 | "gnetId": null, 17 | "graphTooltip": 1, 18 | "id": 26, 19 | "iteration": 1598250264857, 20 | "links": [ 21 | { 22 | "$$hashKey": "object:111", 23 | "icon": "cloud", 24 | "tags": [], 25 | "targetBlank": true, 26 | "title": "View Application Logs in Synapse Studio", 27 | "tooltip": "", 28 | "type": "link", 29 | "url": 
"https://web.azuresynapse.net/monitoring/sparkapplication/${name}?workspace=%2Fsubscriptions%2F${subscription_id}%2FresourceGroups%2F${resource_group}%2Fproviders%2FMicrosoft.Synapse%2Fworkspaces%2F${workspace_name}&sparkPoolName=${spark_pool_name}&livyId=${livy_id}" 30 | } 31 | ], 32 | "panels": [ 33 | { 34 | "collapsed": false, 35 | "datasource": null, 36 | "gridPos": { 37 | "h": 1, 38 | "w": 24, 39 | "x": 0, 40 | "y": 0 41 | }, 42 | "id": 31, 43 | "panels": [], 44 | "title": "Summary", 45 | "type": "row" 46 | }, 47 | { 48 | "datasource": "$Datasource", 49 | "fieldConfig": { 50 | "defaults": { 51 | "custom": {}, 52 | "mappings": [], 53 | "thresholds": { 54 | "mode": "absolute", 55 | "steps": [ 56 | { 57 | "color": "green", 58 | "value": null 59 | } 60 | ] 61 | } 62 | }, 63 | "overrides": [] 64 | }, 65 | "gridPos": { 66 | "h": 5, 67 | "w": 3, 68 | "x": 0, 69 | "y": 1 70 | }, 71 | "id": 42, 72 | "options": { 73 | "colorMode": "value", 74 | "graphMode": "area", 75 | "justifyMode": "auto", 76 | "orientation": "auto", 77 | "reduceOptions": { 78 | "calcs": [ 79 | "lastNotNull" 80 | ], 81 | "fields": "", 82 | "values": false 83 | } 84 | }, 85 | "pluginVersion": "7.0.3", 86 | "targets": [ 87 | { 88 | "expr": "count(\n count(spark_executor_memoryUsed_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\", application_id=\"$application_id\"})\n by (spark_pool_name, executor_id, application_id, name, application_name))\nby (spark_pool_name, application_id, name, application_name)", 89 | "format": "time_series", 90 | "interval": "", 91 | "legendFormat": "app={{application_name}} executor={{executor_id}}", 92 | "refId": "A" 93 | } 94 | ], 95 | "timeFrom": null, 96 | "timeShift": null, 97 | "title": "Executors", 98 | "type": "stat" 99 | }, 100 | { 101 | "datasource": "$Datasource", 102 | "fieldConfig": { 103 | "defaults": { 104 | "custom": {}, 105 | "decimals": 1, 106 | "mappings": [], 107 | "thresholds": { 108 | "mode": 
"absolute", 109 | "steps": [ 110 | { 111 | "color": "green", 112 | "value": null 113 | }, 114 | { 115 | "color": "red", 116 | "value": 80 117 | } 118 | ] 119 | }, 120 | "unit": "percentunit" 121 | }, 122 | "overrides": [] 123 | }, 124 | "gridPos": { 125 | "h": 5, 126 | "w": 3, 127 | "x": 3, 128 | "y": 1 129 | }, 130 | "id": 24, 131 | "options": { 132 | "orientation": "auto", 133 | "reduceOptions": { 134 | "calcs": [ 135 | "lastNotNull" 136 | ], 137 | "fields": "", 138 | "values": false 139 | }, 140 | "showThresholdLabels": false, 141 | "showThresholdMarkers": true 142 | }, 143 | "pluginVersion": "7.0.3", 144 | "targets": [ 145 | { 146 | "expr": "sum(spark_executor_memoryUsed_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\", application_id=\"$application_id\"}) by (application_id)\n/\nsum(spark_executor_maxMemory_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\", application_id=\"$application_id\"}) by (application_id)", 147 | "instant": false, 148 | "interval": "", 149 | "legendFormat": "", 150 | "refId": "A" 151 | } 152 | ], 153 | "timeFrom": null, 154 | "timeShift": null, 155 | "title": "Executor Storage Memory", 156 | "type": "gauge" 157 | }, 158 | { 159 | "datasource": "$Datasource", 160 | "fieldConfig": { 161 | "defaults": { 162 | "custom": {}, 163 | "decimals": 1, 164 | "mappings": [], 165 | "thresholds": { 166 | "mode": "percentage", 167 | "steps": [ 168 | { 169 | "color": "green", 170 | "value": null 171 | }, 172 | { 173 | "color": "#EAB839", 174 | "value": 80 175 | }, 176 | { 177 | "color": "red", 178 | "value": 100 179 | } 180 | ] 181 | }, 182 | "unit": "percentunit" 183 | }, 184 | "overrides": [] 185 | }, 186 | "gridPos": { 187 | "h": 5, 188 | "w": 3, 189 | "x": 6, 190 | "y": 1 191 | }, 192 | "id": 25, 193 | "options": { 194 | "orientation": "auto", 195 | "reduceOptions": { 196 | "calcs": [ 197 | "lastNotNull" 198 | ], 199 | "fields": "", 200 | 
"values": false 201 | }, 202 | "showThresholdLabels": false, 203 | "showThresholdMarkers": true 204 | }, 205 | "pluginVersion": "7.0.3", 206 | "targets": [ 207 | { 208 | "expr": "max(spark_executor_memoryUsed_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id=\"driver\", application_id=\"$application_id\"}) by (application_id)\n/ \nmax(spark_executor_maxMemory_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id=\"driver\", application_id=\"$application_id\"}) by (application_id)", 209 | "instant": false, 210 | "interval": "", 211 | "legendFormat": "", 212 | "refId": "A" 213 | } 214 | ], 215 | "timeFrom": null, 216 | "timeShift": null, 217 | "title": "Driver Storage Memory", 218 | "type": "gauge" 219 | }, 220 | { 221 | "datasource": "$Datasource", 222 | "fieldConfig": { 223 | "defaults": { 224 | "custom": {}, 225 | "mappings": [], 226 | "thresholds": { 227 | "mode": "absolute", 228 | "steps": [ 229 | { 230 | "color": "green", 231 | "value": null 232 | }, 233 | { 234 | "color": "red", 235 | "value": 250 236 | } 237 | ] 238 | }, 239 | "unit": "s" 240 | }, 241 | "overrides": [] 242 | }, 243 | "gridPos": { 244 | "h": 5, 245 | "w": 3, 246 | "x": 9, 247 | "y": 1 248 | }, 249 | "id": 26, 250 | "options": { 251 | "colorMode": "value", 252 | "graphMode": "area", 253 | "justifyMode": "auto", 254 | "orientation": "auto", 255 | "reduceOptions": { 256 | "calcs": [ 257 | "max" 258 | ], 259 | "fields": "", 260 | "values": false 261 | } 262 | }, 263 | "pluginVersion": "7.0.3", 264 | "targets": [ 265 | { 266 | "expr": "max(rate(spark_executor_totalGCTime_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}[1m]))", 267 | "instant": false, 268 | "interval": "", 269 | "legendFormat": "", 270 | "refId": "A" 271 | } 272 | ], 273 | "timeFrom": null, 274 | "timeShift": null, 275 | "title": "Max Executor GC time", 276 | "type": "stat" 277 | 
}, 278 | { 279 | "datasource": "$Datasource", 280 | "fieldConfig": { 281 | "defaults": { 282 | "custom": {}, 283 | "mappings": [], 284 | "thresholds": { 285 | "mode": "percentage", 286 | "steps": [ 287 | { 288 | "color": "green", 289 | "value": null 290 | } 291 | ] 292 | }, 293 | "unit": "s" 294 | }, 295 | "overrides": [] 296 | }, 297 | "gridPos": { 298 | "h": 5, 299 | "w": 3, 300 | "x": 12, 301 | "y": 1 302 | }, 303 | "id": 43, 304 | "options": { 305 | "colorMode": "value", 306 | "graphMode": "area", 307 | "justifyMode": "auto", 308 | "orientation": "auto", 309 | "reduceOptions": { 310 | "calcs": [ 311 | "lastNotNull" 312 | ], 313 | "fields": "", 314 | "values": false 315 | } 316 | }, 317 | "pluginVersion": "7.0.3", 318 | "targets": [ 319 | { 320 | "expr": "sum(spark_executor_totalGCTime_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"})", 321 | "instant": false, 322 | "interval": "", 323 | "legendFormat": "", 324 | "refId": "A" 325 | } 326 | ], 327 | "timeFrom": null, 328 | "timeShift": null, 329 | "title": "Total GC time", 330 | "type": "stat" 331 | }, 332 | { 333 | "datasource": "$Datasource", 334 | "fieldConfig": { 335 | "defaults": { 336 | "custom": {}, 337 | "mappings": [], 338 | "thresholds": { 339 | "mode": "absolute", 340 | "steps": [ 341 | { 342 | "color": "green", 343 | "value": null 344 | }, 345 | { 346 | "color": "#EAB839", 347 | "value": 4 348 | }, 349 | { 350 | "color": "red", 351 | "value": 6 352 | } 353 | ] 354 | } 355 | }, 356 | "overrides": [] 357 | }, 358 | "gridPos": { 359 | "h": 5, 360 | "w": 3, 361 | "x": 15, 362 | "y": 1 363 | }, 364 | "id": 27, 365 | "options": { 366 | "orientation": "auto", 367 | "reduceOptions": { 368 | "calcs": [ 369 | "max" 370 | ], 371 | "fields": "", 372 | "values": false 373 | }, 374 | "showThresholdLabels": false, 375 | "showThresholdMarkers": true 376 | }, 377 | "pluginVersion": "7.0.3", 378 | "targets": [ 379 | { 380 | "expr": 
"max(spark_executor_activeTasks{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"})", 381 | "format": "time_series", 382 | "interval": "", 383 | "legendFormat": "app={{application_name}} executor={{executor_id}}", 384 | "refId": "A" 385 | } 386 | ], 387 | "timeFrom": null, 388 | "timeShift": null, 389 | "title": "Max Tasks on Executor", 390 | "type": "gauge" 391 | }, 392 | { 393 | "datasource": "$Datasource", 394 | "fieldConfig": { 395 | "defaults": { 396 | "custom": {}, 397 | "mappings": [], 398 | "thresholds": { 399 | "mode": "absolute", 400 | "steps": [ 401 | { 402 | "color": "green", 403 | "value": null 404 | }, 405 | { 406 | "color": "#EAB839", 407 | "value": 10 408 | }, 409 | { 410 | "color": "red", 411 | "value": 20 412 | } 413 | ] 414 | } 415 | }, 416 | "overrides": [] 417 | }, 418 | "gridPos": { 419 | "h": 5, 420 | "w": 3, 421 | "x": 18, 422 | "y": 1 423 | }, 424 | "id": 36, 425 | "links": [ 426 | { 427 | "targetBlank": false, 428 | "title": "View Application Logs in Synapse Studio", 429 | "url": "https://web.azuresynapse.net/monitoring/sparkapplication/${name}?workspace=%2Fsubscriptions%2F${subscription_id}%2FresourceGroups%2F${resource_group}%2Fproviders%2FMicrosoft.Synapse%2Fworkspaces%2F${workspace_name}&sparkPoolName=${spark_pool_name}&livyId=${livy_id}" 430 | } 431 | ], 432 | "options": { 433 | "colorMode": "value", 434 | "graphMode": "area", 435 | "justifyMode": "auto", 436 | "orientation": "auto", 437 | "reduceOptions": { 438 | "calcs": [ 439 | "max" 440 | ], 441 | "fields": "", 442 | "values": false 443 | } 444 | }, 445 | "pluginVersion": "7.0.3", 446 | "targets": [ 447 | { 448 | "expr": "sum(spark_executor_failedTasks_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\"})", 449 | "format": "time_series", 450 | "interval": "", 451 | "legendFormat": "app={{application_name}} executor={{executor_id}}", 452 | "refId": "A" 453 | } 454 | ], 455 
| "timeFrom": null, 456 | "timeShift": null, 457 | "title": "Total Failed Tasks", 458 | "type": "stat" 459 | }, 460 | { 461 | "datasource": "$Datasource", 462 | "fieldConfig": { 463 | "defaults": { 464 | "custom": {}, 465 | "mappings": [], 466 | "thresholds": { 467 | "mode": "absolute", 468 | "steps": [ 469 | { 470 | "color": "green", 471 | "value": null 472 | } 473 | ] 474 | } 475 | }, 476 | "overrides": [] 477 | }, 478 | "gridPos": { 479 | "h": 5, 480 | "w": 3, 481 | "x": 21, 482 | "y": 1 483 | }, 484 | "id": 38, 485 | "options": { 486 | "colorMode": "value", 487 | "graphMode": "area", 488 | "justifyMode": "auto", 489 | "orientation": "auto", 490 | "reduceOptions": { 491 | "calcs": [ 492 | "max" 493 | ], 494 | "fields": "", 495 | "values": false 496 | } 497 | }, 498 | "pluginVersion": "7.0.3", 499 | "targets": [ 500 | { 501 | "expr": "sum(spark_executor_completedTasks_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\", application_id=\"$application_id\"})", 502 | "interval": "", 503 | "legendFormat": "", 504 | "refId": "B" 505 | } 506 | ], 507 | "timeFrom": null, 508 | "timeShift": null, 509 | "title": "Total Completed Tasks", 510 | "type": "stat" 511 | }, 512 | { 513 | "datasource": "$Datasource", 514 | "description": "", 515 | "fieldConfig": { 516 | "defaults": { 517 | "custom": { 518 | "align": null 519 | }, 520 | "mappings": [], 521 | "thresholds": { 522 | "mode": "absolute", 523 | "steps": [ 524 | { 525 | "color": "green", 526 | "value": null 527 | }, 528 | { 529 | "color": "red", 530 | "value": 80 531 | } 532 | ] 533 | } 534 | }, 535 | "overrides": [ 536 | { 537 | "matcher": { 538 | "id": "byName", 539 | "options": "Running Duration" 540 | }, 541 | "properties": [ 542 | { 543 | "id": "unit", 544 | "value": "s" 545 | } 546 | ] 547 | }, 548 | { 549 | "matcher": { 550 | "id": "byName", 551 | "options": "Submit Time" 552 | }, 553 | "properties": [ 554 | { 555 | "id": "unit", 556 | "value": "dateTimeFromNow" 
557 | } 558 | ] 559 | }, 560 | { 561 | "matcher": { 562 | "id": "byName", 563 | "options": "Queue Duration" 564 | }, 565 | "properties": [ 566 | { 567 | "id": "unit", 568 | "value": "s" 569 | } 570 | ] 571 | }, 572 | { 573 | "matcher": { 574 | "id": "byName", 575 | "options": "Application ID" 576 | }, 577 | "properties": [ 578 | { 579 | "id": "custom.width", 580 | "value": 244 581 | } 582 | ] 583 | }, 584 | { 585 | "matcher": { 586 | "id": "byName", 587 | "options": "Name" 588 | }, 589 | "properties": [ 590 | { 591 | "id": "custom.width", 592 | "value": 283 593 | } 594 | ] 595 | }, 596 | { 597 | "matcher": { 598 | "id": "byName", 599 | "options": "Livy ID" 600 | }, 601 | "properties": [ 602 | { 603 | "id": "custom.width", 604 | "value": 144 605 | } 606 | ] 607 | } 608 | ] 609 | }, 610 | "gridPos": { 611 | "h": 3, 612 | "w": 24, 613 | "x": 0, 614 | "y": 6 615 | }, 616 | "id": 45, 617 | "options": { 618 | "frameIndex": 1, 619 | "showHeader": true, 620 | "sortBy": [] 621 | }, 622 | "pluginVersion": "7.0.3", 623 | "targets": [ 624 | { 625 | "expr": "synapse_connector_application_queue_duration{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}", 626 | "format": "table", 627 | "instant": true, 628 | "interval": "", 629 | "legendFormat": "", 630 | "refId": "A" 631 | }, 632 | { 633 | "expr": "max(synapse_connector_application_submit_time{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}) by (application_id) * 1000", 634 | "format": "table", 635 | "instant": true, 636 | "interval": "", 637 | "legendFormat": "", 638 | "refId": "B" 639 | }, 640 | { 641 | "expr": "max(synapse_connector_application_running_duration{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}) by (application_id)", 642 | "format": "table", 643 | "instant": true, 644 | "interval": "", 645 | "legendFormat": "", 646 | "refId": "C" 
647 | } 648 | ], 649 | "timeFrom": null, 650 | "timeShift": null, 651 | "title": " ", 652 | "transformations": [ 653 | { 654 | "id": "filterFieldsByName", 655 | "options": { 656 | "include": { 657 | "names": [ 658 | "application_id", 659 | "livy_id", 660 | "name", 661 | "spark_pool_name", 662 | "submitter", 663 | "workspace_name", 664 | "Value #A", 665 | "Value #B", 666 | "Value #C" 667 | ] 668 | } 669 | } 670 | }, 671 | { 672 | "id": "seriesToColumns", 673 | "options": { 674 | "byField": "application_id" 675 | } 676 | }, 677 | { 678 | "id": "organize", 679 | "options": { 680 | "excludeByName": { 681 | "submitter": true 682 | }, 683 | "indexByName": { 684 | "Value #A": 6, 685 | "Value #B": 7, 686 | "application_id": 1, 687 | "livy_id": 2, 688 | "name": 0, 689 | "spark_pool_name": 3, 690 | "submitter": 4, 691 | "workspace_name": 5 692 | }, 693 | "renameByName": { 694 | "Value #A": "Queue Duration", 695 | "Value #B": "Submit Time", 696 | "Value #C": "Running Duration", 697 | "application_id": "Application ID", 698 | "livy_id": "Livy ID", 699 | "name": "Name", 700 | "spark_pool_name": "Spark pool", 701 | "submitter": "Submitter", 702 | "workspace_name": "Workspace" 703 | } 704 | } 705 | } 706 | ], 707 | "type": "table" 708 | }, 709 | { 710 | "collapsed": false, 711 | "datasource": null, 712 | "gridPos": { 713 | "h": 1, 714 | "w": 24, 715 | "x": 0, 716 | "y": 9 717 | }, 718 | "id": 33, 719 | "panels": [], 720 | "title": "Executors", 721 | "type": "row" 722 | }, 723 | { 724 | "datasource": "$Datasource", 725 | "description": "", 726 | "fieldConfig": { 727 | "defaults": { 728 | "custom": { 729 | "align": null, 730 | "displayMode": "auto" 731 | }, 732 | "mappings": [], 733 | "thresholds": { 734 | "mode": "absolute", 735 | "steps": [ 736 | { 737 | "color": "green", 738 | "value": null 739 | }, 740 | { 741 | "color": "red", 742 | "value": 80 743 | } 744 | ] 745 | }, 746 | "unit": "none" 747 | }, 748 | "overrides": [ 749 | { 750 | "matcher": { 751 | "id": "byName", 752 | 
"options": "Total Duration" 753 | }, 754 | "properties": [ 755 | { 756 | "id": "unit", 757 | "value": "s" 758 | } 759 | ] 760 | } 761 | ] 762 | }, 763 | "gridPos": { 764 | "h": 8, 765 | "w": 24, 766 | "x": 0, 767 | "y": 10 768 | }, 769 | "id": 20, 770 | "options": { 771 | "showHeader": true, 772 | "sortBy": [ 773 | { 774 | "desc": false, 775 | "displayName": "executor_id" 776 | } 777 | ] 778 | }, 779 | "pluginVersion": "7.0.3", 780 | "targets": [ 781 | { 782 | "expr": "max_over_time(\r\n max(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"})\n by (spark_pool_name, application_id, name, application_name, executor_id)[$__range:1m]\r\n)", 783 | "format": "table", 784 | "instant": true, 785 | "interval": "", 786 | "legendFormat": "", 787 | "refId": "A" 788 | } 789 | ], 790 | "timeFrom": null, 791 | "timeShift": null, 792 | "title": "Application Executors Current Status", 793 | "transformations": [ 794 | { 795 | "id": "organize", 796 | "options": { 797 | "excludeByName": { 798 | "Time": true, 799 | "application_name": true 800 | }, 801 | "indexByName": { 802 | "Time": 0, 803 | "Value": 6, 804 | "application_id": 3, 805 | "application_name": 4, 806 | "executor_id": 5, 807 | "name": 2, 808 | "spark_pool_name": 1 809 | }, 810 | "renameByName": { 811 | "Value": "Total Duration", 812 | "application_id": "Application ID", 813 | "application_name": "Application Name", 814 | "executor_id": "Executor ID", 815 | "name": "Name", 816 | "spark_pool_name": "Spark Pool Name" 817 | } 818 | } 819 | } 820 | ], 821 | "type": "table" 822 | }, 823 | { 824 | "collapsed": false, 825 | "datasource": null, 826 | "gridPos": { 827 | "h": 1, 828 | "w": 24, 829 | "x": 0, 830 | "y": 18 831 | }, 832 | "id": 22, 833 | "panels": [], 834 | "title": "Details", 835 | "type": "row" 836 | }, 837 | { 838 | "datasource": "$Datasource", 839 | "fieldConfig": { 840 | "defaults": { 841 | "custom": {}, 842 | 
"mappings": [], 843 | "thresholds": { 844 | "mode": "percentage", 845 | "steps": [ 846 | { 847 | "color": "green", 848 | "value": null 849 | }, 850 | { 851 | "color": "#EAB839", 852 | "value": 90 853 | } 854 | ] 855 | }, 856 | "unit": "s" 857 | }, 858 | "overrides": [] 859 | }, 860 | "gridPos": { 861 | "h": 8, 862 | "w": 12, 863 | "x": 0, 864 | "y": 19 865 | }, 866 | "id": 41, 867 | "options": { 868 | "displayMode": "gradient", 869 | "orientation": "auto", 870 | "reduceOptions": { 871 | "calcs": [ 872 | "lastNotNull" 873 | ], 874 | "fields": "", 875 | "values": false 876 | }, 877 | "showUnfilled": true 878 | }, 879 | "pluginVersion": "7.0.3", 880 | "targets": [ 881 | { 882 | "expr": "max(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}) by (executor_id)", 883 | "instant": false, 884 | "interval": "", 885 | "legendFormat": "{{executor_id}}", 886 | "refId": "A" 887 | } 888 | ], 889 | "timeFrom": null, 890 | "timeShift": null, 891 | "title": "Total Tasks Time per Executor", 892 | "type": "bargauge" 893 | }, 894 | { 895 | "aliasColors": {}, 896 | "bars": false, 897 | "dashLength": 10, 898 | "dashes": false, 899 | "datasource": "$Datasource", 900 | "description": "", 901 | "fieldConfig": { 902 | "defaults": { 903 | "custom": {}, 904 | "mappings": [], 905 | "thresholds": { 906 | "mode": "absolute", 907 | "steps": [ 908 | { 909 | "color": "green", 910 | "value": null 911 | }, 912 | { 913 | "color": "red", 914 | "value": 80 915 | } 916 | ] 917 | } 918 | }, 919 | "overrides": [] 920 | }, 921 | "fill": 1, 922 | "fillGradient": 0, 923 | "gridPos": { 924 | "h": 8, 925 | "w": 12, 926 | "x": 12, 927 | "y": 19 928 | }, 929 | "hiddenSeries": false, 930 | "id": 2, 931 | "legend": { 932 | "alignAsTable": true, 933 | "avg": false, 934 | "current": true, 935 | "max": false, 936 | "min": false, 937 | "rightSide": true, 938 | "show": true, 939 | "total": false, 940 | "values": true 
941 | }, 942 | "lines": true, 943 | "linewidth": 1, 944 | "nullPointMode": "connected", 945 | "options": { 946 | "dataLinks": [] 947 | }, 948 | "percentage": false, 949 | "pluginVersion": "7.0.3", 950 | "pointradius": 2, 951 | "points": false, 952 | "renderer": "flot", 953 | "seriesOverrides": [], 954 | "spaceLength": 10, 955 | "stack": false, 956 | "steppedLine": false, 957 | "targets": [ 958 | { 959 | "expr": "spark_executor_memoryUsed_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}", 960 | "interval": "", 961 | "intervalFactor": 3, 962 | "legendFormat": "[{{name}}] executor={{executor_id}}", 963 | "refId": "A" 964 | } 965 | ], 966 | "thresholds": [], 967 | "timeFrom": null, 968 | "timeRegions": [], 969 | "timeShift": null, 970 | "title": "Executor Memory Used", 971 | "tooltip": { 972 | "shared": true, 973 | "sort": 2, 974 | "value_type": "individual" 975 | }, 976 | "type": "graph", 977 | "xaxis": { 978 | "buckets": null, 979 | "mode": "time", 980 | "name": null, 981 | "show": true, 982 | "values": [] 983 | }, 984 | "yaxes": [ 985 | { 986 | "format": "bytes", 987 | "label": null, 988 | "logBase": 1, 989 | "max": null, 990 | "min": null, 991 | "show": true 992 | }, 993 | { 994 | "format": "short", 995 | "label": null, 996 | "logBase": 1, 997 | "max": null, 998 | "min": null, 999 | "show": false 1000 | } 1001 | ], 1002 | "yaxis": { 1003 | "align": false, 1004 | "alignLevel": null 1005 | } 1006 | }, 1007 | { 1008 | "aliasColors": {}, 1009 | "bars": false, 1010 | "dashLength": 10, 1011 | "dashes": false, 1012 | "datasource": "$Datasource", 1013 | "fieldConfig": { 1014 | "defaults": { 1015 | "custom": {} 1016 | }, 1017 | "overrides": [] 1018 | }, 1019 | "fill": 2, 1020 | "fillGradient": 0, 1021 | "gridPos": { 1022 | "h": 8, 1023 | "w": 12, 1024 | "x": 0, 1025 | "y": 27 1026 | }, 1027 | "hiddenSeries": false, 1028 | "id": 16, 1029 | "legend": { 1030 | "alignAsTable": true, 1031 | "avg": false, 1032 | 
"current": true, 1033 | "max": false, 1034 | "min": false, 1035 | "rightSide": true, 1036 | "show": false, 1037 | "total": false, 1038 | "values": true 1039 | }, 1040 | "lines": true, 1041 | "linewidth": 1, 1042 | "nullPointMode": "null", 1043 | "options": { 1044 | "dataLinks": [] 1045 | }, 1046 | "percentage": false, 1047 | "pointradius": 2, 1048 | "points": false, 1049 | "renderer": "flot", 1050 | "seriesOverrides": [], 1051 | "spaceLength": 10, 1052 | "stack": false, 1053 | "steppedLine": false, 1054 | "targets": [ 1055 | { 1056 | "expr": "sum(\n rate(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\", executor_id!=\"driver\"}[2m])) \nby (name, application_name)", 1057 | "instant": false, 1058 | "interval": "", 1059 | "intervalFactor": 2, 1060 | "legendFormat": "[{{name}}]", 1061 | "refId": "A" 1062 | } 1063 | ], 1064 | "thresholds": [], 1065 | "timeFrom": null, 1066 | "timeRegions": [], 1067 | "timeShift": null, 1068 | "title": "Total Duration / Seconds", 1069 | "tooltip": { 1070 | "shared": true, 1071 | "sort": 0, 1072 | "value_type": "individual" 1073 | }, 1074 | "type": "graph", 1075 | "xaxis": { 1076 | "buckets": null, 1077 | "mode": "time", 1078 | "name": null, 1079 | "show": true, 1080 | "values": [] 1081 | }, 1082 | "yaxes": [ 1083 | { 1084 | "decimals": 2, 1085 | "format": "s", 1086 | "label": null, 1087 | "logBase": 1, 1088 | "max": null, 1089 | "min": null, 1090 | "show": true 1091 | }, 1092 | { 1093 | "format": "short", 1094 | "label": null, 1095 | "logBase": 1, 1096 | "max": null, 1097 | "min": null, 1098 | "show": false 1099 | } 1100 | ], 1101 | "yaxis": { 1102 | "align": false, 1103 | "alignLevel": null 1104 | } 1105 | }, 1106 | { 1107 | "aliasColors": {}, 1108 | "bars": false, 1109 | "dashLength": 10, 1110 | "dashes": false, 1111 | "datasource": "$Datasource", 1112 | "fieldConfig": { 1113 | "defaults": { 1114 | "custom": {} 1115 | }, 1116 | 
"overrides": [] 1117 | }, 1118 | "fill": 1, 1119 | "fillGradient": 0, 1120 | "gridPos": { 1121 | "h": 8, 1122 | "w": 12, 1123 | "x": 12, 1124 | "y": 27 1125 | }, 1126 | "hiddenSeries": false, 1127 | "id": 11, 1128 | "legend": { 1129 | "avg": false, 1130 | "current": false, 1131 | "max": false, 1132 | "min": false, 1133 | "show": true, 1134 | "total": false, 1135 | "values": false 1136 | }, 1137 | "lines": true, 1138 | "linewidth": 1, 1139 | "nullPointMode": "null", 1140 | "options": { 1141 | "dataLinks": [] 1142 | }, 1143 | "percentage": false, 1144 | "pointradius": 2, 1145 | "points": false, 1146 | "renderer": "flot", 1147 | "seriesOverrides": [], 1148 | "spaceLength": 10, 1149 | "stack": false, 1150 | "steppedLine": false, 1151 | "targets": [ 1152 | { 1153 | "expr": "rate(spark_executor_totalGCTime_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}[1m])", 1154 | "interval": "", 1155 | "legendFormat": "app={{application_name}} executor={{executor_id}}", 1156 | "refId": "A" 1157 | } 1158 | ], 1159 | "thresholds": [], 1160 | "timeFrom": null, 1161 | "timeRegions": [], 1162 | "timeShift": null, 1163 | "title": "Executor GC time", 1164 | "tooltip": { 1165 | "shared": true, 1166 | "sort": 0, 1167 | "value_type": "individual" 1168 | }, 1169 | "type": "graph", 1170 | "xaxis": { 1171 | "buckets": null, 1172 | "mode": "time", 1173 | "name": null, 1174 | "show": true, 1175 | "values": [] 1176 | }, 1177 | "yaxes": [ 1178 | { 1179 | "format": "s", 1180 | "label": null, 1181 | "logBase": 1, 1182 | "max": null, 1183 | "min": null, 1184 | "show": true 1185 | }, 1186 | { 1187 | "format": "short", 1188 | "label": null, 1189 | "logBase": 1, 1190 | "max": null, 1191 | "min": null, 1192 | "show": true 1193 | } 1194 | ], 1195 | "yaxis": { 1196 | "align": false, 1197 | "alignLevel": null 1198 | } 1199 | }, 1200 | { 1201 | "aliasColors": {}, 1202 | "bars": false, 1203 | "dashLength": 10, 1204 | "dashes": false, 
1205 | "datasource": "$Datasource", 1206 | "fieldConfig": { 1207 | "defaults": { 1208 | "custom": {}, 1209 | "mappings": [], 1210 | "thresholds": { 1211 | "mode": "absolute", 1212 | "steps": [ 1213 | { 1214 | "color": "green", 1215 | "value": null 1216 | }, 1217 | { 1218 | "color": "red", 1219 | "value": 80 1220 | } 1221 | ] 1222 | } 1223 | }, 1224 | "overrides": [] 1225 | }, 1226 | "fill": 10, 1227 | "fillGradient": 0, 1228 | "gridPos": { 1229 | "h": 12, 1230 | "w": 24, 1231 | "x": 0, 1232 | "y": 35 1233 | }, 1234 | "hiddenSeries": false, 1235 | "id": 4, 1236 | "legend": { 1237 | "avg": false, 1238 | "current": false, 1239 | "max": false, 1240 | "min": false, 1241 | "show": true, 1242 | "total": false, 1243 | "values": false 1244 | }, 1245 | "lines": true, 1246 | "linewidth": 0, 1247 | "nullPointMode": "null", 1248 | "options": { 1249 | "dataLinks": [] 1250 | }, 1251 | "percentage": false, 1252 | "pluginVersion": "7.0.3", 1253 | "pointradius": 2, 1254 | "points": false, 1255 | "renderer": "flot", 1256 | "seriesOverrides": [], 1257 | "spaceLength": 10, 1258 | "stack": true, 1259 | "steppedLine": true, 1260 | "targets": [ 1261 | { 1262 | "expr": "spark_executor_activeTasks{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}", 1263 | "format": "time_series", 1264 | "instant": false, 1265 | "interval": "", 1266 | "legendFormat": "executor={{executor_id}}", 1267 | "refId": "A" 1268 | } 1269 | ], 1270 | "thresholds": [], 1271 | "timeFrom": null, 1272 | "timeRegions": [], 1273 | "timeShift": null, 1274 | "title": "Active Tasks per Executor", 1275 | "tooltip": { 1276 | "shared": true, 1277 | "sort": 0, 1278 | "value_type": "individual" 1279 | }, 1280 | "type": "graph", 1281 | "xaxis": { 1282 | "buckets": null, 1283 | "mode": "time", 1284 | "name": null, 1285 | "show": true, 1286 | "values": [] 1287 | }, 1288 | "yaxes": [ 1289 | { 1290 | "format": "short", 1291 | "label": null, 1292 | "logBase": 1, 1293 | "max": 
null, 1294 | "min": null, 1295 | "show": true 1296 | }, 1297 | { 1298 | "format": "short", 1299 | "label": null, 1300 | "logBase": 1, 1301 | "max": null, 1302 | "min": null, 1303 | "show": true 1304 | } 1305 | ], 1306 | "yaxis": { 1307 | "align": false, 1308 | "alignLevel": null 1309 | } 1310 | }, 1311 | { 1312 | "aliasColors": {}, 1313 | "bars": false, 1314 | "dashLength": 10, 1315 | "dashes": false, 1316 | "datasource": "$Datasource", 1317 | "description": "", 1318 | "fieldConfig": { 1319 | "defaults": { 1320 | "custom": {} 1321 | }, 1322 | "overrides": [] 1323 | }, 1324 | "fill": 1, 1325 | "fillGradient": 0, 1326 | "gridPos": { 1327 | "h": 8, 1328 | "w": 12, 1329 | "x": 0, 1330 | "y": 47 1331 | }, 1332 | "hiddenSeries": false, 1333 | "id": 9, 1334 | "legend": { 1335 | "avg": false, 1336 | "current": false, 1337 | "max": false, 1338 | "min": false, 1339 | "show": true, 1340 | "total": false, 1341 | "values": false 1342 | }, 1343 | "lines": true, 1344 | "linewidth": 1, 1345 | "nullPointMode": "null", 1346 | "options": { 1347 | "dataLinks": [] 1348 | }, 1349 | "percentage": false, 1350 | "pointradius": 2, 1351 | "points": false, 1352 | "renderer": "flot", 1353 | "seriesOverrides": [], 1354 | "spaceLength": 10, 1355 | "stack": false, 1356 | "steppedLine": false, 1357 | "targets": [ 1358 | { 1359 | "expr": "rate(spark_executor_totalShuffleRead_bytes_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}[5m])", 1360 | "hide": false, 1361 | "interval": "", 1362 | "legendFormat": "executor={{executor_id}}", 1363 | "refId": "A" 1364 | } 1365 | ], 1366 | "thresholds": [], 1367 | "timeFrom": null, 1368 | "timeRegions": [], 1369 | "timeShift": null, 1370 | "title": "Shuffle Bytes Read", 1371 | "tooltip": { 1372 | "shared": true, 1373 | "sort": 1, 1374 | "value_type": "individual" 1375 | }, 1376 | "type": "graph", 1377 | "xaxis": { 1378 | "buckets": null, 1379 | "mode": "time", 1380 | "name": null, 1381 | "show": 
true, 1382 | "values": [] 1383 | }, 1384 | "yaxes": [ 1385 | { 1386 | "format": "bytes", 1387 | "label": null, 1388 | "logBase": 1, 1389 | "max": null, 1390 | "min": null, 1391 | "show": true 1392 | }, 1393 | { 1394 | "format": "short", 1395 | "label": null, 1396 | "logBase": 1, 1397 | "max": null, 1398 | "min": null, 1399 | "show": false 1400 | } 1401 | ], 1402 | "yaxis": { 1403 | "align": false, 1404 | "alignLevel": null 1405 | } 1406 | }, 1407 | { 1408 | "aliasColors": {}, 1409 | "bars": false, 1410 | "dashLength": 10, 1411 | "dashes": false, 1412 | "datasource": "$Datasource", 1413 | "description": "", 1414 | "fieldConfig": { 1415 | "defaults": { 1416 | "custom": {} 1417 | }, 1418 | "overrides": [] 1419 | }, 1420 | "fill": 1, 1421 | "fillGradient": 0, 1422 | "gridPos": { 1423 | "h": 8, 1424 | "w": 12, 1425 | "x": 12, 1426 | "y": 47 1427 | }, 1428 | "hiddenSeries": false, 1429 | "id": 8, 1430 | "legend": { 1431 | "avg": false, 1432 | "current": false, 1433 | "max": false, 1434 | "min": false, 1435 | "show": true, 1436 | "total": false, 1437 | "values": false 1438 | }, 1439 | "lines": true, 1440 | "linewidth": 1, 1441 | "nullPointMode": "null", 1442 | "options": { 1443 | "dataLinks": [] 1444 | }, 1445 | "percentage": false, 1446 | "pointradius": 2, 1447 | "points": false, 1448 | "renderer": "flot", 1449 | "seriesOverrides": [], 1450 | "spaceLength": 10, 1451 | "stack": false, 1452 | "steppedLine": false, 1453 | "targets": [ 1454 | { 1455 | "expr": "rate(spark_executor_totalShuffleWrite_bytes_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}[5m])", 1456 | "hide": false, 1457 | "interval": "", 1458 | "legendFormat": "executor={{executor_id}}", 1459 | "refId": "A" 1460 | } 1461 | ], 1462 | "thresholds": [], 1463 | "timeFrom": null, 1464 | "timeRegions": [], 1465 | "timeShift": null, 1466 | "title": "Shuffle Bytes Written", 1467 | "tooltip": { 1468 | "shared": true, 1469 | "sort": 1, 1470 | 
"value_type": "individual" 1471 | }, 1472 | "type": "graph", 1473 | "xaxis": { 1474 | "buckets": null, 1475 | "mode": "time", 1476 | "name": null, 1477 | "show": true, 1478 | "values": [] 1479 | }, 1480 | "yaxes": [ 1481 | { 1482 | "format": "bytes", 1483 | "label": null, 1484 | "logBase": 1, 1485 | "max": null, 1486 | "min": null, 1487 | "show": true 1488 | }, 1489 | { 1490 | "format": "short", 1491 | "label": null, 1492 | "logBase": 1, 1493 | "max": null, 1494 | "min": null, 1495 | "show": false 1496 | } 1497 | ], 1498 | "yaxis": { 1499 | "align": false, 1500 | "alignLevel": null 1501 | } 1502 | }, 1503 | { 1504 | "aliasColors": {}, 1505 | "bars": false, 1506 | "dashLength": 10, 1507 | "dashes": false, 1508 | "datasource": "$Datasource", 1509 | "fieldConfig": { 1510 | "defaults": { 1511 | "custom": {} 1512 | }, 1513 | "overrides": [] 1514 | }, 1515 | "fill": 1, 1516 | "fillGradient": 0, 1517 | "gridPos": { 1518 | "h": 8, 1519 | "w": 12, 1520 | "x": 0, 1521 | "y": 55 1522 | }, 1523 | "hiddenSeries": false, 1524 | "id": 18, 1525 | "legend": { 1526 | "avg": false, 1527 | "current": false, 1528 | "max": false, 1529 | "min": false, 1530 | "show": true, 1531 | "total": false, 1532 | "values": false 1533 | }, 1534 | "lines": true, 1535 | "linewidth": 1, 1536 | "nullPointMode": "connected", 1537 | "options": { 1538 | "dataLinks": [] 1539 | }, 1540 | "percentage": false, 1541 | "pointradius": 2, 1542 | "points": false, 1543 | "renderer": "flot", 1544 | "seriesOverrides": [], 1545 | "spaceLength": 10, 1546 | "stack": false, 1547 | "steppedLine": false, 1548 | "targets": [ 1549 | { 1550 | "expr": "rate(spark_executor_totalInputBytes_bytes_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}[5m])", 1551 | "interval": "", 1552 | "legendFormat": "executor={{executor_id}}", 1553 | "refId": "A" 1554 | } 1555 | ], 1556 | "thresholds": [], 1557 | "timeFrom": null, 1558 | "timeRegions": [], 1559 | "timeShift": null, 
1560 | "title": "Total Input Bytes/Seconds", 1561 | "tooltip": { 1562 | "shared": true, 1563 | "sort": 0, 1564 | "value_type": "individual" 1565 | }, 1566 | "type": "graph", 1567 | "xaxis": { 1568 | "buckets": null, 1569 | "mode": "time", 1570 | "name": null, 1571 | "show": true, 1572 | "values": [] 1573 | }, 1574 | "yaxes": [ 1575 | { 1576 | "format": "Bps", 1577 | "label": null, 1578 | "logBase": 1, 1579 | "max": null, 1580 | "min": null, 1581 | "show": true 1582 | }, 1583 | { 1584 | "format": "short", 1585 | "label": null, 1586 | "logBase": 1, 1587 | "max": null, 1588 | "min": null, 1589 | "show": true 1590 | } 1591 | ], 1592 | "yaxis": { 1593 | "align": false, 1594 | "alignLevel": null 1595 | } 1596 | }, 1597 | { 1598 | "aliasColors": {}, 1599 | "bars": false, 1600 | "dashLength": 10, 1601 | "dashes": false, 1602 | "datasource": "$Datasource", 1603 | "description": "", 1604 | "fieldConfig": { 1605 | "defaults": { 1606 | "custom": {} 1607 | }, 1608 | "overrides": [] 1609 | }, 1610 | "fill": 10, 1611 | "fillGradient": 0, 1612 | "gridPos": { 1613 | "h": 8, 1614 | "w": 12, 1615 | "x": 12, 1616 | "y": 55 1617 | }, 1618 | "hiddenSeries": false, 1619 | "id": 7, 1620 | "legend": { 1621 | "avg": false, 1622 | "current": false, 1623 | "max": false, 1624 | "min": false, 1625 | "show": true, 1626 | "total": false, 1627 | "values": false 1628 | }, 1629 | "lines": true, 1630 | "linewidth": 0, 1631 | "nullPointMode": "null", 1632 | "options": { 1633 | "dataLinks": [] 1634 | }, 1635 | "percentage": false, 1636 | "pointradius": 2, 1637 | "points": false, 1638 | "renderer": "flot", 1639 | "seriesOverrides": [], 1640 | "spaceLength": 10, 1641 | "stack": false, 1642 | "steppedLine": false, 1643 | "targets": [ 1644 | { 1645 | "expr": "sum(rate(spark_executor_totalShuffleWrite_bytes_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}[5m]))", 1646 | "hide": false, 1647 | "interval": "", 1648 | "intervalFactor": 1, 1649 | 
"legendFormat": "Write", 1650 | "refId": "A" 1651 | }, 1652 | { 1653 | "expr": "sum(rate(spark_executor_totalShuffleRead_bytes_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", application_id=\"$application_id\"}[5m]))", 1654 | "interval": "", 1655 | "intervalFactor": 1, 1656 | "legendFormat": "Read", 1657 | "refId": "B" 1658 | } 1659 | ], 1660 | "thresholds": [], 1661 | "timeFrom": null, 1662 | "timeRegions": [], 1663 | "timeShift": null, 1664 | "title": "Total Shuffle IO", 1665 | "tooltip": { 1666 | "shared": true, 1667 | "sort": 1, 1668 | "value_type": "individual" 1669 | }, 1670 | "type": "graph", 1671 | "xaxis": { 1672 | "buckets": null, 1673 | "mode": "time", 1674 | "name": null, 1675 | "show": true, 1676 | "values": [] 1677 | }, 1678 | "yaxes": [ 1679 | { 1680 | "format": "bytes", 1681 | "label": null, 1682 | "logBase": 1, 1683 | "max": null, 1684 | "min": null, 1685 | "show": true 1686 | }, 1687 | { 1688 | "format": "short", 1689 | "label": null, 1690 | "logBase": 1, 1691 | "max": null, 1692 | "min": null, 1693 | "show": false 1694 | } 1695 | ], 1696 | "yaxis": { 1697 | "align": false, 1698 | "alignLevel": null 1699 | } 1700 | } 1701 | ], 1702 | "refresh": "30s", 1703 | "schemaVersion": 25, 1704 | "style": "dark", 1705 | "tags": [ 1706 | "Synapse", 1707 | "Spark" 1708 | ], 1709 | "templating": { 1710 | "list": [ 1711 | { 1712 | "current": { 1713 | "selected": false, 1714 | "text": "Prometheus", 1715 | "value": "Prometheus" 1716 | }, 1717 | "hide": 2, 1718 | "includeAll": false, 1719 | "label": null, 1720 | "multi": false, 1721 | "name": "Datasource", 1722 | "options": [], 1723 | "query": "prometheus", 1724 | "queryValue": "", 1725 | "refresh": 1, 1726 | "regex": "", 1727 | "skipUrlSync": false, 1728 | "type": "datasource" 1729 | }, 1730 | { 1731 | "allValue": null, 1732 | "current": { 1733 | "selected": false, 1734 | "text": "", 1735 | "value": "" 1736 | }, 1737 | "datasource": "$Datasource", 1738 | "definition": 
"label_values(spark_executor_totalDuration_seconds_total{}, workspace_name)", 1739 | "hide": 0, 1740 | "includeAll": false, 1741 | "label": "Workspace", 1742 | "multi": false, 1743 | "name": "workspace_name", 1744 | "options": [], 1745 | "query": "label_values(spark_executor_totalDuration_seconds_total{}, workspace_name)", 1746 | "refresh": 2, 1747 | "regex": "", 1748 | "skipUrlSync": false, 1749 | "sort": 0, 1750 | "tagValuesQuery": "", 1751 | "tags": [], 1752 | "tagsQuery": "", 1753 | "type": "query", 1754 | "useTags": false 1755 | }, 1756 | { 1757 | "allValue": null, 1758 | "current": { 1759 | "selected": false, 1760 | "text": "", 1761 | "value": "" 1762 | }, 1763 | "datasource": "$Datasource", 1764 | "definition": "label_values(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\"}, spark_pool_name)", 1765 | "hide": 0, 1766 | "includeAll": false, 1767 | "label": "Spark pool", 1768 | "multi": false, 1769 | "name": "spark_pool_name", 1770 | "options": [], 1771 | "query": "label_values(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\"}, spark_pool_name)", 1772 | "refresh": 2, 1773 | "regex": "", 1774 | "skipUrlSync": false, 1775 | "sort": 5, 1776 | "tagValuesQuery": "", 1777 | "tags": [], 1778 | "tagsQuery": "", 1779 | "type": "query", 1780 | "useTags": false 1781 | }, 1782 | { 1783 | "allValue": null, 1784 | "current": { 1785 | "selected": false, 1786 | "text": "", 1787 | "value": "" 1788 | }, 1789 | "datasource": "$Datasource", 1790 | "definition": "label_values(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}, name)", 1791 | "hide": 0, 1792 | "includeAll": false, 1793 | "label": "Name", 1794 | "multi": false, 1795 | "name": "name", 1796 | "options": [], 1797 | "query": "label_values(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}, name)", 1798 | "refresh": 2, 1799 | "regex": 
"", 1800 | "skipUrlSync": false, 1801 | "sort": 2, 1802 | "tagValuesQuery": "", 1803 | "tags": [], 1804 | "tagsQuery": "", 1805 | "type": "query", 1806 | "useTags": false 1807 | }, 1808 | { 1809 | "allValue": null, 1810 | "current": { 1811 | "selected": false, 1812 | "text": "", 1813 | "value": "" 1814 | }, 1815 | "datasource": "$Datasource", 1816 | "definition": "label_values(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", name=\"$name\"}, application_name)", 1817 | "hide": 2, 1818 | "includeAll": false, 1819 | "label": "Application Name", 1820 | "multi": false, 1821 | "name": "application_name", 1822 | "options": [], 1823 | "query": "label_values(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", name=\"$name\"}, application_name)", 1824 | "refresh": 2, 1825 | "regex": "", 1826 | "skipUrlSync": false, 1827 | "sort": 1, 1828 | "tagValuesQuery": "", 1829 | "tags": [], 1830 | "tagsQuery": "", 1831 | "type": "query", 1832 | "useTags": false 1833 | }, 1834 | { 1835 | "allValue": null, 1836 | "current": { 1837 | "selected": false, 1838 | "text": "", 1839 | "value": "" 1840 | }, 1841 | "datasource": "$Datasource", 1842 | "definition": "label_values(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", name=\"$name\"}, application_id)", 1843 | "hide": 2, 1844 | "includeAll": false, 1845 | "label": "Application ID", 1846 | "multi": false, 1847 | "name": "application_id", 1848 | "options": [], 1849 | "query": "label_values(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", name=\"$name\"}, application_id)", 1850 | "refresh": 2, 1851 | "regex": "", 1852 | "skipUrlSync": false, 1853 | "sort": 0, 1854 | "tagValuesQuery": "", 1855 | "tags": [], 1856 | "tagsQuery": "", 1857 | "type": "query", 1858 | "useTags": false 
1859 | }, 1860 | { 1861 | "allValue": null, 1862 | "current": { 1863 | "selected": false, 1864 | "text": "", 1865 | "value": "" 1866 | }, 1867 | "datasource": "$Datasource", 1868 | "definition": "label_values(synapse_connector_application_info{workspace_name=\"$workspace_name\"}, resource_group)", 1869 | "hide": 2, 1870 | "includeAll": false, 1871 | "label": null, 1872 | "multi": false, 1873 | "name": "resource_group", 1874 | "options": [], 1875 | "query": "label_values(synapse_connector_application_info{workspace_name=\"$workspace_name\"}, resource_group)", 1876 | "refresh": 2, 1877 | "regex": "", 1878 | "skipUrlSync": false, 1879 | "sort": 0, 1880 | "tagValuesQuery": "", 1881 | "tags": [], 1882 | "tagsQuery": "", 1883 | "type": "query", 1884 | "useTags": false 1885 | }, 1886 | { 1887 | "allValue": null, 1888 | "current": { 1889 | "selected": false, 1890 | "text": "", 1891 | "value": "" 1892 | }, 1893 | "datasource": "$Datasource", 1894 | "definition": "label_values(synapse_connector_application_info{workspace_name=\"$workspace_name\"}, subscription_id)", 1895 | "hide": 2, 1896 | "includeAll": false, 1897 | "label": null, 1898 | "multi": false, 1899 | "name": "subscription_id", 1900 | "options": [], 1901 | "query": "label_values(synapse_connector_application_info{workspace_name=\"$workspace_name\"}, subscription_id)", 1902 | "refresh": 2, 1903 | "regex": "", 1904 | "skipUrlSync": false, 1905 | "sort": 0, 1906 | "tagValuesQuery": "", 1907 | "tags": [], 1908 | "tagsQuery": "", 1909 | "type": "query", 1910 | "useTags": false 1911 | }, 1912 | { 1913 | "allValue": null, 1914 | "current": { 1915 | "selected": false, 1916 | "text": "", 1917 | "value": "" 1918 | }, 1919 | "datasource": "$Datasource", 1920 | "definition": "label_values(synapse_connector_application_info{workspace_name=\"$workspace_name\", application_id=\"$application_id\"}, livy_id)", 1921 | "hide": 2, 1922 | "includeAll": false, 1923 | "label": null, 1924 | "multi": false, 1925 | "name": "livy_id", 1926 
| "options": [], 1927 | "query": "label_values(synapse_connector_application_info{workspace_name=\"$workspace_name\", application_id=\"$application_id\"}, livy_id)", 1928 | "refresh": 2, 1929 | "regex": "", 1930 | "skipUrlSync": false, 1931 | "sort": 0, 1932 | "tagValuesQuery": "", 1933 | "tags": [], 1934 | "tagsQuery": "", 1935 | "type": "query", 1936 | "useTags": false 1937 | } 1938 | ] 1939 | }, 1940 | "time": { 1941 | "from": "now-1h", 1942 | "to": "now" 1943 | }, 1944 | "timepicker": { 1945 | "refresh_intervals": [ 1946 | "30s", 1947 | "1m", 1948 | "5m", 1949 | "15m", 1950 | "30m", 1951 | "1h", 1952 | "2h", 1953 | "1d" 1954 | ] 1955 | }, 1956 | "timezone": "browser", 1957 | "title": "Synapse Workspace / Spark Application", 1958 | "uid": "K7Oq5VSGz", 1959 | "version": 1 1960 | } -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/grafana_dashboards/Synapse_Workspace_Sparkpools.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "editable": false, 16 | "gnetId": null, 17 | "graphTooltip": 1, 18 | "id": 25, 19 | "iteration": 1597997851529, 20 | "links": [ 21 | { 22 | "$$hashKey": "object:325", 23 | "icon": "cloud", 24 | "tags": [], 25 | "targetBlank": true, 26 | "title": "Go and Manage Applications", 27 | "type": "link", 28 | "url": "https://web.azuresynapse.net/monitoring/sparkapplication?workspace=%2Fsubscriptions%2F${subscription_id}%2FresourceGroups%2F${resource_group}%2Fproviders%2FMicrosoft.Synapse%2Fworkspaces%2F${workspace_name}&&sparkPoolName=${spark_pool_name}" 29 | }, 30 | { 31 | "$$hashKey": "object:326", 32 | "icon": "cloud", 33 | "tags": [], 34 | "targetBlank": true, 35 | "title": "Manage 
Spark Pools", 36 | "type": "link", 37 | "url": "https://web.azuresynapse.net/management/apachesparkpools?workspace=%2Fsubscriptions%2F${subscription_id}%2FresourceGroups%2F${resource_group}%2Fproviders%2FMicrosoft.Synapse%2Fworkspaces%2F${workspace_name}&&sparkPoolName=${spark_pool_name}" 38 | } 39 | ], 40 | "panels": [ 41 | { 42 | "collapsed": false, 43 | "datasource": null, 44 | "gridPos": { 45 | "h": 1, 46 | "w": 24, 47 | "x": 0, 48 | "y": 0 49 | }, 50 | "id": 31, 51 | "panels": [], 52 | "title": "Summary", 53 | "type": "row" 54 | }, 55 | { 56 | "datasource": "$Datasource", 57 | "fieldConfig": { 58 | "defaults": { 59 | "custom": {}, 60 | "mappings": [], 61 | "thresholds": { 62 | "mode": "absolute", 63 | "steps": [ 64 | { 65 | "color": "green", 66 | "value": null 67 | } 68 | ] 69 | } 70 | }, 71 | "overrides": [] 72 | }, 73 | "gridPos": { 74 | "h": 5, 75 | "w": 3, 76 | "x": 0, 77 | "y": 1 78 | }, 79 | "id": 42, 80 | "options": { 81 | "colorMode": "value", 82 | "graphMode": "area", 83 | "justifyMode": "auto", 84 | "orientation": "auto", 85 | "reduceOptions": { 86 | "calcs": [ 87 | "lastNotNull" 88 | ], 89 | "fields": "", 90 | "values": false 91 | } 92 | }, 93 | "pluginVersion": "7.0.3", 94 | "targets": [ 95 | { 96 | "expr": "count(\n sum(spark_executor_memoryUsed_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\"})\n by (spark_pool_name, executor_id, application_id, name, application_name))", 97 | "format": "time_series", 98 | "instant": false, 99 | "interval": "", 100 | "legendFormat": "app={{application_name}} executor={{executor_id}}", 101 | "refId": "A" 102 | } 103 | ], 104 | "timeFrom": null, 105 | "timeShift": null, 106 | "title": "Executors", 107 | "type": "stat" 108 | }, 109 | { 110 | "datasource": "$Datasource", 111 | "fieldConfig": { 112 | "defaults": { 113 | "custom": {}, 114 | "decimals": 1, 115 | "mappings": [], 116 | "thresholds": { 117 | "mode": "absolute", 118 | "steps": [ 119 | { 120 | "color": 
"green", 121 | "value": null 122 | }, 123 | { 124 | "color": "red", 125 | "value": 80 126 | } 127 | ] 128 | }, 129 | "unit": "percentunit" 130 | }, 131 | "overrides": [] 132 | }, 133 | "gridPos": { 134 | "h": 5, 135 | "w": 3, 136 | "x": 3, 137 | "y": 1 138 | }, 139 | "id": 24, 140 | "options": { 141 | "orientation": "auto", 142 | "reduceOptions": { 143 | "calcs": [ 144 | "lastNotNull" 145 | ], 146 | "fields": "", 147 | "values": false 148 | }, 149 | "showThresholdLabels": false, 150 | "showThresholdMarkers": true 151 | }, 152 | "pluginVersion": "7.0.3", 153 | "targets": [ 154 | { 155 | "expr": "sum(spark_executor_memoryUsed_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\"}) by (workspace_name, spark_pool_name)\n/\nsum(spark_executor_maxMemory_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\"}) by (workspace_name, spark_pool_name)", 156 | "instant": false, 157 | "interval": "", 158 | "legendFormat": "", 159 | "refId": "A" 160 | } 161 | ], 162 | "timeFrom": null, 163 | "timeShift": null, 164 | "title": "Executor Storage Memory", 165 | "type": "gauge" 166 | }, 167 | { 168 | "datasource": "$Datasource", 169 | "fieldConfig": { 170 | "defaults": { 171 | "custom": {}, 172 | "decimals": 1, 173 | "mappings": [], 174 | "thresholds": { 175 | "mode": "percentage", 176 | "steps": [ 177 | { 178 | "color": "green", 179 | "value": null 180 | }, 181 | { 182 | "color": "#EAB839", 183 | "value": 80 184 | }, 185 | { 186 | "color": "red", 187 | "value": 100 188 | } 189 | ] 190 | }, 191 | "unit": "percentunit" 192 | }, 193 | "overrides": [] 194 | }, 195 | "gridPos": { 196 | "h": 5, 197 | "w": 3, 198 | "x": 6, 199 | "y": 1 200 | }, 201 | "id": 25, 202 | "options": { 203 | "orientation": "auto", 204 | "reduceOptions": { 205 | "calcs": [ 206 | "lastNotNull" 207 | ], 208 | "fields": "", 209 | "values": false 210 | }, 211 | "showThresholdLabels": false, 212 | "showThresholdMarkers": 
true 213 | }, 214 | "pluginVersion": "7.0.3", 215 | "targets": [ 216 | { 217 | "expr": "max(spark_executor_memoryUsed_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id=\"driver\"}) by (workspace_name, spark_pool_name)\n/ \nmax(spark_executor_maxMemory_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id=\"driver\"}) by (workspace_name, spark_pool_name)", 218 | "instant": false, 219 | "interval": "", 220 | "legendFormat": "", 221 | "refId": "A" 222 | } 223 | ], 224 | "timeFrom": null, 225 | "timeShift": null, 226 | "title": "Driver Storage Memory", 227 | "type": "gauge" 228 | }, 229 | { 230 | "datasource": "$Datasource", 231 | "fieldConfig": { 232 | "defaults": { 233 | "custom": {}, 234 | "mappings": [], 235 | "thresholds": { 236 | "mode": "absolute", 237 | "steps": [ 238 | { 239 | "color": "green", 240 | "value": null 241 | }, 242 | { 243 | "color": "red", 244 | "value": 250 245 | } 246 | ] 247 | }, 248 | "unit": "s" 249 | }, 250 | "overrides": [] 251 | }, 252 | "gridPos": { 253 | "h": 5, 254 | "w": 3, 255 | "x": 9, 256 | "y": 1 257 | }, 258 | "id": 26, 259 | "options": { 260 | "colorMode": "value", 261 | "graphMode": "area", 262 | "justifyMode": "auto", 263 | "orientation": "auto", 264 | "reduceOptions": { 265 | "calcs": [ 266 | "max" 267 | ], 268 | "fields": "", 269 | "values": false 270 | } 271 | }, 272 | "pluginVersion": "7.0.3", 273 | "targets": [ 274 | { 275 | "expr": "max(rate(spark_executor_totalGCTime_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}[1m]))", 276 | "instant": false, 277 | "interval": "", 278 | "legendFormat": "", 279 | "refId": "A" 280 | } 281 | ], 282 | "timeFrom": null, 283 | "timeShift": null, 284 | "title": "Max Executor GC time", 285 | "type": "stat" 286 | }, 287 | { 288 | "datasource": "$Datasource", 289 | "fieldConfig": { 290 | "defaults": { 291 | "custom": {}, 292 | "mappings": [], 293 | "thresholds": { 294 | 
"mode": "absolute", 295 | "steps": [ 296 | { 297 | "color": "green", 298 | "value": null 299 | }, 300 | { 301 | "color": "#EAB839", 302 | "value": 4 303 | }, 304 | { 305 | "color": "red", 306 | "value": 6 307 | } 308 | ] 309 | } 310 | }, 311 | "overrides": [] 312 | }, 313 | "gridPos": { 314 | "h": 5, 315 | "w": 3, 316 | "x": 12, 317 | "y": 1 318 | }, 319 | "id": 27, 320 | "options": { 321 | "orientation": "auto", 322 | "reduceOptions": { 323 | "calcs": [ 324 | "max" 325 | ], 326 | "fields": "", 327 | "values": false 328 | }, 329 | "showThresholdLabels": false, 330 | "showThresholdMarkers": true 331 | }, 332 | "pluginVersion": "7.0.3", 333 | "targets": [ 334 | { 335 | "expr": "max(spark_executor_activeTasks{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"})", 336 | "format": "time_series", 337 | "interval": "", 338 | "legendFormat": "app={{application_name}} executor={{executor_id}}", 339 | "refId": "A" 340 | } 341 | ], 342 | "timeFrom": null, 343 | "timeShift": null, 344 | "title": "Max Tasks on Executor", 345 | "type": "gauge" 346 | }, 347 | { 348 | "datasource": "$Datasource", 349 | "fieldConfig": { 350 | "defaults": { 351 | "custom": {}, 352 | "mappings": [], 353 | "thresholds": { 354 | "mode": "absolute", 355 | "steps": [ 356 | { 357 | "color": "green", 358 | "value": null 359 | }, 360 | { 361 | "color": "#EAB839", 362 | "value": 10 363 | }, 364 | { 365 | "color": "red", 366 | "value": 20 367 | } 368 | ] 369 | } 370 | }, 371 | "overrides": [] 372 | }, 373 | "gridPos": { 374 | "h": 5, 375 | "w": 3, 376 | "x": 15, 377 | "y": 1 378 | }, 379 | "id": 36, 380 | "options": { 381 | "colorMode": "value", 382 | "graphMode": "area", 383 | "justifyMode": "auto", 384 | "orientation": "auto", 385 | "reduceOptions": { 386 | "calcs": [ 387 | "max" 388 | ], 389 | "fields": "", 390 | "values": false 391 | } 392 | }, 393 | "pluginVersion": "7.0.3", 394 | "targets": [ 395 | { 396 | "expr": 
"sum(spark_executor_failedTasks_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\"})", 397 | "format": "time_series", 398 | "interval": "", 399 | "legendFormat": "app={{application_name}} executor={{executor_id}}", 400 | "refId": "A" 401 | } 402 | ], 403 | "timeFrom": null, 404 | "timeShift": null, 405 | "title": "Total Failed Tasks", 406 | "type": "stat" 407 | }, 408 | { 409 | "datasource": "$Datasource", 410 | "fieldConfig": { 411 | "defaults": { 412 | "custom": {}, 413 | "mappings": [], 414 | "thresholds": { 415 | "mode": "absolute", 416 | "steps": [ 417 | { 418 | "color": "green", 419 | "value": null 420 | } 421 | ] 422 | } 423 | }, 424 | "overrides": [] 425 | }, 426 | "gridPos": { 427 | "h": 5, 428 | "w": 3, 429 | "x": 18, 430 | "y": 1 431 | }, 432 | "id": 38, 433 | "options": { 434 | "colorMode": "value", 435 | "graphMode": "area", 436 | "justifyMode": "auto", 437 | "orientation": "auto", 438 | "reduceOptions": { 439 | "calcs": [ 440 | "max" 441 | ], 442 | "fields": "", 443 | "values": false 444 | } 445 | }, 446 | "pluginVersion": "7.0.3", 447 | "targets": [ 448 | { 449 | "expr": "sum(spark_executor_completedTasks_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\"})", 450 | "interval": "", 451 | "legendFormat": "", 452 | "refId": "B" 453 | } 454 | ], 455 | "timeFrom": null, 456 | "timeShift": null, 457 | "title": "Total Completed Tasks", 458 | "type": "stat" 459 | }, 460 | { 461 | "collapsed": false, 462 | "datasource": null, 463 | "gridPos": { 464 | "h": 1, 465 | "w": 24, 466 | "x": 0, 467 | "y": 6 468 | }, 469 | "id": 33, 470 | "panels": [], 471 | "title": "Applications", 472 | "type": "row" 473 | }, 474 | { 475 | "datasource": "$Datasource", 476 | "description": "", 477 | "fieldConfig": { 478 | "defaults": { 479 | "custom": { 480 | "align": null, 481 | "displayMode": "auto" 482 | }, 483 | "mappings": [], 484 | "thresholds": { 485 | "mode": "absolute", 
486 | "steps": [ 487 | { 488 | "color": "green", 489 | "value": null 490 | }, 491 | { 492 | "color": "red", 493 | "value": 80 494 | } 495 | ] 496 | }, 497 | "unit": "none" 498 | }, 499 | "overrides": [ 500 | { 501 | "matcher": { 502 | "id": "byName", 503 | "options": "Total Duration" 504 | }, 505 | "properties": [ 506 | { 507 | "id": "unit", 508 | "value": "s" 509 | } 510 | ] 511 | }, 512 | { 513 | "matcher": { 514 | "id": "byName", 515 | "options": "Name" 516 | }, 517 | "properties": [ 518 | { 519 | "id": "links", 520 | "value": [ 521 | { 522 | "targetBlank": true, 523 | "title": "Drill into Application", 524 | "url": "/d/K7Oq5VSGz/synapse-workspace-spark-application?var-workspace_name=${__data.fields[workspace_name]}&var-spark_pool_name=${__data.fields[spark_pool_name]}&var-name=${__data.fields[name]}&${__url_time_range}" 525 | } 526 | ] 527 | } 528 | ] 529 | } 530 | ] 531 | }, 532 | "gridPos": { 533 | "h": 8, 534 | "w": 24, 535 | "x": 0, 536 | "y": 7 537 | }, 538 | "id": 20, 539 | "options": { 540 | "showHeader": true, 541 | "sortBy": [] 542 | }, 543 | "pluginVersion": "7.0.3", 544 | "targets": [ 545 | { 546 | "expr": "max_over_time(\r\n max(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"})\n by (workspace_name, spark_pool_name, application_id, name, application_name)[$__range:1m]\r\n)", 547 | "format": "table", 548 | "instant": true, 549 | "interval": "", 550 | "legendFormat": "", 551 | "refId": "A" 552 | } 553 | ], 554 | "timeFrom": null, 555 | "timeShift": null, 556 | "title": "Applications Status", 557 | "transformations": [ 558 | { 559 | "id": "organize", 560 | "options": { 561 | "excludeByName": { 562 | "Time": true 563 | }, 564 | "indexByName": { 565 | "Time": 0, 566 | "Value": 6, 567 | "application_id": 4, 568 | "application_name": 5, 569 | "name": 1, 570 | "spark_pool_name": 3, 571 | "workspace_name": 2 572 | }, 573 | "renameByName": { 574 | "Value": "Total Duration", 575 | 
"application_id": "Application ID", 576 | "application_name": "Application Name", 577 | "executor_id": "Executor ID", 578 | "name": "Name", 579 | "spark_pool_name": "Spark Pool", 580 | "workspace_name": "Workspace" 581 | } 582 | } 583 | } 584 | ], 585 | "type": "table" 586 | }, 587 | { 588 | "collapsed": false, 589 | "datasource": null, 590 | "gridPos": { 591 | "h": 1, 592 | "w": 24, 593 | "x": 0, 594 | "y": 15 595 | }, 596 | "id": 22, 597 | "panels": [], 598 | "title": "Details", 599 | "type": "row" 600 | }, 601 | { 602 | "aliasColors": {}, 603 | "bars": false, 604 | "dashLength": 10, 605 | "dashes": false, 606 | "datasource": "$Datasource", 607 | "fieldConfig": { 608 | "defaults": { 609 | "custom": {} 610 | }, 611 | "overrides": [] 612 | }, 613 | "fill": 2, 614 | "fillGradient": 0, 615 | "gridPos": { 616 | "h": 8, 617 | "w": 12, 618 | "x": 0, 619 | "y": 16 620 | }, 621 | "hiddenSeries": false, 622 | "id": 16, 623 | "legend": { 624 | "alignAsTable": true, 625 | "avg": false, 626 | "current": true, 627 | "max": false, 628 | "min": false, 629 | "rightSide": true, 630 | "show": false, 631 | "total": false, 632 | "values": true 633 | }, 634 | "lines": true, 635 | "linewidth": 1, 636 | "nullPointMode": "null", 637 | "options": { 638 | "dataLinks": [] 639 | }, 640 | "percentage": false, 641 | "pointradius": 2, 642 | "points": false, 643 | "renderer": "flot", 644 | "seriesOverrides": [], 645 | "spaceLength": 10, 646 | "stack": false, 647 | "steppedLine": false, 648 | "targets": [ 649 | { 650 | "expr": "sum(\n rate(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\", executor_id!=\"driver\"}[2m])) \nby (workspace_name, spark_pool_name, name)", 651 | "instant": false, 652 | "interval": "", 653 | "intervalFactor": 2, 654 | "legendFormat": "{{name}}", 655 | "refId": "A" 656 | } 657 | ], 658 | "thresholds": [], 659 | "timeFrom": null, 660 | "timeRegions": [], 661 | "timeShift": null, 662 | "title": "Total 
Duration / Seconds", 663 | "tooltip": { 664 | "shared": false, 665 | "sort": 0, 666 | "value_type": "individual" 667 | }, 668 | "type": "graph", 669 | "xaxis": { 670 | "buckets": null, 671 | "mode": "time", 672 | "name": null, 673 | "show": true, 674 | "values": [] 675 | }, 676 | "yaxes": [ 677 | { 678 | "decimals": 2, 679 | "format": "s", 680 | "label": null, 681 | "logBase": 1, 682 | "max": null, 683 | "min": null, 684 | "show": true 685 | }, 686 | { 687 | "format": "short", 688 | "label": null, 689 | "logBase": 1, 690 | "max": null, 691 | "min": null, 692 | "show": false 693 | } 694 | ], 695 | "yaxis": { 696 | "align": false, 697 | "alignLevel": null 698 | } 699 | }, 700 | { 701 | "aliasColors": {}, 702 | "bars": false, 703 | "dashLength": 10, 704 | "dashes": false, 705 | "datasource": "$Datasource", 706 | "description": "", 707 | "fieldConfig": { 708 | "defaults": { 709 | "custom": {}, 710 | "mappings": [], 711 | "thresholds": { 712 | "mode": "absolute", 713 | "steps": [ 714 | { 715 | "color": "green", 716 | "value": null 717 | }, 718 | { 719 | "color": "red", 720 | "value": 80 721 | } 722 | ] 723 | } 724 | }, 725 | "overrides": [] 726 | }, 727 | "fill": 1, 728 | "fillGradient": 0, 729 | "gridPos": { 730 | "h": 8, 731 | "w": 12, 732 | "x": 12, 733 | "y": 16 734 | }, 735 | "hiddenSeries": false, 736 | "id": 2, 737 | "legend": { 738 | "alignAsTable": true, 739 | "avg": false, 740 | "current": true, 741 | "max": false, 742 | "min": false, 743 | "rightSide": true, 744 | "show": false, 745 | "total": false, 746 | "values": true 747 | }, 748 | "lines": true, 749 | "linewidth": 1, 750 | "nullPointMode": "null", 751 | "options": { 752 | "dataLinks": [] 753 | }, 754 | "percentage": false, 755 | "pluginVersion": "7.0.3", 756 | "pointradius": 2, 757 | "points": false, 758 | "renderer": "flot", 759 | "seriesOverrides": [], 760 | "spaceLength": 10, 761 | "stack": false, 762 | "steppedLine": false, 763 | "targets": [ 764 | { 765 | "expr": "sum (\n 
spark_executor_memoryUsed_bytes{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}\n) by (workspace_name, spark_pool_name, name, application_name, application_id)", 766 | "interval": "", 767 | "intervalFactor": 3, 768 | "legendFormat": "{{name}}", 769 | "refId": "A" 770 | } 771 | ], 772 | "thresholds": [], 773 | "timeFrom": null, 774 | "timeRegions": [], 775 | "timeShift": null, 776 | "title": "Executor Storage Memory Used", 777 | "tooltip": { 778 | "shared": false, 779 | "sort": 2, 780 | "value_type": "individual" 781 | }, 782 | "type": "graph", 783 | "xaxis": { 784 | "buckets": null, 785 | "mode": "time", 786 | "name": null, 787 | "show": true, 788 | "values": [] 789 | }, 790 | "yaxes": [ 791 | { 792 | "format": "bytes", 793 | "label": null, 794 | "logBase": 1, 795 | "max": null, 796 | "min": null, 797 | "show": true 798 | }, 799 | { 800 | "format": "short", 801 | "label": null, 802 | "logBase": 1, 803 | "max": null, 804 | "min": null, 805 | "show": false 806 | } 807 | ], 808 | "yaxis": { 809 | "align": false, 810 | "alignLevel": null 811 | } 812 | }, 813 | { 814 | "aliasColors": {}, 815 | "bars": false, 816 | "dashLength": 10, 817 | "dashes": false, 818 | "datasource": "$Datasource", 819 | "fieldConfig": { 820 | "defaults": { 821 | "custom": {}, 822 | "mappings": [], 823 | "thresholds": { 824 | "mode": "absolute", 825 | "steps": [ 826 | { 827 | "color": "green", 828 | "value": null 829 | }, 830 | { 831 | "color": "red", 832 | "value": 80 833 | } 834 | ] 835 | } 836 | }, 837 | "overrides": [] 838 | }, 839 | "fill": 10, 840 | "fillGradient": 0, 841 | "gridPos": { 842 | "h": 8, 843 | "w": 12, 844 | "x": 0, 845 | "y": 24 846 | }, 847 | "hiddenSeries": false, 848 | "id": 4, 849 | "legend": { 850 | "avg": false, 851 | "current": false, 852 | "max": false, 853 | "min": false, 854 | "show": true, 855 | "total": false, 856 | "values": false 857 | }, 858 | "lines": true, 859 | "linewidth": 0, 860 | "nullPointMode": "null", 861 | "options": { 862 | 
"dataLinks": [] 863 | }, 864 | "percentage": false, 865 | "pluginVersion": "7.0.3", 866 | "pointradius": 2, 867 | "points": false, 868 | "renderer": "flot", 869 | "seriesOverrides": [], 870 | "spaceLength": 10, 871 | "stack": true, 872 | "steppedLine": true, 873 | "targets": [ 874 | { 875 | "expr": "sum(spark_executor_activeTasks{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}) by (workspace_name, spark_pool_name, name, application_name, application_id)", 876 | "format": "time_series", 877 | "instant": false, 878 | "interval": "", 879 | "legendFormat": "{{name}}", 880 | "refId": "A" 881 | } 882 | ], 883 | "thresholds": [], 884 | "timeFrom": null, 885 | "timeRegions": [], 886 | "timeShift": null, 887 | "title": "Active Tasks per Executor", 888 | "tooltip": { 889 | "shared": false, 890 | "sort": 0, 891 | "value_type": "individual" 892 | }, 893 | "type": "graph", 894 | "xaxis": { 895 | "buckets": null, 896 | "mode": "time", 897 | "name": null, 898 | "show": true, 899 | "values": [] 900 | }, 901 | "yaxes": [ 902 | { 903 | "format": "short", 904 | "label": null, 905 | "logBase": 1, 906 | "max": null, 907 | "min": null, 908 | "show": true 909 | }, 910 | { 911 | "format": "short", 912 | "label": null, 913 | "logBase": 1, 914 | "max": null, 915 | "min": null, 916 | "show": true 917 | } 918 | ], 919 | "yaxis": { 920 | "align": false, 921 | "alignLevel": null 922 | } 923 | }, 924 | { 925 | "aliasColors": {}, 926 | "bars": false, 927 | "dashLength": 10, 928 | "dashes": false, 929 | "datasource": "$Datasource", 930 | "fieldConfig": { 931 | "defaults": { 932 | "custom": {} 933 | }, 934 | "overrides": [] 935 | }, 936 | "fill": 1, 937 | "fillGradient": 0, 938 | "gridPos": { 939 | "h": 8, 940 | "w": 12, 941 | "x": 12, 942 | "y": 24 943 | }, 944 | "hiddenSeries": false, 945 | "id": 11, 946 | "legend": { 947 | "avg": false, 948 | "current": false, 949 | "max": false, 950 | "min": false, 951 | "show": true, 952 | "total": false, 953 | "values": false 954 | 
}, 955 | "lines": true, 956 | "linewidth": 1, 957 | "nullPointMode": "null", 958 | "options": { 959 | "dataLinks": [] 960 | }, 961 | "percentage": false, 962 | "pointradius": 2, 963 | "points": false, 964 | "renderer": "flot", 965 | "seriesOverrides": [], 966 | "spaceLength": 10, 967 | "stack": false, 968 | "steppedLine": false, 969 | "targets": [ 970 | { 971 | "expr": "sum (\n rate(spark_executor_totalGCTime_seconds_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}[1m])\n) by (workspace_name, spark_pool_name, name, application_name, application_id)", 972 | "interval": "", 973 | "legendFormat": "{{name}}", 974 | "refId": "A" 975 | } 976 | ], 977 | "thresholds": [], 978 | "timeFrom": null, 979 | "timeRegions": [], 980 | "timeShift": null, 981 | "title": "Executor GC time", 982 | "tooltip": { 983 | "shared": false, 984 | "sort": 0, 985 | "value_type": "individual" 986 | }, 987 | "type": "graph", 988 | "xaxis": { 989 | "buckets": null, 990 | "mode": "time", 991 | "name": null, 992 | "show": true, 993 | "values": [] 994 | }, 995 | "yaxes": [ 996 | { 997 | "format": "s", 998 | "label": null, 999 | "logBase": 1, 1000 | "max": null, 1001 | "min": null, 1002 | "show": true 1003 | }, 1004 | { 1005 | "format": "short", 1006 | "label": null, 1007 | "logBase": 1, 1008 | "max": null, 1009 | "min": null, 1010 | "show": true 1011 | } 1012 | ], 1013 | "yaxis": { 1014 | "align": false, 1015 | "alignLevel": null 1016 | } 1017 | }, 1018 | { 1019 | "aliasColors": {}, 1020 | "bars": false, 1021 | "dashLength": 10, 1022 | "dashes": false, 1023 | "datasource": "$Datasource", 1024 | "description": "", 1025 | "fieldConfig": { 1026 | "defaults": { 1027 | "custom": {} 1028 | }, 1029 | "overrides": [] 1030 | }, 1031 | "fill": 1, 1032 | "fillGradient": 0, 1033 | "gridPos": { 1034 | "h": 8, 1035 | "w": 12, 1036 | "x": 0, 1037 | "y": 32 1038 | }, 1039 | "hiddenSeries": false, 1040 | "id": 9, 1041 | "legend": { 1042 | "avg": false, 1043 | "current": false, 1044 | 
"max": false, 1045 | "min": false, 1046 | "show": true, 1047 | "total": false, 1048 | "values": false 1049 | }, 1050 | "lines": true, 1051 | "linewidth": 1, 1052 | "nullPointMode": "null", 1053 | "options": { 1054 | "dataLinks": [] 1055 | }, 1056 | "percentage": false, 1057 | "pointradius": 2, 1058 | "points": false, 1059 | "renderer": "flot", 1060 | "seriesOverrides": [], 1061 | "spaceLength": 10, 1062 | "stack": false, 1063 | "steppedLine": false, 1064 | "targets": [ 1065 | { 1066 | "expr": "sum(rate(spark_executor_totalShuffleRead_bytes_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}[5m]))\nby (workspace_name, spark_pool_name, name, application_name, application_id)", 1067 | "hide": false, 1068 | "interval": "", 1069 | "legendFormat": "{{name}}", 1070 | "refId": "A" 1071 | } 1072 | ], 1073 | "thresholds": [], 1074 | "timeFrom": null, 1075 | "timeRegions": [], 1076 | "timeShift": null, 1077 | "title": "Shuffle Bytes Read", 1078 | "tooltip": { 1079 | "shared": false, 1080 | "sort": 1, 1081 | "value_type": "individual" 1082 | }, 1083 | "type": "graph", 1084 | "xaxis": { 1085 | "buckets": null, 1086 | "mode": "time", 1087 | "name": null, 1088 | "show": true, 1089 | "values": [] 1090 | }, 1091 | "yaxes": [ 1092 | { 1093 | "format": "bytes", 1094 | "label": null, 1095 | "logBase": 1, 1096 | "max": null, 1097 | "min": null, 1098 | "show": true 1099 | }, 1100 | { 1101 | "format": "short", 1102 | "label": null, 1103 | "logBase": 1, 1104 | "max": null, 1105 | "min": null, 1106 | "show": false 1107 | } 1108 | ], 1109 | "yaxis": { 1110 | "align": false, 1111 | "alignLevel": null 1112 | } 1113 | }, 1114 | { 1115 | "aliasColors": {}, 1116 | "bars": false, 1117 | "dashLength": 10, 1118 | "dashes": false, 1119 | "datasource": "$Datasource", 1120 | "description": "", 1121 | "fieldConfig": { 1122 | "defaults": { 1123 | "custom": {} 1124 | }, 1125 | "overrides": [] 1126 | }, 1127 | "fill": 1, 1128 | "fillGradient": 0, 1129 | "gridPos": { 1130 | "h": 
8, 1131 | "w": 12, 1132 | "x": 12, 1133 | "y": 32 1134 | }, 1135 | "hiddenSeries": false, 1136 | "id": 8, 1137 | "legend": { 1138 | "avg": false, 1139 | "current": false, 1140 | "max": false, 1141 | "min": false, 1142 | "show": true, 1143 | "total": false, 1144 | "values": false 1145 | }, 1146 | "lines": true, 1147 | "linewidth": 1, 1148 | "nullPointMode": "null", 1149 | "options": { 1150 | "dataLinks": [] 1151 | }, 1152 | "percentage": false, 1153 | "pointradius": 2, 1154 | "points": false, 1155 | "renderer": "flot", 1156 | "seriesOverrides": [], 1157 | "spaceLength": 10, 1158 | "stack": false, 1159 | "steppedLine": false, 1160 | "targets": [ 1161 | { 1162 | "expr": "sum(\n rate(spark_executor_totalShuffleWrite_bytes_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}[5m])\n) by (workspace_name, spark_pool_name, name, application_name, application_id)", 1163 | "hide": false, 1164 | "interval": "", 1165 | "legendFormat": "{{name}}", 1166 | "refId": "A" 1167 | } 1168 | ], 1169 | "thresholds": [], 1170 | "timeFrom": null, 1171 | "timeRegions": [], 1172 | "timeShift": null, 1173 | "title": "Shuffle Bytes Written", 1174 | "tooltip": { 1175 | "shared": false, 1176 | "sort": 1, 1177 | "value_type": "individual" 1178 | }, 1179 | "type": "graph", 1180 | "xaxis": { 1181 | "buckets": null, 1182 | "mode": "time", 1183 | "name": null, 1184 | "show": true, 1185 | "values": [] 1186 | }, 1187 | "yaxes": [ 1188 | { 1189 | "format": "bytes", 1190 | "label": null, 1191 | "logBase": 1, 1192 | "max": null, 1193 | "min": null, 1194 | "show": true 1195 | }, 1196 | { 1197 | "format": "short", 1198 | "label": null, 1199 | "logBase": 1, 1200 | "max": null, 1201 | "min": null, 1202 | "show": false 1203 | } 1204 | ], 1205 | "yaxis": { 1206 | "align": false, 1207 | "alignLevel": null 1208 | } 1209 | }, 1210 | { 1211 | "aliasColors": {}, 1212 | "bars": false, 1213 | "dashLength": 10, 1214 | "dashes": false, 1215 | "datasource": "$Datasource", 1216 | "fieldConfig": { 
1217 | "defaults": { 1218 | "custom": {} 1219 | }, 1220 | "overrides": [] 1221 | }, 1222 | "fill": 1, 1223 | "fillGradient": 0, 1224 | "gridPos": { 1225 | "h": 8, 1226 | "w": 12, 1227 | "x": 0, 1228 | "y": 40 1229 | }, 1230 | "hiddenSeries": false, 1231 | "id": 18, 1232 | "legend": { 1233 | "avg": false, 1234 | "current": false, 1235 | "max": false, 1236 | "min": false, 1237 | "show": true, 1238 | "total": false, 1239 | "values": false 1240 | }, 1241 | "lines": true, 1242 | "linewidth": 1, 1243 | "nullPointMode": "connected", 1244 | "options": { 1245 | "dataLinks": [] 1246 | }, 1247 | "percentage": false, 1248 | "pointradius": 2, 1249 | "points": false, 1250 | "renderer": "flot", 1251 | "seriesOverrides": [], 1252 | "spaceLength": 10, 1253 | "stack": false, 1254 | "steppedLine": false, 1255 | "targets": [ 1256 | { 1257 | "expr": "sum (\r\n rate(spark_executor_totalInputBytes_bytes_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}[5m])\n) by (workspace_name, spark_pool_name, name, application_name, application_id)\r", 1258 | "interval": "", 1259 | "legendFormat": "{{name}}", 1260 | "refId": "A" 1261 | } 1262 | ], 1263 | "thresholds": [], 1264 | "timeFrom": null, 1265 | "timeRegions": [], 1266 | "timeShift": null, 1267 | "title": "Total Input Bytes/Seconds", 1268 | "tooltip": { 1269 | "shared": false, 1270 | "sort": 0, 1271 | "value_type": "individual" 1272 | }, 1273 | "type": "graph", 1274 | "xaxis": { 1275 | "buckets": null, 1276 | "mode": "time", 1277 | "name": null, 1278 | "show": true, 1279 | "values": [] 1280 | }, 1281 | "yaxes": [ 1282 | { 1283 | "format": "Bps", 1284 | "label": null, 1285 | "logBase": 1, 1286 | "max": null, 1287 | "min": null, 1288 | "show": true 1289 | }, 1290 | { 1291 | "format": "short", 1292 | "label": null, 1293 | "logBase": 1, 1294 | "max": null, 1295 | "min": null, 1296 | "show": true 1297 | } 1298 | ], 1299 | "yaxis": { 1300 | "align": false, 1301 | "alignLevel": null 1302 | } 1303 | }, 1304 | { 1305 | 
"aliasColors": {}, 1306 | "bars": false, 1307 | "dashLength": 10, 1308 | "dashes": false, 1309 | "datasource": "$Datasource", 1310 | "description": "", 1311 | "fieldConfig": { 1312 | "defaults": { 1313 | "custom": {} 1314 | }, 1315 | "overrides": [] 1316 | }, 1317 | "fill": 10, 1318 | "fillGradient": 0, 1319 | "gridPos": { 1320 | "h": 8, 1321 | "w": 12, 1322 | "x": 12, 1323 | "y": 40 1324 | }, 1325 | "hiddenSeries": false, 1326 | "id": 7, 1327 | "legend": { 1328 | "avg": false, 1329 | "current": false, 1330 | "max": false, 1331 | "min": false, 1332 | "show": true, 1333 | "total": false, 1334 | "values": false 1335 | }, 1336 | "lines": true, 1337 | "linewidth": 0, 1338 | "nullPointMode": "null", 1339 | "options": { 1340 | "dataLinks": [] 1341 | }, 1342 | "percentage": false, 1343 | "pointradius": 2, 1344 | "points": false, 1345 | "renderer": "flot", 1346 | "seriesOverrides": [], 1347 | "spaceLength": 10, 1348 | "stack": false, 1349 | "steppedLine": false, 1350 | "targets": [ 1351 | { 1352 | "expr": "sum(\n rate(spark_executor_totalShuffleWrite_bytes_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}[5m]))\nby (workspace_name, spark_pool_name, name, application_name, application_id)", 1353 | "hide": false, 1354 | "interval": "", 1355 | "intervalFactor": 1, 1356 | "legendFormat": "{{name}} Write", 1357 | "refId": "A" 1358 | }, 1359 | { 1360 | "expr": "sum(\n rate(spark_executor_totalShuffleRead_bytes_total{workspace_name=\"$workspace_name\", spark_pool_name=\"$spark_pool_name\"}[5m]))\nby (workspace_name, spark_pool_name, name, application_name, application_id)", 1361 | "interval": "", 1362 | "intervalFactor": 1, 1363 | "legendFormat": "{{name}} Read", 1364 | "refId": "B" 1365 | } 1366 | ], 1367 | "thresholds": [], 1368 | "timeFrom": null, 1369 | "timeRegions": [], 1370 | "timeShift": null, 1371 | "title": "Total Shuffle IO", 1372 | "tooltip": { 1373 | "shared": false, 1374 | "sort": 1, 1375 | "value_type": "individual" 1376 | }, 1377 | 
"type": "graph", 1378 | "xaxis": { 1379 | "buckets": null, 1380 | "mode": "time", 1381 | "name": null, 1382 | "show": true, 1383 | "values": [] 1384 | }, 1385 | "yaxes": [ 1386 | { 1387 | "format": "bytes", 1388 | "label": null, 1389 | "logBase": 1, 1390 | "max": null, 1391 | "min": null, 1392 | "show": true 1393 | }, 1394 | { 1395 | "format": "short", 1396 | "label": null, 1397 | "logBase": 1, 1398 | "max": null, 1399 | "min": null, 1400 | "show": false 1401 | } 1402 | ], 1403 | "yaxis": { 1404 | "align": false, 1405 | "alignLevel": null 1406 | } 1407 | } 1408 | ], 1409 | "refresh": "30s", 1410 | "schemaVersion": 25, 1411 | "style": "dark", 1412 | "tags": [ 1413 | "Synapse", 1414 | "Spark" 1415 | ], 1416 | "templating": { 1417 | "list": [ 1418 | { 1419 | "current": { 1420 | "selected": false, 1421 | "text": "Prometheus", 1422 | "value": "Prometheus" 1423 | }, 1424 | "hide": 2, 1425 | "includeAll": false, 1426 | "label": null, 1427 | "multi": false, 1428 | "name": "Datasource", 1429 | "options": [], 1430 | "query": "prometheus", 1431 | "queryValue": "", 1432 | "refresh": 1, 1433 | "regex": "", 1434 | "skipUrlSync": false, 1435 | "type": "datasource" 1436 | }, 1437 | { 1438 | "allValue": null, 1439 | "current": { 1440 | "selected": false, 1441 | "text": "", 1442 | "value": "" 1443 | }, 1444 | "datasource": "$Datasource", 1445 | "definition": "label_values(spark_executor_totalDuration_seconds_total{}, workspace_name)", 1446 | "hide": 0, 1447 | "includeAll": false, 1448 | "label": "Workspace", 1449 | "multi": false, 1450 | "name": "workspace_name", 1451 | "options": [], 1452 | "query": "label_values(spark_executor_totalDuration_seconds_total{}, workspace_name)", 1453 | "refresh": 2, 1454 | "regex": "", 1455 | "skipUrlSync": false, 1456 | "sort": 0, 1457 | "tagValuesQuery": "", 1458 | "tags": [], 1459 | "tagsQuery": "", 1460 | "type": "query", 1461 | "useTags": false 1462 | }, 1463 | { 1464 | "allValue": null, 1465 | "current": { 1466 | "selected": false, 1467 | 
"text": "", 1468 | "value": "" 1469 | }, 1470 | "datasource": "$Datasource", 1471 | "definition": "label_values(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\"}, spark_pool_name)", 1472 | "hide": 0, 1473 | "includeAll": false, 1474 | "label": "Spark pool", 1475 | "multi": false, 1476 | "name": "spark_pool_name", 1477 | "options": [], 1478 | "query": "label_values(spark_executor_totalDuration_seconds_total{workspace_name=\"$workspace_name\"}, spark_pool_name)", 1479 | "refresh": 2, 1480 | "regex": "", 1481 | "skipUrlSync": false, 1482 | "sort": 5, 1483 | "tagValuesQuery": "", 1484 | "tags": [], 1485 | "tagsQuery": "", 1486 | "type": "query", 1487 | "useTags": false 1488 | }, 1489 | { 1490 | "allValue": null, 1491 | "current": { 1492 | "selected": false, 1493 | "text": "", 1494 | "value": "" 1495 | }, 1496 | "datasource": "$Datasource", 1497 | "definition": "label_values(synapse_connector_application_info{workspace_name=\"$workspace_name\"}, resource_group)", 1498 | "hide": 2, 1499 | "includeAll": false, 1500 | "label": null, 1501 | "multi": false, 1502 | "name": "resource_group", 1503 | "options": [], 1504 | "query": "label_values(synapse_connector_application_info{workspace_name=\"$workspace_name\"}, resource_group)", 1505 | "refresh": 2, 1506 | "regex": "", 1507 | "skipUrlSync": false, 1508 | "sort": 0, 1509 | "tagValuesQuery": "", 1510 | "tags": [], 1511 | "tagsQuery": "", 1512 | "type": "query", 1513 | "useTags": false 1514 | }, 1515 | { 1516 | "allValue": null, 1517 | "current": { 1518 | "selected": false, 1519 | "text": "", 1520 | "value": "" 1521 | }, 1522 | "datasource": "$Datasource", 1523 | "definition": "label_values(synapse_connector_application_info{workspace_name=\"$workspace_name\"}, subscription_id)", 1524 | "hide": 2, 1525 | "includeAll": false, 1526 | "label": null, 1527 | "multi": false, 1528 | "name": "subscription_id", 1529 | "options": [], 1530 | "query": 
"label_values(synapse_connector_application_info{workspace_name=\"$workspace_name\"}, subscription_id)", 1531 | "refresh": 2, 1532 | "regex": "", 1533 | "skipUrlSync": false, 1534 | "sort": 0, 1535 | "tagValuesQuery": "", 1536 | "tags": [], 1537 | "tagsQuery": "", 1538 | "type": "query", 1539 | "useTags": false 1540 | } 1541 | ] 1542 | }, 1543 | "time": { 1544 | "from": "now-1h", 1545 | "to": "now" 1546 | }, 1547 | "timepicker": { 1548 | "refresh_intervals": [ 1549 | "30s", 1550 | "1m", 1551 | "5m", 1552 | "15m", 1553 | "30m", 1554 | "1h", 1555 | "2h", 1556 | "1d" 1557 | ] 1558 | }, 1559 | "timezone": "browser", 1560 | "title": "Synapse Workspace / Sparkpools", 1561 | "uid": "M2V_vnHMk", 1562 | "version": 1 1563 | } -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/grafana_dashboards/Synapse_Workspace_Workspace.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "editable": false, 16 | "gnetId": null, 17 | "graphTooltip": 0, 18 | "iteration": 1599188952473, 19 | "links": [ 20 | { 21 | "$$hashKey": "object:54", 22 | "icon": "external link", 23 | "tags": [], 24 | "targetBlank": true, 25 | "title": "Go to Azure Synapse Studio", 26 | "tooltip": "", 27 | "type": "link", 28 | "url": "https://web.azuresynapse.net/monitoring/sparkapplication?workspace=%2Fsubscriptions%2F${subscription_id}%2FresourceGroups%2F${resource_group}%2Fproviders%2FMicrosoft.Synapse%2Fworkspaces%2F${WORKSPACE}" 29 | } 30 | ], 31 | "panels": [ 32 | { 33 | "collapsed": false, 34 | "datasource": null, 35 | "gridPos": { 36 | "h": 1, 37 | "w": 24, 38 | "x": 0, 39 | "y": 0 40 | }, 41 | "id": 31, 42 | "panels": [], 43 | "title": "Header", 44 | 
"type": "row" 45 | }, 46 | { 47 | "datasource": "$Datasource", 48 | "fieldConfig": { 49 | "defaults": { 50 | "custom": {}, 51 | "mappings": [], 52 | "thresholds": { 53 | "mode": "absolute", 54 | "steps": [ 55 | { 56 | "color": "green", 57 | "value": null 58 | }, 59 | { 60 | "color": "#EAB839", 61 | "value": 10 62 | }, 63 | { 64 | "color": "red", 65 | "value": 20 66 | } 67 | ] 68 | } 69 | }, 70 | "overrides": [] 71 | }, 72 | "gridPos": { 73 | "h": 5, 74 | "w": 8, 75 | "x": 0, 76 | "y": 1 77 | }, 78 | "id": 39, 79 | "options": { 80 | "colorMode": "value", 81 | "graphMode": "none", 82 | "justifyMode": "auto", 83 | "orientation": "auto", 84 | "reduceOptions": { 85 | "calcs": [ 86 | "last" 87 | ], 88 | "fields": "", 89 | "values": false 90 | } 91 | }, 92 | "pluginVersion": "7.0.3", 93 | "targets": [ 94 | { 95 | "expr": "count(count(synapse_connector_application_info{workspace_name=\"$WORKSPACE\"}) by (application_id)) or vector(0)", 96 | "format": "time_series", 97 | "interval": "", 98 | "legendFormat": "", 99 | "refId": "A" 100 | } 101 | ], 102 | "timeFrom": null, 103 | "timeShift": null, 104 | "title": "Current Application Count", 105 | "type": "stat" 106 | }, 107 | { 108 | "datasource": "$Datasource", 109 | "fieldConfig": { 110 | "defaults": { 111 | "custom": { 112 | "align": null 113 | }, 114 | "decimals": 0, 115 | "mappings": [], 116 | "thresholds": { 117 | "mode": "absolute", 118 | "steps": [ 119 | { 120 | "color": "green", 121 | "value": null 122 | } 123 | ] 124 | }, 125 | "unit": "bytes" 126 | }, 127 | "overrides": [] 128 | }, 129 | "gridPos": { 130 | "h": 5, 131 | "w": 8, 132 | "x": 8, 133 | "y": 1 134 | }, 135 | "id": 24, 136 | "options": { 137 | "colorMode": "value", 138 | "graphMode": "area", 139 | "justifyMode": "auto", 140 | "orientation": "auto", 141 | "reduceOptions": { 142 | "calcs": [ 143 | "lastNotNull" 144 | ], 145 | "fields": "", 146 | "values": false 147 | } 148 | }, 149 | "pluginVersion": "7.0.3", 150 | "targets": [ 151 | { 152 | "expr": 
"sum(spark_executor_memoryUsed_bytes{workspace_name=\"$WORKSPACE\"})", 153 | "format": "time_series", 154 | "instant": false, 155 | "interval": "", 156 | "legendFormat": "", 157 | "refId": "A" 158 | } 159 | ], 160 | "timeFrom": null, 161 | "timeShift": null, 162 | "title": "Current Total Memory Usage", 163 | "transformations": [], 164 | "type": "stat" 165 | }, 166 | { 167 | "datasource": "$Datasource", 168 | "fieldConfig": { 169 | "defaults": { 170 | "custom": {}, 171 | "mappings": [], 172 | "thresholds": { 173 | "mode": "absolute", 174 | "steps": [ 175 | { 176 | "color": "green", 177 | "value": null 178 | }, 179 | { 180 | "color": "#EAB839", 181 | "value": 100 182 | }, 183 | { 184 | "color": "red", 185 | "value": 160 186 | } 187 | ] 188 | }, 189 | "unit": "none" 190 | }, 191 | "overrides": [] 192 | }, 193 | "gridPos": { 194 | "h": 5, 195 | "w": 8, 196 | "x": 16, 197 | "y": 1 198 | }, 199 | "id": 26, 200 | "options": { 201 | "orientation": "auto", 202 | "reduceOptions": { 203 | "calcs": [ 204 | "max" 205 | ], 206 | "fields": "", 207 | "values": false 208 | }, 209 | "showThresholdLabels": false, 210 | "showThresholdMarkers": true 211 | }, 212 | "pluginVersion": "7.0.3", 213 | "targets": [ 214 | { 215 | "expr": "max(sum(spark_executor_totalCores{workspace_name=\"$WORKSPACE\"}))", 216 | "interval": "", 217 | "legendFormat": "app={{application_name}} executor={{executor_id}}", 218 | "refId": "A" 219 | } 220 | ], 221 | "timeFrom": null, 222 | "timeShift": null, 223 | "title": "Current Total Cores", 224 | "type": "gauge" 225 | }, 226 | { 227 | "collapsed": false, 228 | "datasource": null, 229 | "gridPos": { 230 | "h": 1, 231 | "w": 24, 232 | "x": 0, 233 | "y": 6 234 | }, 235 | "id": 33, 236 | "panels": [], 237 | "title": "Spark Pools", 238 | "type": "row" 239 | }, 240 | { 241 | "datasource": "$Datasource", 242 | "description": "", 243 | "fieldConfig": { 244 | "defaults": { 245 | "custom": { 246 | "align": null, 247 | "displayMode": "auto" 248 | }, 249 | "links": [], 250 
| "mappings": [], 251 | "thresholds": { 252 | "mode": "absolute", 253 | "steps": [ 254 | { 255 | "color": "green", 256 | "value": null 257 | }, 258 | { 259 | "color": "red", 260 | "value": 80 261 | } 262 | ] 263 | }, 264 | "unit": "none" 265 | }, 266 | "overrides": [ 267 | { 268 | "matcher": { 269 | "id": "byName", 270 | "options": "Spark pool" 271 | }, 272 | "properties": [ 273 | { 274 | "id": "links", 275 | "value": [ 276 | { 277 | "targetBlank": true, 278 | "title": "Drill into Spark pool", 279 | "url": "/d/M2V_vnHMk/synapse-workspace-sparkpools?var-workspace_name=${WORKSPACE}&var-spark_pool_name=${__data.fields[spark_pool_name]}&${__url_time_range}" 280 | } 281 | ] 282 | } 283 | ] 284 | }, 285 | { 286 | "matcher": { 287 | "id": "byName", 288 | "options": "Link" 289 | }, 290 | "properties": [ 291 | { 292 | "id": "links", 293 | "value": [ 294 | { 295 | "targetBlank": true, 296 | "title": "Go to Synapse Studio", 297 | "url": "https://web.azuresynapse.net/management/apachesparkpools?workspace=%2Fsubscriptions%2F${subscription_id}%2FresourceGroups%2F${resource_group}%2Fproviders%2FMicrosoft.Synapse%2Fworkspaces%2F${WORKSPACE}" 298 | } 299 | ] 300 | } 301 | ] 302 | } 303 | ] 304 | }, 305 | "gridPos": { 306 | "h": 9, 307 | "w": 12, 308 | "x": 0, 309 | "y": 7 310 | }, 311 | "id": 34, 312 | "links": [], 313 | "options": { 314 | "frameIndex": 0, 315 | "showHeader": true, 316 | "sortBy": [] 317 | }, 318 | "pluginVersion": "7.0.3", 319 | "targets": [ 320 | { 321 | "expr": "label_replace(\nmax_over_time(\n max(synapse_connector_application_info{workspace_name=\"$WORKSPACE\"}) by (workspace_name, spark_pool_name)\n [$__range:2m]\r)\n ,\"info\", \"Manage\", \"\", \"\")", 322 | "format": "table", 323 | "instant": true, 324 | "interval": "", 325 | "legendFormat": "", 326 | "refId": "A" 327 | }, 328 | { 329 | "expr": "count(\n max(spark_executor_totalDuration_seconds_total{workspace_name=\"$WORKSPACE\"}) by (spark_pool_name, application_id)\n) by (spark_pool_name)", 330 | 
"format": "table", 331 | "instant": true, 332 | "interval": "", 333 | "legendFormat": "", 334 | "refId": "B" 335 | }, 336 | { 337 | "expr": "count(synapse_connector_application_info{workspace_name=\"$WORKSPACE\"}) by (spark_pool_name)", 338 | "format": "table", 339 | "instant": true, 340 | "interval": "", 341 | "legendFormat": "", 342 | "refId": "C" 343 | }, 344 | { 345 | "expr": "sum(spark_executor_totalCores{workspace_name=\"$WORKSPACE\"}) by (spark_pool_name)", 346 | "format": "table", 347 | "instant": true, 348 | "interval": "", 349 | "legendFormat": "", 350 | "refId": "D" 351 | } 352 | ], 353 | "timeFrom": null, 354 | "timeShift": null, 355 | "title": "Active Spark pools", 356 | "transformations": [ 357 | { 358 | "id": "seriesToColumns", 359 | "options": { 360 | "byField": "spark_pool_name" 361 | } 362 | }, 363 | { 364 | "id": "organize", 365 | "options": { 366 | "excludeByName": { 367 | "Time": true, 368 | "Value #A": true, 369 | "Value #B": true, 370 | "workspace_name": true 371 | }, 372 | "indexByName": { 373 | "Time": 4, 374 | "Value #A": 6, 375 | "Value #B": 7, 376 | "Value #C": 1, 377 | "Value #D": 2, 378 | "info": 3, 379 | "spark_pool_name": 0, 380 | "workspace_name": 5 381 | }, 382 | "renameByName": { 383 | "Value #C": "Active Application(s)", 384 | "Value #D": "Allocated vCores", 385 | "info": "Link", 386 | "spark_pool_name": "Spark pool" 387 | } 388 | } 389 | } 390 | ], 391 | "type": "table" 392 | }, 393 | { 394 | "aliasColors": {}, 395 | "bars": false, 396 | "dashLength": 10, 397 | "dashes": false, 398 | "datasource": "$Datasource", 399 | "fieldConfig": { 400 | "defaults": { 401 | "custom": {}, 402 | "mappings": [], 403 | "thresholds": { 404 | "mode": "percentage", 405 | "steps": [ 406 | { 407 | "color": "green", 408 | "value": null 409 | }, 410 | { 411 | "color": "#EAB839", 412 | "value": 90 413 | } 414 | ] 415 | }, 416 | "unit": "ms" 417 | }, 418 | "overrides": [] 419 | }, 420 | "fill": 1, 421 | "fillGradient": 0, 422 | "gridPos": { 423 | "h": 9, 
424 | "w": 12, 425 | "x": 12, 426 | "y": 7 427 | }, 428 | "hiddenSeries": false, 429 | "id": 41, 430 | "legend": { 431 | "avg": false, 432 | "current": false, 433 | "max": false, 434 | "min": false, 435 | "show": true, 436 | "total": false, 437 | "values": false 438 | }, 439 | "lines": true, 440 | "linewidth": 1, 441 | "nullPointMode": "null", 442 | "options": { 443 | "dataLinks": [] 444 | }, 445 | "percentage": false, 446 | "pluginVersion": "7.0.3", 447 | "pointradius": 2, 448 | "points": false, 449 | "renderer": "flot", 450 | "seriesOverrides": [], 451 | "spaceLength": 10, 452 | "stack": false, 453 | "steppedLine": true, 454 | "targets": [ 455 | { 456 | "expr": "sum(spark_executor_totalCores{workspace_name=\"$WORKSPACE\"}) by (workspace_name)", 457 | "format": "time_series", 458 | "instant": false, 459 | "interval": "", 460 | "intervalFactor": 10, 461 | "legendFormat": "Total Cores", 462 | "refId": "A" 463 | } 464 | ], 465 | "thresholds": [], 466 | "timeFrom": null, 467 | "timeRegions": [], 468 | "timeShift": null, 469 | "title": "Total Running CPU Cores", 470 | "tooltip": { 471 | "shared": true, 472 | "sort": 0, 473 | "value_type": "individual" 474 | }, 475 | "transformations": [ 476 | { 477 | "id": "organize", 478 | "options": { 479 | "excludeByName": { 480 | "Time": false 481 | }, 482 | "indexByName": {}, 483 | "renameByName": { 484 | "Value": "", 485 | "application_id": "", 486 | "{application_id=\"application_1596581849279_0001\"}": "{{$application_id}}" 487 | } 488 | } 489 | } 490 | ], 491 | "type": "graph", 492 | "xaxis": { 493 | "buckets": null, 494 | "mode": "time", 495 | "name": null, 496 | "show": true, 497 | "values": [] 498 | }, 499 | "yaxes": [ 500 | { 501 | "$$hashKey": "object:1361", 502 | "decimals": 0, 503 | "format": "short", 504 | "label": null, 505 | "logBase": 1, 506 | "max": null, 507 | "min": "0", 508 | "show": true 509 | }, 510 | { 511 | "$$hashKey": "object:1362", 512 | "format": "short", 513 | "label": null, 514 | "logBase": 1, 515 | 
"max": null, 516 | "min": null, 517 | "show": false 518 | } 519 | ], 520 | "yaxis": { 521 | "align": false, 522 | "alignLevel": null 523 | } 524 | }, 525 | { 526 | "datasource": "$Datasource", 527 | "fieldConfig": { 528 | "defaults": { 529 | "custom": { 530 | "align": null 531 | }, 532 | "mappings": [], 533 | "thresholds": { 534 | "mode": "absolute", 535 | "steps": [ 536 | { 537 | "color": "green", 538 | "value": null 539 | }, 540 | { 541 | "color": "red", 542 | "value": 80 543 | } 544 | ] 545 | } 546 | }, 547 | "overrides": [ 548 | { 549 | "matcher": { 550 | "id": "byName", 551 | "options": "provisioning_state" 552 | }, 553 | "properties": [] 554 | } 555 | ] 556 | }, 557 | "gridPos": { 558 | "h": 8, 559 | "w": 24, 560 | "x": 0, 561 | "y": 16 562 | }, 563 | "id": 43, 564 | "options": { 565 | "showHeader": true 566 | }, 567 | "pluginVersion": "7.0.3", 568 | "targets": [ 569 | { 570 | "expr": "synapse_connector_spark_pool_info{workspace_name=\"$WORKSPACE\"}", 571 | "format": "table", 572 | "instant": true, 573 | "interval": "", 574 | "legendFormat": "", 575 | "refId": "A" 576 | } 577 | ], 578 | "timeFrom": null, 579 | "timeShift": null, 580 | "title": "Spark Pools", 581 | "transformations": [ 582 | { 583 | "id": "organize", 584 | "options": { 585 | "excludeByName": { 586 | "Time": true, 587 | "Value": true, 588 | "__name__": true, 589 | "instance": true, 590 | "job": true, 591 | "workspace_name": true 592 | }, 593 | "indexByName": { 594 | "Time": 0, 595 | "Value": 14, 596 | "__name__": 1, 597 | "auto_scale_enabled": 13, 598 | "instance": 2, 599 | "job": 3, 600 | "location": 6, 601 | "node_count": 7, 602 | "node_cpu_cores": 9, 603 | "node_memory_size": 10, 604 | "node_size": 8, 605 | "provisioning_state": 12, 606 | "spark_pool_name": 5, 607 | "spark_version": 11, 608 | "workspace_name": 4 609 | }, 610 | "renameByName": { 611 | "auto_scale_enabled": "AutoScale", 612 | "instance": "", 613 | "job": "", 614 | "location": "Location", 615 | "node_count": "Node Count", 616 | 
"node_cpu_cores": "CPU", 617 | "node_memory_size": "Memory", 618 | "node_size": "Node Size", 619 | "provisioning_state": "Provisioning State", 620 | "spark_pool_name": "Spark pool", 621 | "spark_version": "Spark Version" 622 | } 623 | } 624 | }, 625 | { 626 | "id": "seriesToColumns", 627 | "options": { 628 | "byField": "Spark pool" 629 | } 630 | } 631 | ], 632 | "type": "table" 633 | } 634 | ], 635 | "refresh": "1m", 636 | "schemaVersion": 25, 637 | "style": "dark", 638 | "tags": [ 639 | "Synapse", 640 | "Spark" 641 | ], 642 | "templating": { 643 | "list": [ 644 | { 645 | "current": { 646 | "selected": false, 647 | "text": "Prometheus", 648 | "value": "Prometheus" 649 | }, 650 | "hide": 2, 651 | "includeAll": false, 652 | "label": null, 653 | "multi": false, 654 | "name": "Datasource", 655 | "options": [], 656 | "query": "prometheus", 657 | "queryValue": "", 658 | "refresh": 1, 659 | "regex": "", 660 | "skipUrlSync": false, 661 | "type": "datasource" 662 | }, 663 | { 664 | "allValue": null, 665 | "current": { 666 | "selected": false, 667 | "text": "", 668 | "value": "" 669 | }, 670 | "datasource": "$Datasource", 671 | "definition": "label_values(synapse_connector_spark_pool_info, workspace_name)", 672 | "hide": 0, 673 | "includeAll": false, 674 | "label": "Workspace", 675 | "multi": false, 676 | "name": "WORKSPACE", 677 | "options": [], 678 | "query": "label_values(synapse_connector_spark_pool_info, workspace_name)", 679 | "refresh": 2, 680 | "regex": "", 681 | "skipUrlSync": false, 682 | "sort": 0, 683 | "tagValuesQuery": "", 684 | "tags": [], 685 | "tagsQuery": "", 686 | "type": "query", 687 | "useTags": false 688 | }, 689 | { 690 | "allValue": null, 691 | "current": { 692 | "isNone": true, 693 | "selected": false, 694 | "text": "None", 695 | "value": "" 696 | }, 697 | "datasource": "$Datasource", 698 | "definition": "label_values(synapse_connector_application_info{workspace_name=\"$WORKSPACE\"}, subscription_id)", 699 | "hide": 2, 700 | "includeAll": false, 701 
| "label": null, 702 | "multi": false, 703 | "name": "subscription_id", 704 | "options": [], 705 | "query": "label_values(synapse_connector_application_info{workspace_name=\"$WORKSPACE\"}, subscription_id)", 706 | "refresh": 2, 707 | "regex": "", 708 | "skipUrlSync": false, 709 | "sort": 0, 710 | "tagValuesQuery": "", 711 | "tags": [], 712 | "tagsQuery": "", 713 | "type": "query", 714 | "useTags": false 715 | }, 716 | { 717 | "allValue": null, 718 | "current": { 719 | "isNone": true, 720 | "selected": false, 721 | "text": "None", 722 | "value": "" 723 | }, 724 | "datasource": "$Datasource", 725 | "definition": "label_values(synapse_connector_application_info{workspace_name=\"$WORKSPACE\"}, resource_group)", 726 | "hide": 2, 727 | "includeAll": false, 728 | "label": null, 729 | "multi": false, 730 | "name": "resource_group", 731 | "options": [], 732 | "query": "label_values(synapse_connector_application_info{workspace_name=\"$WORKSPACE\"}, resource_group)", 733 | "refresh": 2, 734 | "regex": "", 735 | "skipUrlSync": false, 736 | "sort": 0, 737 | "tagValuesQuery": "", 738 | "tags": [], 739 | "tagsQuery": "", 740 | "type": "query", 741 | "useTags": false 742 | } 743 | ] 744 | }, 745 | "time": { 746 | "from": "now-30m", 747 | "to": "now" 748 | }, 749 | "timepicker": { 750 | "refresh_intervals": [ 751 | "30s", 752 | "1m", 753 | "5m", 754 | "15m", 755 | "30m", 756 | "1h", 757 | "2h", 758 | "1d" 759 | ] 760 | }, 761 | "timezone": "browser", 762 | "title": "Synapse Workspace / Workspace", 763 | "uid": "Ip-vTJIGz", 764 | "version": 1 765 | } -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1. The Synapse Prometheus Operator has been installed. Check its status by running: 2 | kubectl --namespace {{ template "synapse-prometheus-operator.namespace" . }} get pods 3 | 4 | 2. 
Get your '{{ .Values.grafana.adminUser }}' user password by running: 5 | kubectl get secret --namespace {{ template "synapse-prometheus-operator.namespace" . }} {{ include "call-nested" (list . "kube-prometheus-stack.grafana" "grafana.fullname") }} -o jsonpath="{.data.admin-password}" | base64 --decode ; echo 6 | 7 | 3. Get the Grafana Internal/External address: 8 | kubectl get svc --namespace {{ template "synapse-prometheus-operator.namespace" . }} {{ include "call-nested" (list . "kube-prometheus-stack.grafana" "grafana.fullname") }} 9 | -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 4 | */}} 5 | {{- define "synapse-prometheus-operator.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "synapse-prometheus-operator.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | 28 | {{/* 29 | NOTE: This utility template is needed until https://git.io/JvuGN is resolved. 30 | 31 | Call a template from the context of a subchart. 32 | 33 | Usage: 34 | {{ include "call-nested" (list . 
"" "") }} 35 | */}} 36 | {{- define "call-nested" }} 37 | {{- $dot := index . 0 }} 38 | {{- $subchart := index . 1 | splitList "." }} 39 | {{- $template := index . 2 }} 40 | {{- $values := $dot.Values }} 41 | {{- range $subchart }} 42 | {{- $values = index $values . }} 43 | {{- end }} 44 | {{- include $template (dict "Chart" (dict "Name" (last $subchart)) "Values" $values "Release" $dot.Release "Capabilities" $dot.Capabilities) }} 45 | {{- end }} 46 | 47 | {{/* 48 | Create chart name and version as used by the chart label. 49 | */}} 50 | {{- define "synapse-prometheus-operator.chart" -}} 51 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 52 | {{- end -}} 53 | 54 | {{/* 55 | Common labels 56 | */}} 57 | {{- define "synapse-prometheus-operator.labels" -}} 58 | app.kubernetes.io/name: {{ include "synapse-prometheus-operator.name" . }} 59 | helm.sh/chart: {{ include "synapse-prometheus-operator.chart" . }} 60 | app.kubernetes.io/instance: {{ .Release.Name }} 61 | {{- if .Chart.AppVersion }} 62 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 63 | {{- end }} 64 | app.kubernetes.io/managed-by: {{ .Release.Service }} 65 | {{- end -}} 66 | 67 | {{/* 68 | Create the name of the service account to use 69 | */}} 70 | {{- define "synapse-prometheus-operator.serviceAccountName" -}} 71 | {{- if .Values.serviceAccount.create -}} 72 | {{ default (include "synapse-prometheus-operator.fullname" .) 
.Values.serviceAccount.name }} 73 | {{- else -}} 74 | {{ default "default" .Values.serviceAccount.name }} 75 | {{- end -}} 76 | {{- end -}} 77 | 78 | 79 | {{- define "synapse-prometheus-operator.namespace" -}} 80 | {{- if .Values.namespaceOverride -}} 81 | {{- .Values.namespaceOverride -}} 82 | {{- else -}} 83 | {{- .Release.Namespace -}} 84 | {{- end -}} 85 | {{- end -}} -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/templates/configmap-grafana-dashboards.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.synapse.defaultDashboards.enabled }} 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | namespace: {{ template "synapse-prometheus-operator.namespace" . }} 6 | name: {{ printf "%s-%s" (include "synapse-prometheus-operator.fullname" $) "synapse-dashboards" | trunc 63 | trimSuffix "-" }} 7 | annotations: 8 | {{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} 9 | labels: 10 | {{- if $.Values.grafana.sidecar.dashboards.label }} 11 | {{ $.Values.grafana.sidecar.dashboards.label }}: "1" 12 | {{- end }} 13 | {{ include "synapse-prometheus-operator.labels" . | indent 4 }} 14 | data: 15 | {{ (.Files.Glob "grafana_dashboards/*").AsConfig | indent 2 }} 16 | {{- end }} -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/templates/secret-discovery-config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: synapse-application-discovery-config 5 | namespace: {{ template "synapse-prometheus-operator.namespace" . }} 6 | labels: 7 | {{ include "synapse-prometheus-operator.labels" . 
| indent 4 }} 8 | stringData: 9 | synapse_scrape_config.yaml: |- 10 | {{- if .Values.synapse.workspaces }} 11 | {{- range .Values.synapse.workspaces }} 12 | - job_name: synapse-workspace-{{ .workspace_name }} 13 | bearer_token_file: /app/synapse_connector/output/workspace/{{ .workspace_name }}/bearer_token 14 | file_sd_configs: 15 | - files: 16 | - /app/synapse_connector/output/workspace/{{ .workspace_name }}/application_discovery.json 17 | refresh_interval: 10s 18 | metric_relabel_configs: 19 | - source_labels: [ __name__ ] 20 | target_label: __name__ 21 | regex: metrics_application_[0-9]+_[0-9]+_(.+) 22 | replacement: spark_$1 23 | - source_labels: [ __name__ ] 24 | target_label: __name__ 25 | regex: metrics_(.+) 26 | replacement: spark_$1 27 | {{- end }} 28 | {{- end }} 29 | - job_name: synapse-prometheus-connector 30 | static_configs: 31 | - labels: 32 | __metrics_path__: /metrics 33 | __scheme__: http 34 | targets: 35 | - localhost:8000 -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/templates/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: synapse-connector-config 5 | namespace: {{ template "synapse-prometheus-operator.namespace" . }} 6 | labels: 7 | {{ include "synapse-prometheus-operator.labels" . | indent 4 }} 8 | stringData: 9 | config.yaml: |- 10 | {{ toYaml .Values.synapse | indent 4 }} 11 | -------------------------------------------------------------------------------- /helm/synapse-prometheus-operator/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for synapse-prometheus-operator. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 
4 | 5 | ## Provide a name in place of synapse-prometheus-operator for `app:` labels 6 | ## 7 | nameOverride: "" 8 | 9 | ## Provide a name to substitute for the full names of resources 10 | ## 11 | fullnameOverride: "" 12 | 13 | ## Provide Synapse workspaces config 14 | ## 15 | synapse: 16 | workspaces: [] 17 | # - workspace_name: 18 | # tenant_id: 19 | # service_principal_name: 20 | # service_principal_password: "" 21 | 22 | defaultDashboards: 23 | enabled: true 24 | 25 | ## Prometheus Operator 26 | ## 27 | # https://github.com/helm/charts/blob/master/stable/prometheus-operator/values.yaml 28 | kube-prometheus-stack: 29 | nameOverride: "" 30 | fullnameOverride: "" 31 | 32 | # Disable master node monitoring in managed AKS cluster. 33 | defaultRules: 34 | rules: 35 | etcd: false 36 | kubeScheduler: false 37 | kubeApiServer: 38 | enabled: false 39 | 40 | prometheus: 41 | enabled: true 42 | service: 43 | type: ClusterIP 44 | 45 | prometheusSpec: 46 | replicas: 1 47 | retention: 15d 48 | 49 | # resources: {} 50 | 51 | ## Prometheus StorageSpec for persistent data 52 | ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/user-guides/storage.md 53 | ## 54 | storageSpec: 55 | volumeClaimTemplate: 56 | spec: 57 | storageClassName: default 58 | accessModes: ["ReadWriteOnce"] 59 | resources: 60 | requests: 61 | storage: 512Gi 62 | 63 | ## Synapse Connector 64 | volumes: 65 | - name: synapse-connector-out 66 | emptyDir: {} 67 | - name: azurecli-config 68 | emptyDir: {} 69 | - name: synapse-connector-config 70 | secret: 71 | secretName: synapse-connector-config 72 | - name: synapse-connector-tmp 73 | emptyDir: {} 74 | volumeMounts: 75 | - name: synapse-connector-out 76 | mountPath: "/app/synapse_connector/output" 77 | 78 | containers: 79 | - name: synapse-prometheus-connector 80 | image: mcr.microsoft.com/azuresynapse/synapse-prometheus-connector:0.0.25 81 | volumeMounts: 82 | - name: synapse-connector-out 83 | mountPath: 
"/app/synapse_connector/output" 84 | - name: azurecli-config 85 | mountPath: "/app/azurecli" 86 | - name: synapse-connector-config 87 | mountPath: "/app/synapse_connector/config" 88 | - name: synapse-connector-tmp 89 | mountPath: "/tmp" 90 | ports: 91 | - containerPort: 8000 92 | name: http 93 | env: 94 | - name: POD_NAMESPACE 95 | valueFrom: 96 | fieldRef: 97 | fieldPath: metadata.namespace 98 | 99 | additionalScrapeConfigsSecret: 100 | enabled: true 101 | name: synapse-application-discovery-config 102 | key: synapse_scrape_config.yaml 103 | 104 | 105 | # https://github.com/helm/charts/blob/master/stable/grafana/values.yaml 106 | grafana: 107 | enabled: true 108 | adminUser: admin 109 | adminPassword: "" 110 | 111 | service: 112 | type: LoadBalancer 113 | 114 | persistence: 115 | enabled: true 116 | type: pvc 117 | storageClassName: default 118 | accessModes: 119 | - ReadWriteOnce 120 | size: 10Gi 121 | -------------------------------------------------------------------------------- /synapse-prometheus-connector/.dockerignore: -------------------------------------------------------------------------------- 1 | */.vscode/ 2 | */__pycache__/ 3 | src/output/ 4 | src/tests/ 5 | src/config/config.yaml 6 | -------------------------------------------------------------------------------- /synapse-prometheus-connector/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | 4 | FROM python:3.7.9-alpine as base 5 | 6 | WORKDIR /app/synapse_connector 7 | ENV AZURE_CONFIG_DIR /app/azurecli 8 | 9 | RUN python -m pip install \ 10 | requests \ 11 | python-dateutil \ 12 | attrs \ 13 | cattrs \ 14 | PyYAML \ 15 | prometheus_client \ 16 | timestring 17 | 18 | COPY src . 
def get_access_token(service_principal_name, service_principal_password, tenant_id, resource_uri, timeout=15):
    """Fetch an OAuth2 access token using the client-credentials grant.

    Posts the service principal's credentials to the
    ``login.microsoftonline.com`` token endpoint of the given tenant and
    returns the ``access_token`` field of the JSON response.

    Args:
        service_principal_name: Sent as the ``client_id`` form field.
        service_principal_password: Sent as the ``client_secret`` form field.
        tenant_id: Tenant whose token endpoint is called.
        resource_uri: Sent as the ``resource`` form field.
        timeout: Seconds passed to ``requests.post`` so that an unresponsive
            endpoint cannot block the caller forever.

    Returns:
        The value of ``access_token`` in the response body.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        KeyError: If the response body has no ``access_token`` field.
    """
    url = "https://login.microsoftonline.com/{tenant_id}/oauth2/token".format(tenant_id=tenant_id)
    payload = {
        'grant_type': 'client_credentials',
        'client_id': service_principal_name,
        'client_secret': service_principal_password,
        'resource': resource_uri,
    }
    # Without a timeout requests waits indefinitely for the server.
    response = requests.post(url, data=payload, timeout=timeout)
    response.raise_for_status()
    return response.json()['access_token']
@attrs(auto_attribs=True)
class SynapseConnectorWorkspaceConfig(object):
    """Settings for a single Synapse workspace entry of the connector config."""

    # Mandatory settings (no default).
    tenant_id: str
    service_principal_name: str
    service_principal_password: str
    workspace_name: str

    # Optional settings with their defaults.
    subscription_id: str = ''
    resource_group: str = ''
    synapse_host_suffix: str = 'dev.azuresynapse.net'
    synapse_api_version: str = '2019-11-01-preview'
    resource_uri: str = 'https://dev.azuresynapse.net'
    service_discovery_output_folder: str = 'output/'
    token_refresh_interval_sec: int = 1800
    spark_application_discovery_interval_sec: int = 10
    azure_management_resource_uri: str = 'https://management.azure.com'
    enable_spark_pools_metadata_metrics: bool = True

    def synapse_host(self):
        """Return the workspace host name: ``<workspace_name>.<synapse_host_suffix>``."""
        return f'{self.workspace_name}.{self.synapse_host_suffix}'


@attrs(auto_attribs=True)
class SynapseConnectorConfig(object):
    """Top-level connector configuration: the list of workspaces plus a secret name."""

    workspaces: typing.List[SynapseConnectorWorkspaceConfig]
    spark_application_discovery_config_secret_name: str = 'synapse-application-discovery-config'


def read_config(filename) -> SynapseConnectorConfig:
    """Load the YAML file at *filename* and structure it into a SynapseConnectorConfig."""
    with open(filename, 'r', encoding='utf-8') as config_file:
        raw_config = yaml.safe_load(config_file)
    return cattr.structure(raw_config, SynapseConnectorConfig)
def write_string_to_path(path, filename, content):
    """Write ``content`` as UTF-8 text to ``path/filename``.

    The directory ``path`` is created (including parents) when it does not
    exist yet; an existing file is overwritten.
    """
    os.makedirs(path, exist_ok=True)
    target = os.path.join(path, filename)
    with open(target, mode='w', encoding='utf-8') as out:
        out.write(content)
def get_spark_applications(synapse_host, synapse_api_version, bearer_token):
    """List the submitting / in-progress Spark applications of a workspace.

    Args:
        synapse_host: Host name of the workspace endpoint.
        synapse_api_version: Value sent as the ``api-version`` query parameter.
        bearer_token: Token sent in the ``Authorization`` header.

    Returns:
        A list of application objects built by
        ``model.spark_application_from_dict``; entries without a Spark
        application id are dropped. The list is empty when the response has
        no ``sparkJobs``.

    Raises:
        requests.HTTPError: If the service answers with anything but 200.
    """
    path = '/monitoring/workloadTypes/spark/applications'
    url = f'https://{synapse_host}{path}'
    params = {
        'api-version': synapse_api_version,
        'skip': 0,
        'filter': "(state eq 'submitting') or (state eq 'inprogress')",
    }
    headers = {
        'Authorization': f'Bearer {bearer_token}'
    }
    response = requests.get(url, params=params, headers=headers, timeout=15)
    if response.status_code != 200:
        # Print the raw body: an error page is not guaranteed to be JSON, and
        # decoding it would hide the real HTTP error behind a parse error.
        print(response.text)
        response.raise_for_status()
        # raise_for_status() only raises for 4xx/5xx; never fall through and
        # return None for other unexpected statuses.
        raise requests.HTTPError(
            f'unexpected status code {response.status_code} from {url}', response=response)

    applications = response.json().get('sparkJobs') or []
    parsed = (model.spark_application_from_dict(item) for item in applications)
    return [app for app in parsed if app.spark_application_id]
def _label_values(labelnames, labels):
    """Order the values of ``labels`` positionally according to ``labelnames``."""
    return tuple(labels[name] for name in labelnames)


def _remove_series(gauge, label_values):
    """Drop one labelled series from ``gauge``; a missing series is not an error."""
    try:
        gauge.remove(*label_values)
    except KeyError:
        pass


def spark_application_discovery_by_workspace(workspace_config, workspace_context):
    """Discover running Spark applications of one workspace and publish them.

    Does nothing until ``spark_application_discovery_interval_sec`` has passed
    since the last successful discovery. Otherwise it queries the application
    list, optionally writes the bearer token and the scrape targets below
    ``service_discovery_output_folder``, stores the results in
    ``workspace_context`` and updates the per-pool and per-application gauges.

    Only the series previously published for *this* workspace are replaced, so
    the metrics of other workspaces stay untouched between their own runs.

    Args:
        workspace_config: Configuration of the workspace to query.
        workspace_context: Mutable dict holding the per-workspace state
            (token, timestamps, last results, published label sets).

    Any failure is counted in ``application_discovery_failed_count`` and its
    traceback printed; it is never propagated to the caller.
    """
    elapsed = time.time() - workspace_context.get('application_discovery_time', 0)
    if elapsed < workspace_config.spark_application_discovery_interval_sec:
        return

    workspace_name = workspace_config.workspace_name
    metrics.application_discovery_count.labels(workspace_name=workspace_name).inc()
    try:
        print('spark application discovery...')
        bearer_token = workspace_context.get('bearer_token')
        if not bearer_token:
            return
        synapse_host = workspace_config.synapse_host()
        synapse_api_version = workspace_config.synapse_api_version
        with metrics.application_discovery_duration_histogram.labels(workspace_name).time():
            application_list = get_spark_applications(synapse_host, synapse_api_version, bearer_token)
        workspace_scrape_configs = generate_spark_application_scrape_configs(
            application_list, workspace_name, synapse_host, synapse_api_version)

        if workspace_config.service_discovery_output_folder:
            folder = os.path.join(workspace_config.service_discovery_output_folder, f'workspace/{workspace_name}/')
            write_string_to_path(folder, 'bearer_token', bearer_token)
            write_string_to_path(folder, 'application_discovery.json', model.to_json(workspace_scrape_configs))

        workspace_context['workspace_scrape_configs'] = workspace_scrape_configs
        workspace_context['application_list'] = application_list
        workspace_context['application_discovery_time'] = int(time.time())
        print(f'spark application discovery, found targets: {len(application_list)}.')

        # spark pool metrics
        spark_pool_applications = {}
        for app in application_list:
            spark_pool_applications[app.spark_pool_name] = spark_pool_applications.get(app.spark_pool_name, 0) + 1
            print(f'{app.spark_pool_name}/sessions/{app.livy_id}/applications/{app.spark_application_id}\tstate:{app.state}')

        # A pool that had applications last time but has none now must drop
        # to 0 instead of keeping its previous count.
        for spark_pool_name in workspace_context.get('discovered_spark_pools', ()):
            spark_pool_applications.setdefault(spark_pool_name, 0)

        for spark_pool_name, application_count in spark_pool_applications.items():
            metrics.application_discovery_target.labels(
                workspace_name=workspace_name, spark_pool_name=spark_pool_name).set(application_count)
        workspace_context['discovered_spark_pools'] = {
            spark_pool_name for spark_pool_name, count in spark_pool_applications.items() if count
        }

        # spark application metrics
        base_labelnames = metrics.application_base_labelnames
        info_labelnames = base_labelnames + metrics.application_extra_labelnames
        per_application_gauges = (
            metrics.application_submit_time,
            metrics.application_queue_duration,
            metrics.application_running_duration,
        )

        # Remove only what this workspace published before; the gauges are
        # shared by all workspaces.
        for info_values, base_values in workspace_context.get('application_metric_labels', ()):
            _remove_series(metrics.application_info, info_values)
            for gauge in per_application_gauges:
                _remove_series(gauge, base_values)

        published = []
        workspace_context['application_metric_labels'] = published
        for app in application_list:
            app_base_labels = dict(workspace_name=workspace_name, spark_pool_name=app.spark_pool_name, name=app.name,
                                   application_id=app.spark_application_id, livy_id=app.livy_id)
            info_labels = dict(subscription_id=workspace_config.subscription_id,
                               resource_group=workspace_config.resource_group,
                               tenant_id=workspace_config.tenant_id,
                               **app_base_labels)
            # Record the label sets first so that a failure half-way through
            # still gets cleaned up on the next run.
            published.append((_label_values(info_labelnames, info_labels),
                              _label_values(base_labelnames, app_base_labels)))
            metrics.application_info.labels(**info_labels).set(1)
            metrics.application_submit_time.labels(**app_base_labels).set(app.submit_time_seconds)
            metrics.application_queue_duration.labels(**app_base_labels).set(app.queued_duration_seconds)
            metrics.application_running_duration.labels(**app_base_labels).set(app.running_duration_seconds)
    except Exception:
        metrics.application_discovery_failed_count.labels(workspace_name=workspace_name).inc()
        traceback.print_exc()
class GracefulShutdown:
    """Turn SIGINT / SIGTERM into a flag that a polling loop can check."""

    # Class-level default; an instance gets its own value once a signal arrives.
    shutdown = False

    def __init__(self):
        """Install ``set_shutdown`` as the handler for SIGINT and SIGTERM."""
        for handled_signal in (signal.SIGINT, signal.SIGTERM):
            signal.signal(handled_signal, self.set_shutdown)

    def set_shutdown(self, signum, frame):
        """Signal handler: mark this instance as shutting down."""
        self.shutdown = True
cfg.workspaces: 220 | workspace_context = workspace_contexts.setdefault(workspace_config.workspace_name, {}) 221 | try: 222 | token_refresh_by_workspace(workspace_config, workspace_context) 223 | spark_application_discovery_by_workspace(workspace_config, workspace_context) 224 | spark_pool_metrics_by_workspace(workspace_config, workspace_context) 225 | except: 226 | traceback.print_exc() 227 | time.sleep(1) 228 | 229 | 230 | if __name__ == "__main__": 231 | main() 232 | -------------------------------------------------------------------------------- /synapse-prometheus-connector/src/metrics.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | # Copyright (c) Microsoft Corporation. 4 | # Licensed under the MIT License. 5 | 6 | from prometheus_client import Counter, Gauge, Histogram, Summary, Info 7 | from prometheus_client import start_http_server 8 | 9 | 10 | application_base_labelnames = ('workspace_name', 'spark_pool_name', 'name', 'application_id', 'livy_id') 11 | application_extra_labelnames = ('tenant_id', 'subscription_id', 'resource_group') 12 | 13 | 14 | # metrics by workspace, application 15 | application_info = Gauge( 16 | 'synapse_connector_application_info', 17 | 'synapse_connector_application_info', 18 | labelnames=application_base_labelnames + application_extra_labelnames) 19 | 20 | application_submit_time = Gauge( 21 | 'synapse_connector_application_submit_time', 22 | 'synapse_connector_application_submit_time', 23 | labelnames=application_base_labelnames) 24 | 25 | application_queue_duration = Gauge( 26 | 'synapse_connector_application_queue_duration', 27 | 'synapse_connector_application_queue_duration', 28 | labelnames=application_base_labelnames) 29 | 30 | application_running_duration = Gauge( 31 | 'synapse_connector_application_running_duration', 32 | 'synapse_connector_application_running_duration', 33 | labelnames=application_base_labelnames) 34 | 35 | # metrics workspace_name 36 | 
token_refresh_last_time = Gauge( 37 | 'synapse_connector_token_refresh_last_time', 38 | 'synapse_connector_token_refresh_last_time', 39 | labelnames=('workspace_name',)) 40 | 41 | token_refresh_count = Gauge( 42 | 'synapse_connector_token_refresh_count', 43 | 'synapse_connector_token_refresh_count', 44 | labelnames=('workspace_name',)) 45 | 46 | token_refresh_failed_count = Gauge( 47 | 'synapse_connector_token_refresh_failed_count', 48 | 'synapse_connector_token_refresh_failed_count', 49 | labelnames=('workspace_name',)) 50 | 51 | application_discovery_target = Gauge( 52 | 'synapse_connector_application_discovery_target', 53 | 'synapse_connector_application_discovery_target', 54 | labelnames=('workspace_name', 'spark_pool_name')) 55 | 56 | application_discovery_count = Gauge( 57 | 'synapse_connector_application_discovery_count', 58 | 'synapse_connector_application_discovery_count', 59 | labelnames=('workspace_name',)) 60 | 61 | application_discovery_failed_count = Gauge( 62 | 'synapse_connector_application_discovery_failed_count', 63 | 'synapse_connector_application_discovery_failed_count', 64 | labelnames=('workspace_name',)) 65 | 66 | application_discovery_duration_histogram = Histogram( 67 | 'synapse_connector_application_discovery_duration', 68 | 'synapse_connector_application_discovery_duration', 69 | labelnames=('workspace_name',), 70 | buckets=(.1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, 15.0, 20.0, 30.0, float('INF'))) 71 | 72 | 73 | # metrics spark pools 74 | spark_pool_base_labelnames = ('workspace_name', 'spark_pool_name') 75 | spark_pool_info_labelnames = ('location', 'spark_version', 'node_count', 'node_size', 'provisioning_state', 'auto_scale_enabled', 'node_cpu_cores', 'node_memory_size') 76 | spark_pool_info = Gauge( 77 | 'synapse_connector_spark_pool_info', 78 | 'synapse_connector_spark_pool_info', 79 | labelnames=spark_pool_base_labelnames + spark_pool_info_labelnames) 80 | 81 | 82 | start_http_server(8000) 
@attrs(auto_attribs=True)
class SparkApplication(object):
    """One Spark application entry, structured from a dict via ``cattr``.

    The camelCase attributes are the raw fields; the snake_case properties
    expose the same values under Python-style names, and the ``*_seconds``
    properties convert time and duration strings to integers.
    """

    state: Optional[str]
    name: Optional[str]
    submitter: Optional[str]
    compute: Optional[str]
    sparkPoolName: Optional[str]
    sparkApplicationId: Optional[str]
    livyId: Optional[str]
    timing: List[Any]
    jobType: Optional[str]
    submitTime: Optional[str]
    endTime: Optional[str]
    queuedDuration: Optional[str]
    runningDuration: Optional[str]
    totalDuration: Optional[str]
    # Lazily filled caches for the parsed durations.
    _queued_duration_seconds: Optional[int] = attrib(default=None)
    _running_duration_seconds: Optional[int] = attrib(default=None)
    _total_duration_seconds: Optional[int] = attrib(default=None)

    @property
    def spark_pool_name(self):
        return self.sparkPoolName

    @property
    def spark_application_id(self):
        return self.sparkApplicationId

    @property
    def livy_id(self):
        return self.livyId

    @property
    def job_type(self):
        return self.jobType

    @property
    def submit_time(self):
        return self.submitTime

    @property
    def submit_time_seconds(self):
        """Submit time as integer epoch seconds (parsed by ``timestring``)."""
        # NOTE(review): time.mktime interprets the tuple as local time -
        # confirm this matches the time zone of submitTime.
        return int(time.mktime(timestring.Date(self.submitTime).date.timetuple()))

    @property
    def end_time(self):
        return self.endTime

    @property
    def end_time_seconds(self):
        """End time as integer epoch seconds, or 0 when no end time is set."""
        if self.end_time:
            return int(time.mktime(timestring.Date(self.endTime).date.timetuple()))
        return 0

    @property
    def queued_duration_seconds(self):
        if self._queued_duration_seconds is None:
            self._queued_duration_seconds = self._convert_to_seconds(self.queuedDuration)
        return self._queued_duration_seconds

    @property
    def running_duration_seconds(self):
        if self._running_duration_seconds is None:
            self._running_duration_seconds = self._convert_to_seconds(self.runningDuration)
        return self._running_duration_seconds

    @property
    def total_duration_seconds(self):
        if self._total_duration_seconds is None:
            self._total_duration_seconds = self._convert_to_seconds(self.totalDuration)
        return self._total_duration_seconds

    def _convert_to_seconds(self, s):
        """Sum the lengths of the space-separated duration parts of ``s``.

        Returns 0 when ``s`` is ``None`` or empty: the duration fields are
        optional, and a missing value must not abort metric publication.
        """
        if not s:
            return 0
        # len(timestring.Range(part)) is presumably the part's length in
        # seconds - confirm against the timestring documentation.
        return sum(len(timestring.Range(part)) for part in s.split(' '))
@attrs(auto_attribs=True)
class SparkPool(object):
    """Metadata of one Spark pool plus sizes derived from its node size."""

    name: str
    location: Optional[str]
    spark_version: Optional[str]
    node_count: Optional[int]
    node_size: Optional[str]
    provisioning_state: Optional[str]
    auto_scale_enabled: bool

    @property
    def node_cpu_cores(self):
        """First entry of the ``_node_size_mapping`` tuple for ``node_size``, else 0."""
        size = self.node_size
        spec = _node_size_mapping.get(size) if size else None
        return spec[0] if spec else 0

    @property
    def node_memory_size(self):
        """Second entry of the ``_node_size_mapping`` tuple for ``node_size``, else 0."""
        size = self.node_size
        spec = _node_size_mapping.get(size) if size else None
        return spec[1] if spec else 0
def _convert_spark_pool_result(spark_pool_obj):
    """Build a ``SparkPool`` from one item of the pool listing response.

    Args:
        spark_pool_obj: Dict with ``name``, ``location`` and a nested
            ``properties`` dict.

    Returns:
        A ``SparkPool``; fields missing from the input become ``None`` and
        ``auto_scale_enabled`` becomes ``False``.
    """
    # `or {}` also covers a key that is present but null, which the default
    # argument of dict.get() would not.
    properties = spark_pool_obj.get('properties') or {}
    auto_scale = properties.get('autoScale') or {}
    return SparkPool(
        name=spark_pool_obj.get('name'),
        location=spark_pool_obj.get('location'),
        spark_version=properties.get('sparkVersion'),
        node_count=properties.get('nodeCount'),
        node_size=properties.get('nodeSize'),
        provisioning_state=properties.get('provisioningState'),
        auto_scale_enabled=bool(auto_scale.get('enabled')),
    )