├── .dockerignore ├── .gitignore ├── .pre-commit-config.yaml ├── .vscode └── settings.json ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── alerts ├── vmware.rules └── vmware.rules.yml ├── dashboards └── esx.json ├── requirements.txt ├── setup.py ├── systemd └── vmware_exporter.service └── vmware_exporter ├── __init__.py ├── threader.py └── vmware_exporter.py /.dockerignore: -------------------------------------------------------------------------------- 1 | config.yml 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # Project 92 | config*.yml 93 | *.swp 94 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v1.3.0 4 | hooks: 5 | # Git state 6 | - id: check-merge-conflict 7 | stages: [commit] 8 | - id: check-added-large-files 9 | stages: [commit] 10 | # Sensitive information 11 | - id: detect-private-key 12 | stages: [commit] 13 | - id: detect-aws-credentials 14 | stages: [commit] 15 | # Generic file state 16 | - id: trailing-whitespace 17 | stages: [commit] 18 | - id: mixed-line-ending 19 | stages: [commit] 20 | - id: end-of-file-fixer 21 | stages: [commit] 22 | exclude: .*\.tfvars$ # terraform fmt separates everything with blank lines leaving a trailing line at the end 23 | - id: check-executables-have-shebangs 24 | stages: [commit] 25 | # Language syntax/formatting 26 | - id: check-yaml 27 | stages: [commit] 28 | - id: check-json 29 | stages: [commit] 30 | - id: pretty-format-json 31 | stages: [commit] 32 | args: 33 | - --autofix 34 | - id: flake8 35 | stages: [commit] 36 | - repo: https://github.com/Lucas-C/pre-commit-hooks-nodejs 37 | rev: v1.1.0 38 | hooks: 39 | - id: dockerfile_lint 40 | stages: [commit] 41 | - repo: https://github.com/mattlqx/pre-commit-sign 42 | rev: v1.1.1 43 | hooks: 44 | - id: sign-commit 45 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.tabSize": 4 3 | } 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2.7-alpine 2 | 3 | LABEL MAINTAINER Daniel Pryor 4 | LABEL NAME vmware_exporter 5 | LABEL VERSION 0.20 6 | 7 | WORKDIR /opt/vmware_exporter/ 8 | 9 | COPY . /opt/vmware_exporter/ 10 | 11 | RUN set -x; buildDeps="gcc python-dev musl-dev libffi-dev openssl openssl-dev" \ 12 | && apk add --no-cache --update $buildDeps \ 13 | && pip install -r requirements.txt \ 14 | && apk del $buildDeps 15 | 16 | EXPOSE 9272 17 | 18 | CMD ["/opt/vmware_exporter/vmware_exporter/vmware_exporter.py"] 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Remi Verchere 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include requirements.txt 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # vmware_exporter 2 | VMWare VCenter Exporter for Prometheus. 3 | 4 | Get VMWare VCenter information: 5 | - Current number of active snapshots 6 | - Snapshot Unix timestamp creation date 7 | - Datastore size and other stuff 8 | - Basic VM and Host metrics 9 | 10 | ## Archive 11 | 12 | Please note this repository is no longer maintained. 13 | 14 | Please go to https://github.com/pryorda/vmware_exporter for new comments. 15 | 16 | ## Usage 17 | 18 | - install with `$ python setup.py install` or `$ pip install vmware_exporter` (Installing from pip will install an old version. This is likely something I wont persue) 19 | - Create a `config.yml` file based on the configuration section. Some variables can be passed in as environment variables 20 | - Run `$ vmware_exporter -c /path/to/your/config` 21 | - Go to http://localhost:9272/metrics?vsphere_host=vcenter.company.com to see metrics 22 | 23 | Alternatively, if you don't wish to install the package, run using `$ vmware_exporter/vmware_exporter.py` or you can use the following docker command: 24 | 25 | ``` 26 | docker run -it --rm -p 9272:9272 -e VSPHERE_USER=${VSPHERE_USERNAME} -e VSPHERE_PASSWORD=${VSPHERE_PASSWORD} -e VSPHERE_HOST=${VSPHERE_HOST} -e VSPHERE_IGNORE_SSL=True --name vmware_exporter pryorda/vmware_exporter 27 | ``` 28 | 29 | ### Configuration amd limiting data collection 30 | 31 | You do not need to provide a configuration file unless you are not going to use Environment variables. If you do plan to use a configuration file be sure to override the container entrypoint or add -c config.yml to the command args. 32 | 33 | If you want to limit the scope of the metrics gather you can update the subsystem under `collect_only` in the config section, e.g. under `default`, or by using the environment variables: 34 | 35 | collect_only: 36 | vms: False 37 | datastores: True 38 | hosts: True 39 | 40 | This would only connect datastores and hosts. 41 | 42 | You can have multiple sections for different hosts and the configuration would look like: 43 | ``` 44 | default: 45 | vsphere_host: "vcenter" 46 | vsphere_user: "user" 47 | vsphere_password: "password" 48 | ignore_ssl: False 49 | collect_only: 50 | vms: True 51 | datastores: True 52 | hosts: True 53 | 54 | esx: 55 | vsphere_host: vc.example2.com 56 | vsphere_user: 'root' 57 | vsphere_password: 'password' 58 | ignore_ssl: True 59 | collect_only: 60 | vms: False 61 | datastores: False 62 | hosts: True 63 | 64 | limited: 65 | vsphere_host: slowvc.example.com 66 | vsphere_user: 'administrator@vsphere.local' 67 | vsphere_password: 'password' 68 | ignore_ssl: True 69 | collect_only: 70 | vms: False 71 | datastores: True 72 | hosts: False 73 | ``` 74 | Switching sections can be done by adding ?section=limited to the url. 75 | 76 | #### Environment Variables 77 | | Varible | Precedence | Defaults | Description | 78 | | ---------------------------- | ---------------------- | -------- | --------------------------------------- | 79 | | `VSPHERE_HOST` | config, env, get_param | n/a | vsphere server to connect to | 80 | | `VSPHERE_USER` | config, env | n/a | User for connecting to vsphere | 81 | | `VSPHERE_PASSWORD` | config, env | n/a | Password for connecting to vsphere | 82 | | `VSPHERE_IGNORE_SSL` | config, env | False | Ignore the ssl cert on the connection to vsphere host | 83 | | `VSPHERE_COLLECT_HOSTS` | config, env | True | Set to false to disable collect of hosts | 84 | | `VSPHERE_COLLECT_DATASTORES` | config, env | True | Set to false to disable collect of datastores | 85 | | `VSPHERE_COLLECT_VMS` | config, env | True | Set to false to disable collect of virtual machines | 86 | 87 | ### Prometheus configuration 88 | 89 | You can use the following parameters in prometheus configuration file. The `params` section is used to manage multiple login/passwords. 90 | 91 | ``` 92 | - job_name: 'vmware_vcenter' 93 | metrics_path: '/metrics' 94 | static_configs: 95 | - targets: 96 | - 'vcenter.company.com 97 | relabel_configs: 98 | - source_labels: [__address__] 99 | target_label: __param_target 100 | - source_labels: [__param_target] 101 | target_label: instance 102 | - target_label: __address__ 103 | replacement: localhost:9272 104 | 105 | - job_name: 'vmware_esx' 106 | metrics_path: '/metrics' 107 | file_sd_configs: 108 | - files: 109 | - /etc/prometheus/esx.yml 110 | params: 111 | section: [esx] 112 | relabel_configs: 113 | - source_labels: [__address__] 114 | target_label: __param_target 115 | - source_labels: [__param_target] 116 | target_label: instance 117 | - target_label: __address__ 118 | replacement: localhost:9272 119 | ``` 120 | 121 | ## Current Status 122 | 123 | - VCenter and ESXi 6 and 6.5 have been tested. 124 | - VM information, Snapshot, Host and Datastore basic information is exported, i.e: 125 | ``` 126 | # HELP vmware_snapshots VMWare current number of existing snapshots 127 | # TYPE vmware_snapshot_count gauge 128 | vmware_snapshot_timestamp_seconds{vm_name="My Super Virtual Machine"} 2.0 129 | # HELP vmware_snapshot_timestamp_seconds VMWare Snapshot creation time in seconds 130 | # TYPE vmware_snapshot_timestamp_seconds gauge 131 | vmware_snapshot_age{vm_name="My Super Virtual Machine",vm_snapshot_name="Very old snaphot"} 1478146956.96092 132 | vmware_snapshot_age{vm_name="My Super Virtual Machine",vm_snapshot_name="Old snapshot"} 1478470046.975632 133 | 134 | # HELP vmware_datastore_capacity_size VMWare Datasore capacity in bytes 135 | # TYPE vmware_datastore_capacity_size gauge 136 | vmware_datastore_capacity_size{ds_name="ESX1-LOCAL"} 67377299456.0 137 | # HELP vmware_datastore_freespace_size VMWare Datastore freespace in bytes 138 | # TYPE vmware_datastore_freespace_size gauge 139 | vmware_datastore_freespace_size{ds_name="ESX1-LOCAL"} 66349694976.0 140 | # HELP vmware_datastore_uncommited_size VMWare Datastore uncommitted in bytes 141 | # TYPE vmware_datastore_uncommited_size gauge 142 | vmware_datastore_uncommited_size{ds_name="ESX1-LOCAL"} 0.0 143 | # HELP vmware_datastore_provisoned_size VMWare Datastore provisoned in bytes 144 | # TYPE vmware_datastore_provisoned_size gauge 145 | vmware_datastore_provisoned_size{ds_name="ESX1-LOCAL"} 1027604480.0 146 | # HELP vmware_datastore_hosts VMWare Hosts number using this datastore 147 | # TYPE vmware_datastore_hosts gauge 148 | vmware_datastore_hosts{ds_name="ESX1-LOCAL"} 1.0 149 | # HELP vmware_datastore_vms VMWare Virtual Machines number using this datastore 150 | # TYPE vmware_datastore_vms gauge 151 | vmware_datastore_vms{ds_name="ESX1-LOCAL"} 0.0 152 | 153 | # HELP vmware_host_power_state VMWare Host Power state (On / Off) 154 | # TYPE vmware_host_power_state gauge 155 | vmware_host_power_state{host_name="esx1.company.com"} 1.0 156 | # HELP vmware_host_cpu_usage VMWare Host CPU usage in Mhz 157 | # TYPE vmware_host_cpu_usage gauge 158 | vmware_host_cpu_usage{host_name="esx1.company.com"} 2959.0 159 | # HELP vmware_host_cpu_max VMWare Host CPU max availability in Mhz 160 | # TYPE vmware_host_cpu_max gauge 161 | vmware_host_cpu_max{host_name="esx1.company.com"} 28728.0 162 | # HELP vmware_host_memory_usage VMWare Host Memory usage in Mbytes 163 | # TYPE vmware_host_memory_usage gauge 164 | vmware_host_memory_usage{host_name="esx1.company.com"} 107164.0 165 | # HELP vmware_host_memory_max VMWare Host Memory Max availability in Mbytes 166 | # TYPE vmware_host_memory_max gauge 167 | vmware_host_memory_max{host_name="esx1.company.com"} 131059.01953125 168 | ``` 169 | 170 | ## References 171 | 172 | The VMWare exporter uses theses libraries: 173 | - [pyVmomi](https://github.com/vmware/pyvmomi) for VMWare connection 174 | - Prometheus [client_python](https://github.com/prometheus/client_python) for Prometheus supervision 175 | - [Twisted](http://twistedmatrix.com/trac/) for http server 176 | 177 | The initial code is mainly inspired from: 178 | - https://www.robustperception.io/writing-a-jenkins-exporter-in-python/ 179 | - https://github.com/vmware/pyvmomi-community-samples 180 | - https://github.com/jbidinger/pyvmomi-tools 181 | 182 | ## Maintainer 183 | 184 | Daniel Pryor [pryorda](https://github.com/pryorda) 185 | 186 | ## License 187 | 188 | See LICENSE file 189 | -------------------------------------------------------------------------------- /alerts/vmware.rules: -------------------------------------------------------------------------------- 1 | ALERT VMWarnMemoryUsage 2 | IF 3 | ((vmware_vm_mem_usage_average / 100) >= 90) and ((vmware_vm_mem_usage_average / 100) < 95) 4 | FOR 30m 5 | LABELS { severity = "warning" } 6 | ANNOTATIONS { 7 | title = "High memory usage on {{ $labels.instance }}: {{ $value | printf \"%.2f\" }}%", 8 | } 9 | 10 | ALERT VMCritMemoryUsage 11 | IF 12 | ((vmware_vm_mem_usage_average / 100) >= 95) 13 | FOR 5m 14 | LABELS { severity = "critical" } 15 | ANNOTATIONS { 16 | title = "Very High memory usage on {{ $labels.instance }}: {{ $value | printf \"%.2f\" }}%", 17 | } 18 | 19 | ALERT VMWarnNumberSnapshots 20 | IF 21 | (vmware_vm_snapshots < 3) 22 | FOR 30m 23 | LABELS { severity = "warning" } 24 | ANNOTATIONS { 25 | title = "High snapshots number on {{ $labels.instance }}: {{ $value }}", 26 | } 27 | 28 | ALERT VMCritNumberSnapshots 29 | IF 30 | (vmware_vm_snapshots >= 3) 31 | FOR 30m 32 | LABELS { severity = "critical" } 33 | ANNOTATIONS { 34 | title = "Very high snapshot number on {{ $labels.instance }}: {{ $value }}", 35 | } 36 | 37 | ALERT VMWarnAgeSnapshots 38 | IF 39 | ((time() - vmware_vm_snapshot_timestamp_seconds) / (60*60*24) >= 7) 40 | FOR 5m 41 | LABELS { severity = "warning" } 42 | ANNOTATIONS { 43 | title = "Outdated snapshot on {{ $labels.instance }}: {{ $value | printf \"%.0f\" }} days", 44 | } -------------------------------------------------------------------------------- /alerts/vmware.rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: vmware.rules 3 | rules: 4 | - alert: VMWarnMemoryUsage 5 | expr: ((vmware_vm_mem_usage_average / 100) >= 90) and ((vmware_vm_mem_usage_average 6 | / 100) < 95) 7 | for: 30m 8 | labels: 9 | severity: warning 10 | annotations: 11 | title: 'High memory usage on {{ $labels.instance }}: {{ $value | printf "%.2f" 12 | }}%' 13 | - alert: VMCritMemoryUsage 14 | expr: ((vmware_vm_mem_usage_average / 100) >= 95) 15 | for: 5m 16 | labels: 17 | severity: critical 18 | annotations: 19 | title: 'Very High memory usage on {{ $labels.instance }}: {{ $value | printf 20 | "%.2f" }}%' 21 | - alert: VMWarnNumberSnapshots 22 | expr: (vmware_vm_snapshots < 3) 23 | for: 30m 24 | labels: 25 | severity: warning 26 | annotations: 27 | title: 'High snapshots number on {{ $labels.instance }}: {{ $value }}' 28 | - alert: VMCritNumberSnapshots 29 | expr: (vmware_vm_snapshots >= 3) 30 | for: 30m 31 | labels: 32 | severity: critical 33 | annotations: 34 | title: 'Very high snapshot number on {{ $labels.instance }}: {{ $value }}' 35 | - alert: VMWarnAgeSnapshots 36 | expr: ((time() - vmware_vm_snapshot_timestamp_seconds) / (60 * 60 * 24) >= 7) 37 | for: 5m 38 | labels: 39 | severity: warning 40 | annotations: 41 | title: 'Outdated snapshot on {{ $labels.instance }}: {{ $value | printf "%.0f" 42 | }} days' 43 | -------------------------------------------------------------------------------- /dashboards/esx.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_PROMETHEUS", 5 | "label": "prometheus", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "prometheus", 9 | "pluginName": "Prometheus" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "4.3.0" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "" 24 | }, 25 | { 26 | "type": "datasource", 27 | "id": "prometheus", 28 | "name": "Prometheus", 29 | "version": "1.0.0" 30 | }, 31 | { 32 | "type": "panel", 33 | "id": "singlestat", 34 | "name": "Singlestat", 35 | "version": "" 36 | } 37 | ], 38 | "annotations": { 39 | "list": [] 40 | }, 41 | "editable": true, 42 | "gnetId": null, 43 | "graphTooltip": 0, 44 | "hideControls": false, 45 | "id": null, 46 | "links": [], 47 | "refresh": "10s", 48 | "rows": [ 49 | { 50 | "collapse": false, 51 | "height": 250, 52 | "panels": [ 53 | { 54 | "cacheTimeout": null, 55 | "colorBackground": false, 56 | "colorValue": false, 57 | "colors": [ 58 | "rgba(245, 54, 54, 0.9)", 59 | "rgba(237, 129, 40, 0.89)", 60 | "rgba(50, 172, 45, 0.97)" 61 | ], 62 | "datasource": "${DS_PROMETHEUS}", 63 | "decimals": 1, 64 | "description": "System uptime", 65 | "format": "s", 66 | "gauge": { 67 | "maxValue": 100, 68 | "minValue": 0, 69 | "show": false, 70 | "thresholdLabels": false, 71 | "thresholdMarkers": true 72 | }, 73 | "id": 3, 74 | "interval": null, 75 | "links": [], 76 | "mappingType": 1, 77 | "mappingTypes": [ 78 | { 79 | "name": "value to text", 80 | "value": 1 81 | }, 82 | { 83 | "name": "range to text", 84 | "value": 2 85 | } 86 | ], 87 | "maxDataPoints": 100, 88 | "minSpan": 1, 89 | "nullPointMode": "connected", 90 | "nullText": null, 91 | "postfix": "s", 92 | "postfixFontSize": "50%", 93 | "prefix": "", 94 | "prefixFontSize": "50%", 95 | "rangeMaps": [ 96 | { 97 | "from": "null", 98 | "text": "N/A", 99 | "to": "null" 100 | } 101 | ], 102 | "span": 3, 103 | "sparkline": { 104 | "fillColor": "rgba(31, 118, 189, 0.18)", 105 | "full": false, 106 | "lineColor": "rgb(31, 120, 193)", 107 | "show": false 108 | }, 109 | "tableColumn": "", 110 | "targets": [ 111 | { 112 | "expr": "time() - vmware_host_boot_timestamp_seconds{host_name=\"$hostname\"}", 113 | "format": "time_series", 114 | "intervalFactor": 2, 115 | "legendFormat": "", 116 | "refId": "A", 117 | "step": 4 118 | } 119 | ], 120 | "thresholds": "", 121 | "title": "Uptime", 122 | "type": "singlestat", 123 | "valueFontSize": "50%", 124 | "valueMaps": [ 125 | { 126 | "op": "=", 127 | "text": "N/A", 128 | "value": "null" 129 | } 130 | ], 131 | "valueName": "current" 132 | }, 133 | { 134 | "cacheTimeout": null, 135 | "colorBackground": false, 136 | "colorValue": true, 137 | "colors": [ 138 | "rgba(50, 172, 45, 0.97)", 139 | "rgba(237, 129, 40, 0.89)", 140 | "rgba(245, 54, 54, 0.9)" 141 | ], 142 | "datasource": "${DS_PROMETHEUS}", 143 | "format": "percent", 144 | "gauge": { 145 | "maxValue": 100, 146 | "minValue": 0, 147 | "show": true, 148 | "thresholdLabels": false, 149 | "thresholdMarkers": true 150 | }, 151 | "id": 4, 152 | "interval": null, 153 | "links": [], 154 | "mappingType": 1, 155 | "mappingTypes": [ 156 | { 157 | "name": "value to text", 158 | "value": 1 159 | }, 160 | { 161 | "name": "range to text", 162 | "value": 2 163 | } 164 | ], 165 | "maxDataPoints": 100, 166 | "minSpan": 1, 167 | "nullPointMode": "connected", 168 | "nullText": null, 169 | "postfix": "", 170 | "postfixFontSize": "50%", 171 | "prefix": "", 172 | "prefixFontSize": "50%", 173 | "rangeMaps": [ 174 | { 175 | "from": "null", 176 | "text": "N/A", 177 | "to": "null" 178 | } 179 | ], 180 | "repeat": null, 181 | "span": 3, 182 | "sparkline": { 183 | "fillColor": "rgba(31, 118, 189, 0.18)", 184 | "full": false, 185 | "lineColor": "rgb(31, 120, 193)", 186 | "show": true 187 | }, 188 | "tableColumn": "", 189 | "targets": [ 190 | { 191 | "expr": "vmware_host_cpu_usage{host_name=\"$hostname\"} / vmware_host_cpu_max{host_name=\"$hostname\"} * 100", 192 | "format": "time_series", 193 | "intervalFactor": 1, 194 | "legendFormat": "", 195 | "refId": "A", 196 | "step": 2 197 | } 198 | ], 199 | "thresholds": "80,90", 200 | "title": "CPU Usage", 201 | "type": "singlestat", 202 | "valueFontSize": "80%", 203 | "valueMaps": [ 204 | { 205 | "op": "=", 206 | "text": "N/A", 207 | "value": "null" 208 | } 209 | ], 210 | "valueName": "current" 211 | }, 212 | { 213 | "cacheTimeout": null, 214 | "colorBackground": false, 215 | "colorValue": true, 216 | "colors": [ 217 | "rgba(50, 172, 45, 0.97)", 218 | "rgba(237, 129, 40, 0.89)", 219 | "rgba(245, 54, 54, 0.9)" 220 | ], 221 | "datasource": "${DS_PROMETHEUS}", 222 | "format": "percent", 223 | "gauge": { 224 | "maxValue": 100, 225 | "minValue": 0, 226 | "show": true, 227 | "thresholdLabels": false, 228 | "thresholdMarkers": true 229 | }, 230 | "id": 5, 231 | "interval": null, 232 | "links": [], 233 | "mappingType": 1, 234 | "mappingTypes": [ 235 | { 236 | "name": "value to text", 237 | "value": 1 238 | }, 239 | { 240 | "name": "range to text", 241 | "value": 2 242 | } 243 | ], 244 | "maxDataPoints": 100, 245 | "minSpan": 1, 246 | "nullPointMode": "connected", 247 | "nullText": null, 248 | "postfix": "", 249 | "postfixFontSize": "50%", 250 | "prefix": "", 251 | "prefixFontSize": "50%", 252 | "rangeMaps": [ 253 | { 254 | "from": "null", 255 | "text": "N/A", 256 | "to": "null" 257 | } 258 | ], 259 | "span": 3, 260 | "sparkline": { 261 | "fillColor": "rgba(31, 118, 189, 0.18)", 262 | "full": false, 263 | "lineColor": "rgb(31, 120, 193)", 264 | "show": true 265 | }, 266 | "tableColumn": "", 267 | "targets": [ 268 | { 269 | "expr": "vmware_host_memory_usage{host_name=\"$hostname\"} / vmware_host_memory_max{host_name=\"$hostname\"} * 100", 270 | "format": "time_series", 271 | "intervalFactor": 1, 272 | "legendFormat": "", 273 | "refId": "A", 274 | "step": 2 275 | } 276 | ], 277 | "thresholds": "80,90", 278 | "title": "Memory Usage", 279 | "type": "singlestat", 280 | "valueFontSize": "80%", 281 | "valueMaps": [ 282 | { 283 | "op": "=", 284 | "text": "N/A", 285 | "value": "null" 286 | } 287 | ], 288 | "valueName": "current" 289 | }, 290 | { 291 | "cacheTimeout": null, 292 | "colorBackground": true, 293 | "colorValue": false, 294 | "colors": [ 295 | "rgba(245, 54, 54, 0.9)", 296 | "rgba(237, 129, 40, 0.89)", 297 | "rgba(50, 172, 45, 0.97)" 298 | ], 299 | "datasource": "${DS_PROMETHEUS}", 300 | "format": "none", 301 | "gauge": { 302 | "maxValue": 100, 303 | "minValue": 0, 304 | "show": false, 305 | "thresholdLabels": false, 306 | "thresholdMarkers": true 307 | }, 308 | "id": 6, 309 | "interval": null, 310 | "links": [], 311 | "mappingType": 1, 312 | "mappingTypes": [ 313 | { 314 | "name": "value to text", 315 | "value": 1 316 | }, 317 | { 318 | "name": "range to text", 319 | "value": 2 320 | } 321 | ], 322 | "maxDataPoints": 100, 323 | "nullPointMode": "connected", 324 | "nullText": null, 325 | "postfix": "", 326 | "postfixFontSize": "50%", 327 | "prefix": "", 328 | "prefixFontSize": "50%", 329 | "rangeMaps": [ 330 | { 331 | "from": "null", 332 | "text": "N/A", 333 | "to": "null" 334 | } 335 | ], 336 | "span": 3, 337 | "sparkline": { 338 | "fillColor": "rgba(31, 118, 189, 0.18)", 339 | "full": false, 340 | "lineColor": "rgb(31, 120, 193)", 341 | "show": false 342 | }, 343 | "tableColumn": "", 344 | "targets": [ 345 | { 346 | "expr": "vmware_host_power_state{host_name=\"$hostname\"}", 347 | "format": "time_series", 348 | "intervalFactor": 2, 349 | "refId": "A", 350 | "step": 4 351 | } 352 | ], 353 | "thresholds": "1,1", 354 | "title": "Host State", 355 | "type": "singlestat", 356 | "valueFontSize": "80%", 357 | "valueMaps": [ 358 | { 359 | "op": "=", 360 | "text": "UP", 361 | "value": "1" 362 | }, 363 | { 364 | "op": "=", 365 | "text": "DOWN", 366 | "value": "0" 367 | } 368 | ], 369 | "valueName": "current" 370 | } 371 | ], 372 | "repeat": null, 373 | "repeatIteration": null, 374 | "repeatRowId": null, 375 | "showTitle": false, 376 | "title": "Dashboard Row", 377 | "titleSize": "h6" 378 | }, 379 | { 380 | "collapse": false, 381 | "height": 398, 382 | "panels": [ 383 | { 384 | "aliasColors": {}, 385 | "bars": false, 386 | "dashLength": 10, 387 | "dashes": false, 388 | "datasource": "${DS_PROMETHEUS}", 389 | "decimals": 1, 390 | "fill": 0, 391 | "id": 1, 392 | "legend": { 393 | "alignAsTable": true, 394 | "avg": false, 395 | "current": true, 396 | "hideEmpty": false, 397 | "hideZero": false, 398 | "max": false, 399 | "min": false, 400 | "rightSide": true, 401 | "show": false, 402 | "total": false, 403 | "values": true 404 | }, 405 | "lines": true, 406 | "linewidth": 1, 407 | "links": [], 408 | "minSpan": 2, 409 | "nullPointMode": "null", 410 | "percentage": true, 411 | "pointradius": 5, 412 | "points": false, 413 | "renderer": "flot", 414 | "seriesOverrides": [], 415 | "spaceLength": 10, 416 | "span": 6, 417 | "stack": false, 418 | "steppedLine": false, 419 | "targets": [ 420 | { 421 | "expr": "vmware_host_cpu_usage{host_name=\"$hostname\"} / vmware_host_cpu_max{host_name=\"$hostname\"} * 100", 422 | "format": "time_series", 423 | "intervalFactor": 2, 424 | "legendFormat": "$hostname", 425 | "metric": "", 426 | "refId": "A", 427 | "step": 2 428 | } 429 | ], 430 | "thresholds": [], 431 | "timeFrom": null, 432 | "timeShift": null, 433 | "title": "ESX Host CPU Usage", 434 | "tooltip": { 435 | "shared": true, 436 | "sort": 0, 437 | "value_type": "individual" 438 | }, 439 | "type": "graph", 440 | "xaxis": { 441 | "buckets": null, 442 | "mode": "time", 443 | "name": null, 444 | "show": true, 445 | "values": [] 446 | }, 447 | "yaxes": [ 448 | { 449 | "format": "percent", 450 | "label": "", 451 | "logBase": 1, 452 | "max": "100", 453 | "min": "0", 454 | "show": true 455 | }, 456 | { 457 | "format": "short", 458 | "label": null, 459 | "logBase": 1, 460 | "max": null, 461 | "min": null, 462 | "show": false 463 | } 464 | ] 465 | }, 466 | { 467 | "aliasColors": {}, 468 | "bars": false, 469 | "dashLength": 10, 470 | "dashes": false, 471 | "datasource": "${DS_PROMETHEUS}", 472 | "decimals": 1, 473 | "fill": 1, 474 | "id": 2, 475 | "legend": { 476 | "alignAsTable": true, 477 | "avg": false, 478 | "current": true, 479 | "hideEmpty": false, 480 | "hideZero": false, 481 | "max": false, 482 | "min": false, 483 | "rightSide": true, 484 | "show": false, 485 | "total": false, 486 | "values": true 487 | }, 488 | "lines": true, 489 | "linewidth": 1, 490 | "links": [], 491 | "minSpan": 2, 492 | "nullPointMode": "null", 493 | "percentage": false, 494 | "pointradius": 5, 495 | "points": false, 496 | "renderer": "flot", 497 | "seriesOverrides": [], 498 | "spaceLength": 10, 499 | "span": 6, 500 | "stack": false, 501 | "steppedLine": false, 502 | "targets": [ 503 | { 504 | "expr": "vmware_host_memory_usage{host_name=\"$hostname\"} / vmware_host_memory_max{host_name=\"$hostname\"} * 100", 505 | "format": "time_series", 506 | "intervalFactor": 2, 507 | "legendFormat": "$hostname", 508 | "metric": "", 509 | "refId": "A", 510 | "step": 2 511 | } 512 | ], 513 | "thresholds": [], 514 | "timeFrom": null, 515 | "timeShift": null, 516 | "title": "ESX Host RAM Usage", 517 | "tooltip": { 518 | "shared": true, 519 | "sort": 0, 520 | "value_type": "individual" 521 | }, 522 | "type": "graph", 523 | "xaxis": { 524 | "buckets": null, 525 | "mode": "time", 526 | "name": null, 527 | "show": true, 528 | "values": [] 529 | }, 530 | "yaxes": [ 531 | { 532 | "format": "percent", 533 | "label": "", 534 | "logBase": 1, 535 | "max": "100", 536 | "min": "0", 537 | "show": true 538 | }, 539 | { 540 | "format": "short", 541 | "label": null, 542 | "logBase": 1, 543 | "max": null, 544 | "min": null, 545 | "show": false 546 | } 547 | ] 548 | } 549 | ], 550 | "repeat": null, 551 | "repeatIteration": null, 552 | "repeatRowId": null, 553 | "showTitle": false, 554 | "title": "Dashboard Row", 555 | "titleSize": "h6" 556 | } 557 | ], 558 | "schemaVersion": 14, 559 | "style": "dark", 560 | "tags": [], 561 | "templating": { 562 | "list": [ 563 | { 564 | "allValue": null, 565 | "current": {}, 566 | "datasource": "${DS_PROMETHEUS}", 567 | "hide": 0, 568 | "includeAll": false, 569 | "label": "Host:", 570 | "multi": false, 571 | "name": "hostname", 572 | "options": [], 573 | "query": "label_values(vmware_host_boot_timestamp_seconds,host_name)", 574 | "refresh": 1, 575 | "regex": "/([^:]+)/", 576 | "sort": 1, 577 | "tagValuesQuery": "", 578 | "tags": [], 579 | "tagsQuery": "", 580 | "type": "query", 581 | "useTags": false 582 | } 583 | ] 584 | }, 585 | "time": { 586 | "from": "now-5m", 587 | "to": "now" 588 | }, 589 | "timepicker": { 590 | "refresh_intervals": [ 591 | "5s", 592 | "10s", 593 | "30s", 594 | "1m", 595 | "5m", 596 | "15m", 597 | "30m", 598 | "1h", 599 | "2h", 600 | "1d" 601 | ], 602 | "time_options": [ 603 | "5m", 604 | "15m", 605 | "1h", 606 | "6h", 607 | "12h", 608 | "24h", 609 | "2d", 610 | "7d", 611 | "30d" 612 | ] 613 | }, 614 | "timezone": "browser", 615 | "title": "ESX Hosts Information", 616 | "version": 17 617 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | prometheus-client==0.0.19 2 | pytz 3 | pyvmomi>=6.5 4 | twisted>=14.0.2 5 | yamlconfig 6 | service-identity 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import vmware_exporter 3 | 4 | setup( 5 | name='vmware_exporter', 6 | version=vmware_exporter.__version__, 7 | author=vmware_exporter.__author__, 8 | description='VMWare VCenter Exporter for Prometheus', 9 | long_description=open('README.md').read(), 10 | url='https://github.com/rverchere/vmware_exporter', 11 | download_url=("https://github.com/rverchere/vmware_exporter/tarball/%s" % 12 | vmware_exporter.__version__), 13 | keywords=['VMWare', 'VCenter', 'Prometheus'], 14 | license=vmware_exporter.__license__, 15 | packages=find_packages(exclude=['*.test', '*.test.*']), 16 | include_package_data=True, 17 | install_requires=open('requirements.txt').readlines(), 18 | entry_points={ 19 | 'console_scripts': [ 20 | 'vmware_exporter=vmware_exporter.vmware_exporter:main' 21 | ] 22 | } 23 | ) 24 | -------------------------------------------------------------------------------- /systemd/vmware_exporter.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Prometheus VMWare Exporter 3 | After=network.target 4 | 5 | [Service] 6 | User=prometheus 7 | Group=prometheus 8 | WorkingDirectory=/opt/prometheus/vmware_exporter/ 9 | ExecStart=/opt/prometheus/vmware_exporter/vmware_exporter.py 10 | Type=simple 11 | 12 | [Install] 13 | WantedBy=multi-user.target 14 | -------------------------------------------------------------------------------- /vmware_exporter/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.0" 2 | __author__ = "Daniel Pryor" 3 | __license__ = "BSD 3-Clause License" 4 | -------------------------------------------------------------------------------- /vmware_exporter/threader.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | 4 | class Threader(object): 5 | """ 6 | Takes method and data and threads it 7 | """ 8 | _thread = '' 9 | 10 | def thread_it(self, method, data): 11 | """ 12 | Thread any method and data will be used as args 13 | """ 14 | self._thread = threading.Thread(target=method, args=(data)) 15 | self._thread.start() 16 | if threading.active_count() >= 50: 17 | self.join() 18 | 19 | def join(self): 20 | """ 21 | join all threads and complete them 22 | """ 23 | try: 24 | self._thread.join() 25 | except RuntimeError: 26 | # Thread terminated. 27 | pass 28 | except ReferenceError: 29 | pass 30 | -------------------------------------------------------------------------------- /vmware_exporter/vmware_exporter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- python -*- 3 | # -*- coding: utf-8 -*- 4 | """ 5 | Handles collection of metrics for vmware. 6 | """ 7 | 8 | from __future__ import print_function 9 | from datetime import datetime 10 | 11 | # Generic imports 12 | import argparse 13 | import os 14 | import ssl 15 | from threader import Threader 16 | import pytz 17 | import yaml 18 | 19 | from yamlconfig import YamlConfig 20 | 21 | # Twisted 22 | from twisted.web.server import Site, NOT_DONE_YET 23 | from twisted.web.resource import Resource 24 | from twisted.internet import reactor, endpoints 25 | from twisted.internet.task import deferLater 26 | 27 | # VMWare specific imports 28 | from pyVmomi import vim, vmodl 29 | from pyVim import connect 30 | 31 | # Prometheus specific imports 32 | from prometheus_client.core import GaugeMetricFamily, _floatToGoString 33 | 34 | 35 | class VMWareMetricsResource(Resource): 36 | """ 37 | VMWare twisted ``Resource`` handling multi endpoints 38 | Only handle /metrics and /healthz path 39 | """ 40 | isLeaf = True 41 | 42 | def __init__(self): 43 | """ 44 | Init Metric Resource 45 | """ 46 | Resource.__init__(self) 47 | self.threader = Threader() 48 | 49 | def configure(self, args): 50 | if args.config_file: 51 | try: 52 | self.config = YamlConfig(args.config_file) 53 | if 'default' not in self.config.keys(): 54 | log("Error, you must have a default section in config file (for now)") 55 | exit(1) 56 | except Exception as exception: 57 | raise SystemExit("Error while reading configuration file: {0}".format(exception.message)) 58 | else: 59 | config_data = """ 60 | default: 61 | vsphere_host: "{0}" 62 | vsphere_user: "{1}" 63 | vsphere_password: "{2}" 64 | ignore_ssl: {3} 65 | collect_only: 66 | vms: True 67 | datastores: True 68 | hosts: True 69 | """.format(os.environ.get('VSPHERE_HOST'), 70 | os.environ.get('VSPHERE_USER'), 71 | os.environ.get('VSPHERE_PASSWORD'), 72 | os.environ.get('VSPHERE_IGNORE_SSL', False) 73 | ) 74 | self.config = yaml.load(config_data) 75 | self.config['default']['collect_only']['hosts'] = os.environ.get('VSPHERE_COLLECT_HOSTS', True) 76 | self.config['default']['collect_only']['datastores'] = os.environ.get('VSPHERE_COLLECT_DATASTORES', True) 77 | self.config['default']['collect_only']['vms'] = os.environ.get('VSPHERE_COLLECT_VMS', True) 78 | 79 | def render_GET(self, request): 80 | """ handles get requests for metrics, health, and everything else """ 81 | path = request.path.decode() 82 | request.setHeader("Content-Type", "text/plain; charset=UTF-8") 83 | if path == '/metrics': 84 | deferred_request = deferLater(reactor, 0, lambda: request) 85 | deferred_request.addCallback(self.generate_latest_metrics) 86 | deferred_request.addErrback(self.errback, request) 87 | return NOT_DONE_YET 88 | elif path == '/healthz': 89 | request.setResponseCode(200) 90 | log("Service is UP") 91 | return 'Server is UP'.encode() 92 | else: 93 | log("Uri not found: " + request.uri) 94 | request.setResponseCode(404) 95 | return '404 Not Found'.encode() 96 | 97 | def errback(self, failure, request): 98 | """ handles failures from requests """ 99 | failure.printTraceback() 100 | log(failure) 101 | request.processingFailed(failure) # This will send a trace to the browser and close the request. 102 | return None 103 | 104 | def generate_latest_metrics(self, request): 105 | """ gets the latest metrics """ 106 | section = request.args.get('section', ['default'])[0] 107 | if not request.args.get('vsphere_host', [None])[0] and not self.config[section].get('vsphere_host'): 108 | request.setResponseCode(500) 109 | log("No vsphere_host defined") 110 | request.write('No vsphere_host defined!\n') 111 | request.finish() 112 | if self.config[section].get('vsphere_host'): 113 | vsphere_host = self.config[section].get('vsphere_host') 114 | else: 115 | vsphere_host = request.args.get('vsphere_host')[0] 116 | output = [] 117 | for metric in self.collect(vsphere_host, section): 118 | output.append('# HELP {0} {1}'.format( 119 | metric.name, metric.documentation.replace('\\', r'\\').replace('\n', r'\n'))) 120 | output.append('\n# TYPE {0} {1}\n'.format(metric.name, metric.type)) 121 | for name, labels, value in metric.samples: 122 | if labels: 123 | labelstr = '{{{0}}}'.format(','.join( 124 | ['{0}="{1}"'.format( 125 | k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')) 126 | for k, v in sorted(labels.items())])) 127 | else: 128 | labelstr = '' 129 | output.append('{0}{1} {2}\n'.format(name, labelstr, _floatToGoString(value))) 130 | if output != []: 131 | request.write(''.join(output).encode('utf-8')) 132 | request.finish() 133 | else: 134 | request.setResponseCode(500, message=('cannot connect to vmware')) 135 | request.finish() 136 | return 137 | 138 | def collect(self, vsphere_host, section='default'): 139 | """ collects metrics """ 140 | if section not in self.config.keys(): 141 | log("{} is not a valid section, using default".format(section)) 142 | section = 'default' 143 | metric_list = {} 144 | metric_list['vms'] = { 145 | 'vmware_vm_power_state': GaugeMetricFamily( 146 | 'vmware_vm_power_state', 147 | 'VMWare VM Power state (On / Off)', 148 | labels=['vm_name', 'host_name']), 149 | 'vmware_vm_boot_timestamp_seconds': GaugeMetricFamily( 150 | 'vmware_vm_boot_timestamp_seconds', 151 | 'VMWare VM boot time in seconds', 152 | labels=['vm_name', 'host_name']), 153 | 'vmware_vm_snapshots': GaugeMetricFamily( 154 | 'vmware_vm_snapshots', 155 | 'VMWare current number of existing snapshots', 156 | labels=['vm_name']), 157 | 'vmware_vm_snapshot_timestamp_seconds': GaugeMetricFamily( 158 | 'vmware_vm_snapshot_timestamp_seconds', 159 | 'VMWare Snapshot creation time in seconds', 160 | labels=['vm_name', 'vm_snapshot_name']), 161 | 'vmware_vm_num_cpu': GaugeMetricFamily( 162 | 'vmware_vm_num_cpu', 163 | 'VMWare Number of processors in the virtual machine', 164 | labels=['vm_name', 'host_name']) 165 | } 166 | metric_list['datastores'] = { 167 | 'vmware_datastore_capacity_size': GaugeMetricFamily( 168 | 'vmware_datastore_capacity_size', 169 | 'VMWare Datasore capacity in bytes', 170 | labels=['ds_name']), 171 | 'vmware_datastore_freespace_size': GaugeMetricFamily( 172 | 'vmware_datastore_freespace_size', 173 | 'VMWare Datastore freespace in bytes', 174 | labels=['ds_name']), 175 | 'vmware_datastore_uncommited_size': GaugeMetricFamily( 176 | 'vmware_datastore_uncommited_size', 177 | 'VMWare Datastore uncommitted in bytes', 178 | labels=['ds_name']), 179 | 'vmware_datastore_provisoned_size': GaugeMetricFamily( 180 | 'vmware_datastore_provisoned_size', 181 | 'VMWare Datastore provisoned in bytes', 182 | labels=['ds_name']), 183 | 'vmware_datastore_hosts': GaugeMetricFamily( 184 | 'vmware_datastore_hosts', 185 | 'VMWare Hosts number using this datastore', 186 | labels=['ds_name']), 187 | 'vmware_datastore_vms': GaugeMetricFamily( 188 | 'vmware_datastore_vms', 189 | 'VMWare Virtual Machines number using this datastore', 190 | labels=['ds_name']) 191 | } 192 | metric_list['hosts'] = { 193 | 'vmware_host_power_state': GaugeMetricFamily( 194 | 'vmware_host_power_state', 195 | 'VMWare Host Power state (On / Off)', 196 | labels=['host_name']), 197 | 'vmware_host_boot_timestamp_seconds': GaugeMetricFamily( 198 | 'vmware_host_boot_timestamp_seconds', 199 | 'VMWare Host boot time in seconds', 200 | labels=['host_name']), 201 | 'vmware_host_cpu_usage': GaugeMetricFamily( 202 | 'vmware_host_cpu_usage', 203 | 'VMWare Host CPU usage in Mhz', 204 | labels=['host_name']), 205 | 'vmware_host_cpu_max': GaugeMetricFamily( 206 | 'vmware_host_cpu_max', 207 | 'VMWare Host CPU max availability in Mhz', 208 | labels=['host_name']), 209 | 'vmware_host_memory_usage': GaugeMetricFamily( 210 | 'vmware_host_memory_usage', 211 | 'VMWare Host Memory usage in Mbytes', 212 | labels=['host_name']), 213 | 'vmware_host_memory_max': GaugeMetricFamily( 214 | 'vmware_host_memory_max', 215 | 'VMWare Host Memory Max availability in Mbytes', 216 | labels=['host_name']), 217 | } 218 | 219 | metrics = {} 220 | for key, value in self.config[section]['collect_only'].items(): 221 | if value is True: 222 | metrics.update(metric_list[key]) 223 | 224 | log("Start collecting vcenter metrics for {0}".format(vsphere_host)) 225 | 226 | self.vmware_connection = self._vmware_connect(vsphere_host, section) 227 | if not self.vmware_connection: 228 | log("Cannot connect to vmware") 229 | return 230 | 231 | content = self.vmware_connection.RetrieveContent() 232 | 233 | if self.config[section]['collect_only']['vms'] is True: 234 | # Get performance metrics counter information 235 | counter_info = self._vmware_perf_metrics(content) 236 | 237 | # Fill VM Informations 238 | log("Starting VM performance metric collection") 239 | self._vmware_get_vms(content, metrics, counter_info) 240 | log("Finish starting vm performance vm collection") 241 | 242 | # Fill Snapshots (count and age) 243 | log("Starting VM snapshot metric collection") 244 | vm_counts, vm_ages = self._vmware_get_snapshots(content) 245 | for v in vm_counts: 246 | metrics['vmware_vm_snapshots'].add_metric([v['vm_name']], v['snapshot_count']) 247 | for vm_age in vm_ages: 248 | for v in vm_age: 249 | metrics['vmware_vm_snapshot_timestamp_seconds'].add_metric([v['vm_name'], 250 | v['vm_snapshot_name']], 251 | v['vm_snapshot_timestamp_seconds'] 252 | ) 253 | log("Finished VM snapshot metric collection") 254 | 255 | # Fill Datastore 256 | if self.config[section]['collect_only']['datastores'] is True: 257 | self._vmware_get_datastores(content, metrics) 258 | 259 | # Fill Hosts Informations 260 | if self.config[section]['collect_only']['hosts'] is True: 261 | self._vmware_get_hosts(content, metrics) 262 | 263 | log("Stop collecting vcenter metrics for {0}".format(vsphere_host)) 264 | self.threader.join() 265 | self._vmware_disconnect() 266 | 267 | for _key, metric in metrics.items(): 268 | yield metric 269 | 270 | def _to_epoch(self, my_date): 271 | """ convert to epoch time """ 272 | return (my_date - datetime(1970, 1, 1, tzinfo=pytz.utc)).total_seconds() 273 | 274 | def _vmware_get_obj(self, content, vimtype, name=None): 275 | """ 276 | Get the vsphere object associated with a given text name 277 | """ 278 | obj = None 279 | container = content.viewManager.CreateContainerView( 280 | content.rootFolder, vimtype, True) 281 | if name: 282 | for view in container.view: 283 | if view.name == name: 284 | obj = view 285 | return [obj] 286 | else: 287 | return container.view 288 | 289 | def _vmware_connect(self, vsphere_host, section): 290 | """ 291 | Connect to Vcenter and get connection 292 | """ 293 | vsphere_user = self.config[section].get('vsphere_user') 294 | vsphere_password = self.config[section].get('vsphere_password') 295 | 296 | context = None 297 | if self.config[section].get('ignore_ssl') and \ 298 | hasattr(ssl, "_create_unverified_context"): 299 | context = ssl._create_unverified_context() 300 | 301 | try: 302 | vmware_connect = connect.Connect(vsphere_host, 443, 303 | vsphere_user, 304 | vsphere_password, 305 | sslContext=context 306 | ) 307 | 308 | return vmware_connect 309 | 310 | except vmodl.MethodFault as error: 311 | log("Caught vmodl fault: " + error.msg) 312 | return None 313 | 314 | def _vmware_disconnect(self): 315 | """ 316 | Disconnect from Vcenter 317 | """ 318 | connect.Disconnect(self.vmware_connection) 319 | 320 | def _vmware_perf_metrics(self, content): 321 | """ 322 | create a mapping from performance stats to their counterIDs 323 | counter_info: [performance stat => counterId] 324 | performance stat example: cpu.usagemhz.LATEST 325 | """ 326 | counter_info = {} 327 | for counter in content.perfManager.perfCounter: 328 | prefix = counter.groupInfo.key 329 | counter_full = "{}.{}.{}".format(prefix, counter.nameInfo.key, counter.rollupType) 330 | counter_info[counter_full] = counter.key 331 | return counter_info 332 | 333 | def _vmware_full_snapshots_list(self, snapshots): 334 | """ 335 | Get snapshots from a VM list, recursively 336 | """ 337 | snapshot_data = [] 338 | for snapshot in snapshots: 339 | snap_timestamp = self._to_epoch(snapshot.createTime) 340 | snap_info = {'vm_snapshot_name': snapshot.name, 'vm_snapshot_timestamp_seconds': snap_timestamp} 341 | snapshot_data.append(snap_info) 342 | snapshot_data = snapshot_data + self._vmware_full_snapshots_list( 343 | snapshot.childSnapshotList) 344 | return snapshot_data 345 | 346 | def _vmware_get_snapshot_details(self, snapshots_count_table, snapshots_age_table, virtual_machine): 347 | """ 348 | Gathers snapshot details 349 | """ 350 | snapshot_paths = self._vmware_full_snapshots_list(virtual_machine.snapshot.rootSnapshotList) 351 | for snapshot_path in snapshot_paths: 352 | snapshot_path['vm_name'] = virtual_machine.name 353 | # Add Snapshot count per VM 354 | snapshot_count = len(snapshot_paths) 355 | snapshot_count_info = { 356 | 'vm_name': virtual_machine.name, 357 | 'snapshot_count': snapshot_count 358 | } 359 | snapshots_count_table.append(snapshot_count_info) 360 | snapshots_age_table.append(snapshot_paths) 361 | 362 | def _vmware_get_snapshots(self, content): 363 | """ 364 | Get snapshots from all VM 365 | """ 366 | snapshots_count_table = [] 367 | snapshots_age_table = [] 368 | virtual_machines = self._vmware_get_obj(content, [vim.VirtualMachine]) 369 | for virtual_machine in virtual_machines: 370 | if not virtual_machine or virtual_machine.snapshot is None: 371 | continue 372 | else: 373 | self.threader.thread_it(self._vmware_get_snapshot_details, 374 | [snapshots_count_table, snapshots_age_table, virtual_machine]) 375 | return snapshots_count_table, snapshots_age_table 376 | 377 | def _vmware_get_datastores(self, content, ds_metrics): 378 | """ 379 | Get Datastore information 380 | """ 381 | log("Starting datastore metric collection") 382 | datastores = self._vmware_get_obj(content, [vim.Datastore]) 383 | for datastore in datastores: 384 | # ds.RefreshDatastoreStorageInfo() 385 | summary = datastore.summary 386 | self.threader.thread_it(self._vmware_get_datastore_metrics, [datastore, ds_metrics, summary]) 387 | log("Finished datastore metric collection") 388 | 389 | def _vmware_get_datastore_metrics(self, datastore, ds_metrics, summary): 390 | """ 391 | Get datastore metrics 392 | """ 393 | ds_capacity = summary.capacity 394 | ds_freespace = summary.freeSpace 395 | ds_uncommitted = summary.uncommitted if summary.uncommitted else 0 396 | ds_provisioned = ds_capacity - ds_freespace + ds_uncommitted 397 | 398 | ds_metrics['vmware_datastore_capacity_size'].add_metric([summary.name], ds_capacity) 399 | ds_metrics['vmware_datastore_freespace_size'].add_metric([summary.name], ds_freespace) 400 | ds_metrics['vmware_datastore_uncommited_size'].add_metric([summary.name], ds_uncommitted) 401 | ds_metrics['vmware_datastore_provisoned_size'].add_metric([summary.name], ds_provisioned) 402 | ds_metrics['vmware_datastore_hosts'].add_metric([summary.name], len(datastore.host)) 403 | ds_metrics['vmware_datastore_vms'].add_metric([summary.name], len(datastore.vm)) 404 | 405 | def _vmware_get_vms(self, content, vm_metrics, counter_info): 406 | """ 407 | Get VM information 408 | """ 409 | 410 | # List of performance counter we want 411 | perf_list = [ 412 | 'cpu.ready.summation', 413 | 'cpu.usage.average', 414 | 'cpu.usagemhz.average', 415 | 'disk.usage.average', 416 | 'disk.read.average', 417 | 'disk.write.average', 418 | 'mem.usage.average', 419 | 'net.received.average', 420 | 'net.transmitted.average', 421 | ] 422 | 423 | # Prepare gauges 424 | for p in perf_list: 425 | p_metric = 'vmware_vm_' + p.replace('.', '_') 426 | vm_metrics[p_metric] = GaugeMetricFamily( 427 | p_metric, 428 | p_metric, 429 | labels=['vm_name', 'host_name']) 430 | 431 | virtual_machines = self._vmware_get_obj(content, [vim.VirtualMachine]) 432 | log("Total Virtual Machines: {0}".format(len(virtual_machines))) 433 | for virtual_machine in virtual_machines: 434 | self.threader.thread_it(self._vmware_get_vm_perf_metrics, 435 | [content, counter_info, perf_list, virtual_machine, vm_metrics]) 436 | 437 | def _vmware_get_vm_perf_metrics(self, content, counter_info, perf_list, virtual_machine, vm_metrics): 438 | """ 439 | Loops over metrics in perf_list on vm 440 | """ 441 | # DEBUG ME: log("Starting VM: " + vm.name) 442 | summary = virtual_machine.summary 443 | 444 | power_state = 1 if summary.runtime.powerState == 'poweredOn' else 0 445 | num_cpu = summary.config.numCpu 446 | vm_host = summary.runtime.host 447 | vm_host_name = vm_host.name 448 | vm_metrics['vmware_vm_power_state'].add_metric([virtual_machine.name, vm_host_name], power_state) 449 | vm_metrics['vmware_vm_num_cpu'].add_metric([virtual_machine.name, vm_host_name], num_cpu) 450 | 451 | # Get metrics for poweredOn vms only 452 | if power_state: 453 | if summary.runtime.bootTime: 454 | vm_metrics['vmware_vm_boot_timestamp_seconds'].add_metric([virtual_machine.name, 455 | vm_host_name], 456 | self._to_epoch(summary.runtime.bootTime)) 457 | 458 | for p in perf_list: 459 | self.threader.thread_it(self._vmware_get_vm_perf_metric, 460 | [content, counter_info, p, virtual_machine, vm_host_name, vm_metrics]) 461 | 462 | # Debug Me. log("Finished VM: " + vm.name) 463 | 464 | def _vmware_get_vm_perf_metric(self, content, counter_info, perf_metric, virtual_machine, vm_host_name, vm_metrics): 465 | """ 466 | Get vm perf metric 467 | """ 468 | perf_metric_name = 'vmware_vm_' + perf_metric.replace('.', '_') 469 | counter_key = counter_info[perf_metric] 470 | metric_id = vim.PerformanceManager.MetricId( 471 | counterId=counter_key, 472 | instance='' 473 | ) 474 | spec = vim.PerformanceManager.QuerySpec( 475 | maxSample=1, 476 | entity=virtual_machine, 477 | metricId=[metric_id], 478 | intervalId=20 479 | ) 480 | result = content.perfManager.QueryStats(querySpec=[spec]) 481 | # DEBUG ME: log("{0} {1}: {2}".format(vm.name, p, float(sum(result[0].value[0].value)))) 482 | try: 483 | vm_metrics[perf_metric_name].add_metric([virtual_machine.name, vm_host_name], 484 | float(sum(result[0].value[0].value))) 485 | except: # noqa: E722 486 | log("Error, cannot get vm metrics {0} for {1}".format(perf_metric_name, 487 | virtual_machine.name)) 488 | 489 | def _vmware_get_hosts(self, content, host_metrics): 490 | """ 491 | Get Host (ESXi) information 492 | """ 493 | log("Starting host metric collection") 494 | hosts = self._vmware_get_obj(content, [vim.HostSystem]) 495 | for host in hosts: 496 | summary = host.summary 497 | # Power state 498 | power_state = 1 if summary.runtime.powerState == 'poweredOn' else 0 499 | host_metrics['vmware_host_power_state'].add_metric([host.name], power_state) 500 | 501 | if power_state: 502 | self.threader.thread_it(self._vmware_get_host_metrics, [host, host_metrics, summary]) 503 | log("Finished host metric collection") 504 | 505 | def _vmware_get_host_metrics(self, host, host_metrics, summary): 506 | """ 507 | Get Host Metrics 508 | """ 509 | # Uptime 510 | if summary.runtime.bootTime: 511 | host_metrics['vmware_host_boot_timestamp_seconds'].add_metric([host.name], 512 | self._to_epoch( 513 | summary.runtime.bootTime) 514 | ) 515 | # CPU Usage (in Mhz) 516 | host_metrics['vmware_host_cpu_usage'].add_metric([host.name], summary.quickStats.overallCpuUsage) 517 | cpu_core_num = summary.hardware.numCpuCores 518 | cpu_total = summary.hardware.cpuMhz * cpu_core_num 519 | host_metrics['vmware_host_cpu_max'].add_metric([host.name], cpu_total) 520 | 521 | # Memory Usage (in MB) 522 | host_metrics['vmware_host_memory_usage'].add_metric([host.name], summary.quickStats.overallMemoryUsage) 523 | host_metrics['vmware_host_memory_max'].add_metric([host.name], float(summary.hardware.memorySize) / 1024 / 1024) 524 | 525 | 526 | def log(data): 527 | """ 528 | Log any message in a uniform format 529 | """ 530 | print("[{0}] {1}".format(datetime.utcnow().replace(tzinfo=pytz.utc), data)) 531 | 532 | 533 | def main(): 534 | """ start up twisted reactor """ 535 | parser = argparse.ArgumentParser(description='VMWare metrics exporter for Prometheus') 536 | parser.add_argument('-c', '--config', dest='config_file', 537 | default=None, help="configuration file") 538 | parser.add_argument('-p', '--port', dest='port', type=int, 539 | default=9272, help="HTTP port to expose metrics") 540 | 541 | args = parser.parse_args() 542 | 543 | # Start up the server to expose the metrics. 544 | root = VMWareMetricsResource() 545 | root.configure(args) 546 | root.putChild(b'metrics', VMWareMetricsResource()) 547 | root.putChild(b'healthz', VMWareMetricsResource()) 548 | 549 | factory = Site(root) 550 | log("Starting web server on port {}".format(args.port)) 551 | endpoint = endpoints.TCP4ServerEndpoint(reactor, args.port) 552 | endpoint.listen(factory) 553 | reactor.run() 554 | 555 | 556 | if __name__ == '__main__': 557 | main() 558 | --------------------------------------------------------------------------------