├── .circleci └── config.yml ├── .github ├── CODEOWNERS └── stale.yml ├── .gitignore ├── CHANGELOG ├── Dockerfile ├── LICENSE ├── README.md ├── elasticstat ├── __init__.py └── elasticstat.py ├── extra └── man │ ├── README.md │ ├── elasticstat.1 │ └── elasticstat.ronn ├── requirements ├── dev.txt └── prod.txt ├── setup.py └── tox.ini /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | references: 3 | objectrocket-docker-auth: &objectrocket-docker-auth 4 | auth: 5 | username: ${DOCKER_USERNAME} 6 | password: ${DOCKER_PASSWORD} 7 | context-to-use: &context-to-use 8 | context: objectrocket-shared 9 | jobs: 10 | lint_test: 11 | docker: 12 | - <<: *objectrocket-docker-auth 13 | image: circleci/python:2.7.13 14 | steps: 15 | - checkout 16 | 17 | - run: 18 | name: install test dependencies 19 | command: sudo pip install --upgrade pip tox 20 | - run: 21 | name: lint and test 22 | command: tox -r 23 | 24 | workflows: 25 | version: 2 26 | basic-workflow: 27 | jobs: 28 | - lint_test: *context-to-use 29 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # Each line is a file pattern followed by one or more owners. 2 | 3 | # elasticstat 4 | * @objectrocket/beta @paulrossmeier 5 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 30 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - security 8 | # Label to use when marking an issue as stale 9 | staleLabel: stale 10 | # Comment to post when marking an issue as stale. Set to `false` to disable 11 | markComment: > 12 | This issue has been automatically marked as stale because it has not had 13 | recent activity. It will be closed if no further activity occurs. Thank you 14 | for your contributions. 15 | # Comment to post when closing a stale issue. Set to `false` to disable 16 | closeComment: false 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | *.egg-info 4 | build/ 5 | dist/ 6 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | ## [1.3.5] - 2021-5-24 Dependabot 5 | ### Added 6 | * Dependabot bumps py from 1.8.1 to 1.10.0. 
7 | 8 | ## [1.3.4] - 2020-1-15 Paul Rossmeier 9 | ### Added 10 | * requirements.txt 11 | * elasticstat/elasticstat.py - added version discovery to _parse_threadpools - updates the threadpool list to match versions above or below 7 12 | * elasticstat/elasticstat.py - added packages "json", "re", "packaging" to the import commands 13 | * elasticstat/elasticstat.py - added "write" to the default threadpools 14 | ### Changed 15 | * elasticstat/elasticstat.py - moved the self.threadpools assignment after client creation to use version discovery 16 | * setup.py - added packaging and certifi to 'install_requires' 17 | 18 | ## [1.3.5] - 2020-4-14 Paul Rossmeier 19 | ### Added 20 | * requirements/prod.txt - added for requirements when installing the package 21 | * requirements/dev.txt - added for tox to run 22 | * tox.ini - added for tox configuration 23 | * .circleci/config.yml - added CircleCI integration 24 | ### Changed 25 | * setup.py - added discovery of a prod/dev variable to allow for different Python requirements files 26 | * elasticstat/elasticstat.py - fixed all linting errors reported by flake8 27 | ### Removed 28 | * requirements.txt - removed in favor of the requirements dir 29 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2-slim 2 | 3 | COPY . /usr/src/elasticstat/ 4 | WORKDIR /usr/src/elasticstat/ 5 | 6 | RUN pip install --no-cache . 7 | 8 | ENTRYPOINT [ "python", "./elasticstat/elasticstat.py" ] 9 | CMD [ "--help" ] 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2015 Rackspace US, Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elasticstat 2 | 3 | Written By: Jeff Tharp, http://objectrocket.com/elasticsearch 4 | 5 | ## Description 6 | Elasticstat is a utility for real-time performance monitoring of an Elasticsearch cluster from the command line, 7 | much like how the Unix utilities iostat or vmstat work. The frequency of updates can be controlled via the DELAYINTERVAL 8 | optional parameter, which specifies a delay in seconds after each update. 9 | 10 | Performance metrics shown are based on the articles 11 | [Cluster Health](https://www.elastic.co/guide/en/elasticsearch/guide/current/_cluster_health.html) and 12 | [Monitoring Individual Nodes](https://www.elastic.co/guide/en/elasticsearch/guide/current/_monitoring_individual_nodes.html) 13 | from the Elasticsearch Definitive Guide. Please refer to these articles for further insight as to the significance of each 14 | metric.
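For example, a typical invocation looks like the following (the hostname, port, and Docker image tag are placeholders for illustration, not values from the project itself); the Docker variant assumes the image is built from the repository's Dockerfile:

```
# sample cluster and node stats every 5 seconds
elasticstat --host es01.example.com --port 9200 5

# or build and run the bundled Docker image
docker build -t elasticstat .
docker run --rm elasticstat --host es01.example.com --port 9200 5
```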
15 | 16 | ## Requirements 17 | 18 | - python 2.6+ 19 | - [elasticsearch-py](http://elasticsearch-py.rtfd.org/) 20 | - Access to an Elasticsearch 1.5.0+ cluster you wish to monitor (via either HTTP or HTTPS) 21 | 22 | ## Install 23 | 24 | Install `elasticstat` via [pip](https://pypi.python.org/pypi/elasticstat): 25 | 26 | ``` 27 | pip install elasticstat 28 | ``` 29 | 30 | ## Usage 31 | 32 | ``` 33 | elasticstat [-h HOSTLIST] [--port PORT] [-u USERNAME] 34 | [-p [PASSWORD]] [--ssl] [-c CATEGORY [CATEGORY ...]] 35 | [-t THREADPOOL [THREADPOOL ...]] [-C] 36 | [DELAYINTERVAL] 37 | 38 | Elasticstat is a utility for real-time performance monitoring of an Elasticsearch cluster from the command line 39 | 40 | positional arguments: 41 | DELAYINTERVAL How long to delay between updates, in seconds 42 | 43 | optional arguments: 44 | -h HOSTLIST, --host HOSTLIST 45 | Host in Elasticsearch cluster (or a comma-delimited 46 | list of hosts from the same cluster) 47 | --port PORT HTTP Port (or include as host:port in HOSTLIST) 48 | -u USERNAME, --username USERNAME 49 | Username 50 | -p [PASSWORD], --password [PASSWORD] 51 | Password (if USERNAME is specified but not PASSWORD, 52 | will prompt for password) 53 | --ssl Connect using TLS/SSL 54 | -c CATEGORY [CATEGORY ...], --categories CATEGORY [CATEGORY ...] 55 | Statistic categories to show [all or choose from os, 56 | jvm, threads, fielddata, connections, data_nodes] 57 | -t THREADPOOL [THREADPOOL ...], --threadpools THREADPOOL [THREADPOOL ...] 58 | Threadpools to show 59 | -C, --no-color Display without ANSI color output 60 | ``` 61 | 62 | ## Cluster-level Metrics 63 | 64 | - cluster: the name of the cluster 65 | - status: the familiar green/yellow/red status of the cluster - yellow indicates at least one replica shard is unavailable, red indicates at least one primary shard is unavailable. 
66 | - shards: total number of active primary and replica shards across all indices 67 | - pri: the number of active / allocated primary shards across all indices 68 | - relo: number of shards currently relocating from one data node to another 69 | - init: number of shards being freshly created 70 | - unassign: number of shards defined in an index but not allocated to a data node 71 | - pending tasks: the number of tasks pending (see [Pending Tasks](https://www.elastic.co/guide/en/elasticsearch/guide/current/_pending_tasks.html)) 72 | - time: current local time for this update 73 | 74 | ## Node-level Metrics 75 | 76 | - general 77 | - node: node name, typically a shortened version of the hostname of the node 78 | - role: the [role](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html) of this node in the cluster as follows: 79 | - ALL: a node serving as both a master and data node (Elasticsearch's default role) -- node.master = true, node.data = true 80 | - DATA: a data-only node, node.master = false, node.data = true 81 | - MST: a master-only node, node.master = true, node.data = false -- the active cluster master is marked with an '*' 82 | - RTR: a client node, node.master = false, node.data = false 83 | - UNK: node with an unknown or undetermined role 84 | - os 85 | - load: the 1 minute / 5 minute / 15 minute [load average](http://blog.scoutapp.com/articles/2009/07/31/understanding-load-averages) of the node (only 1 minute load average for Elasticsearch 2.x+) 86 | - mem: percentage of total memory used on the node (including memory used by the kernel and other processes besides Elasticsearch) 87 | - [jvm](https://www.elastic.co/guide/en/elasticsearch/guide/current/_monitoring_individual_nodes.html#_jvm_section) 88 | - heap: percentage of Java heap memory in use. Java garbage collections occur when this reaches or exceeds 75%. 89 | - old sz: total size of the memory pool for the old generation portion of the Java heap 90 | - old gc: number of garbage collection events that have occurred, and their cumulative time since the last update, for the old generation region of Java heap 91 | - young gc: number of garbage collection events that have occurred, and their cumulative time since the last update, for the young (aka eden) generation region of Java heap 92 | - threads ([threadpools](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-threadpool.html)): number of active | queued | rejected threads for each threadpool.
Default threadpools listed are as follows: 93 | - index: (non-bulk) indexing requests 94 | - search: all search and query requests 95 | - bulk: bulk requests 96 | - get: all get-by-ID operations 97 | - [fielddata](https://www.elastic.co/guide/en/elasticsearch/guide/current/_limiting_memory_usage.html#fielddata-size) 98 | - fde: count of field data evictions that have occurred since last update 99 | - fdt: number of times the field data circuit breaker has tripped since the last update 100 | - connections 101 | - hconn: number of active HTTP/HTTPS connections to this node (REST API) 102 | - tconn: number of active transport connections to this node (Java API, includes intra-cluster node-to-node connections) 103 | - data_nodes: metrics useful only for data-bearing nodes 104 | - merges: total time spent in Lucene segment merges since the last time the node was restarted 105 | - idx st: [index store throttle](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-store.html#store-throttling), the total time indexing has been throttled to a single thread since the last time the node was restarted (see [Segments and Merging](https://www.elastic.co/guide/en/elasticsearch/guide/current/indexing-performance.html#segments-and-merging)) 106 | - disk usage: the total space used and percentage of space used for storing Elasticsearch data files 107 | - docs: the total number of documents in all index shards allocated to this node. If there is a second number, this is the total number of deleted documents not yet merged 108 | 109 | ## License 110 | 111 | Copyright 2015 Rackspace US, Inc. 112 | 113 | Licensed under the Apache License, Version 2.0 (the "License"); 114 | you may not use this file except in compliance with the License. 115 | You may obtain a copy of the License at 116 | 117 | http://www.apache.org/licenses/LICENSE-2.0 118 | 119 | Unless required by applicable law or agreed to in writing, software 120 | distributed under the License is distributed on an "AS IS" BASIS, 121 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 122 | See the License for the specific language governing permissions and 123 | limitations under the License. -------------------------------------------------------------------------------- /elasticstat/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Jeff Tharp' 2 | __version__ = '1.3.5' 3 | -------------------------------------------------------------------------------- /elasticstat/elasticstat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c)2015 Rackspace US, Inc. 4 | # All Rights Reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); you may 7 | # not use this file except in compliance with the License. You may obtain 8 | # a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 14 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 15 | # License for the specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | import argparse 19 | import datetime 20 | import getpass 21 | import signal 22 | import sys 23 | import time 24 | import json 25 | import re 26 | 27 | from packaging import version 28 | from elasticsearch import Elasticsearch 29 | from urllib3.util import parse_url 30 | 31 | CLUSTER_TEMPLATE = {} 32 | CLUSTER_TEMPLATE['general'] = """{cluster_name:33} {status:6}""" 33 | CLUSTER_TEMPLATE['shards'] = """{active_shards:>6} {active_primary_shards:>4} {relocating_shards:>4} {initializing_shards:>4} {unassigned_shards:>8}""" 34 | CLUSTER_TEMPLATE['tasks'] = """{number_of_pending_tasks:>13}""" 35 | CLUSTER_TEMPLATE['time'] = """{timestamp:8}""" 36 | CLUSTER_HEADINGS = {} 37 | CLUSTER_HEADINGS["cluster_name"] = "cluster" 38 | CLUSTER_HEADINGS["status"] = "status" 39 | CLUSTER_HEADINGS["active_shards"] = "shards" 40 | CLUSTER_HEADINGS["active_primary_shards"] = "pri" 41 | CLUSTER_HEADINGS["relocating_shards"] = "relo" 42 | CLUSTER_HEADINGS["initializing_shards"] = "init" 43 | CLUSTER_HEADINGS["unassigned_shards"] = "unassign" 44 | CLUSTER_HEADINGS["number_of_pending_tasks"] = "pending tasks" 45 | CLUSTER_HEADINGS["timestamp"] = "time" 46 | CLUSTER_CATEGORIES = ['general', 'shards', 'tasks', 'time'] 47 | 48 | NODES_TEMPLATE = {} 49 | NODES_TEMPLATE['general'] = """{name:24} {role:<6}""" 50 | NODES_TEMPLATE['os'] = """{load_avg:>18} {used_mem:>4}""" 51 | NODES_TEMPLATE['jvm'] = """{used_heap:>4} {old_gc_sz:8} {old_gc:8} {young_gc:8}""" 52 | NODES_TEMPLATE['threads'] = """{threads:<8}""" 53 | NODES_TEMPLATE['fielddata'] = """{fielddata:^7}""" 54 | NODES_TEMPLATE['connections'] = """{http_conn:>6} {transport_conn:>6}""" 55 | NODES_TEMPLATE['data_nodes'] = """{merge_time:>8} {store_throttle:>8} {fs:>16} {docs}""" 56 | NODES_FAILED_TEMPLATE = """{name:24} {role:<6} (No data received, node may have left cluster)""" 57 | NODE_HEADINGS = {} 58 | NODE_HEADINGS["name"] = "nodes" 59 | NODE_HEADINGS["role"] = "role" 60 | NODE_HEADINGS["load_avg"] = "load" 61 | NODE_HEADINGS["used_mem"] = "mem" 62 | NODE_HEADINGS["used_heap"] = "heap" 63 | NODE_HEADINGS["old_gc_sz"] = "old sz" 64 | NODE_HEADINGS["old_gc"] = "old gc" 65 | NODE_HEADINGS["young_gc"] = "young gc" 66 | NODE_HEADINGS["fielddata"] = "fde|fdt" 67 | NODE_HEADINGS["http_conn"] = "hconn" 68 | NODE_HEADINGS["transport_conn"] = "tconn" 69 | NODE_HEADINGS["merge_time"] = "merges" 70 | NODE_HEADINGS["store_throttle"] = "idx st" 71 | NODE_HEADINGS["docs"] = "docs" 72 | NODE_HEADINGS["fs"] = "disk usage" 73 | DEFAULT_THREAD_POOLS = ["index", "search", "bulk", "get", "write"] 74 | CATEGORIES = ['general', 'os', 'jvm', 'threads', 'fielddata', 'connections', 'data_nodes'] 75 | 76 | 77 | class ESArgParser(argparse.ArgumentParser): 78 | """ArgumentParser which prints help by default on any arg parsing error""" 79 | def error(self, message): 80 | self.print_help() 81 | sys.exit(2) 82 | 83 | 84 | class ESColors: 85 | """ANSI escape codes for color output""" 86 | END = '\033[00m' 87 | RED = '\033[0;31m' 88 | GREEN = '\033[0;32m' 89 | YELLOW = '\033[0;33m' 90 | GRAY = '\033[1;30m' 91 | WHITE = '\033[1;37m' 92 | 93 | 94 | class Elasticstat: 95 | """Elasticstat""" 96 | 97 | STATUS_COLOR = {'red': ESColors.RED, 'green': ESColors.GREEN, 'yellow': ESColors.YELLOW} 98 | 99 | def __init__(self, args): 100 | self.sleep_interval = args.delay_interval 101 | self.node_counters = {} 102 | self.node_counters['gc'] = {} 103 | self.node_counters['fd'] = {} 104 | self.node_counters['hconn'] = {} 105 | self.nodes_list = [] # used for detecting new nodes 106 | self.nodes_by_role 
= {} # main list of nodes, organized by role 107 | self.node_names = {} # node names, organized by id 108 | self.new_nodes = [] # used to track new nodes that join the cluster 109 | self.active_master = "" 110 | self.no_color = args.no_color 111 | self.categories = self._parse_categories(args.categories) 112 | self.cluster_categories = CLUSTER_CATEGORIES 113 | if args.no_pending_tasks: 114 | # Elasticsearch pre v.1.5 does not include number of pending tasks in cluster health 115 | self.cluster_categories.remove('tasks') 116 | 117 | # Create Elasticsearch client 118 | self.es_client = Elasticsearch(self._parse_connection_properties(args.hostlist, args.port, args.username, 119 | args.password, args.use_ssl)) 120 | # moving threadpool after client creation to use version discovery 121 | self.threadpools = self._parse_threadpools(args.threadpools) 122 | 123 | def _parse_connection_properties(self, host, port, username, password, use_ssl): 124 | hosts_list = [] 125 | 126 | if isinstance(host, str): 127 | # Force to a list, split on ',' if multiple 128 | host = host.split(',') 129 | 130 | for entity in host: 131 | # Loop over the hosts and parse connection properties 132 | host_properties = {} 133 | 134 | parsed_uri = parse_url(entity) 135 | host_properties['host'] = parsed_uri.host 136 | if parsed_uri.port is not None: 137 | host_properties['port'] = parsed_uri.port 138 | else: 139 | host_properties['port'] = port 140 | 141 | if parsed_uri.scheme == 'https' or use_ssl is True: 142 | host_properties['use_ssl'] = True 143 | 144 | if parsed_uri.auth is not None: 145 | host_properties['http_auth'] = parsed_uri.auth 146 | elif username is not None: 147 | if password is None or password == 'PROMPT': 148 | password = getpass.getpass() 149 | host_properties['http_auth'] = (username, password) 150 | 151 | hosts_list.append(host_properties) 152 | return hosts_list 153 | 154 | def _parse_categories(self, categories): 155 | if isinstance(categories, list): 156 | if categories[0] == 'all': 157 | return CATEGORIES 158 | if ',' in categories[0]: 159 | categories = categories[0].split(',') 160 | else: 161 | if categories == 'all': 162 | return CATEGORIES 163 | for category in categories: 164 | if category not in CATEGORIES: 165 | msg = "{0} is not valid, please choose categories from {1}".format(category, ', '.join(CATEGORIES[1:])) 166 | raise argparse.ArgumentTypeError(msg) 167 | return ['general'] + categories 168 | 169 | def _parse_threadpools(self, threadpools): 170 | # adding version discovery for ES7 to get correct threadpool 171 | if version.parse(json.dumps(self.es_client.info()['version']['number']).strip('"')) > version.parse("7.0.0"): 172 | threadpools = filter(None, [re.sub(r".*index*", r"", i) for i in threadpools]) 173 | threadpools = filter(None, [re.sub(r".*bulk*", r"", i) for i in threadpools]) 174 | else: 175 | threadpools = filter(None, [re.sub(r".*write*", r"", i) for i in threadpools]) 176 | # end vesion discovery 177 | if isinstance(threadpools, list) and ',' in threadpools[0]: 178 | threadpools = threadpools[0].split(',') 179 | return threadpools 180 | 181 | def colorize(self, msg, color): 182 | if self.no_color is True: 183 | return(msg) 184 | else: 185 | return(color + msg + ESColors.END) 186 | 187 | def thetime(self): 188 | return datetime.datetime.now().strftime("%H:%M:%S") 189 | 190 | def size_human(self, size): 191 | for unit in ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB']: 192 | if abs(size) < 1024.0: 193 | return "{:6.2f} {}".format(size, unit) 194 | size /= 1024.0 195 | 
return "{:6.2f} {}".format(size, 'YB') 196 | 197 | def get_disk_usage(self, node_fs_stats): 198 | # Calculate used disk space 199 | if node_fs_stats["total"] == {}: 200 | # Not a data node 201 | return "-" 202 | 203 | total_in_bytes = node_fs_stats["total"]["total_in_bytes"] 204 | used_in_bytes = total_in_bytes - node_fs_stats["total"]["available_in_bytes"] 205 | 206 | used_percent = int((float(used_in_bytes) / float(total_in_bytes)) * 100) 207 | used_human = self.size_human(used_in_bytes) 208 | 209 | return "{}|{}%".format(used_human, used_percent) 210 | 211 | def get_role(self, node_id, node_stats): 212 | try: 213 | # Section to handle ES 5 214 | role = node_stats['nodes'][node_id]['roles'] 215 | if 'data' in role: 216 | return "DATA" 217 | if 'master' in role: 218 | return "MST" 219 | if 'ingest' in role: 220 | return "ING" 221 | else: 222 | return "UNK" 223 | except KeyError: 224 | # Section to handle ES < 2.x 225 | ismaster = 'true' 226 | isdata = 'true' 227 | role = node_stats['nodes'][node_id]['attributes'] 228 | if 'data' in role: 229 | isdata = role['data'] 230 | if 'master' in role: 231 | ismaster = role['master'] 232 | if ismaster == 'true' and isdata == 'true': 233 | return "ALL" 234 | elif ismaster == 'true' and isdata == 'false': 235 | return "MST" 236 | elif ismaster == 'false' and isdata == 'true': 237 | return "DATA" 238 | elif ismaster == 'false' and isdata == 'false': 239 | return "RTR" 240 | else: 241 | return "UNK" 242 | else: 243 | # Section to handle ES 6.x 244 | role = node_stats['nodes'][node_id]['nodeRole'] 245 | if 'data' in role: 246 | return "DATA" 247 | if 'master' in role: 248 | return "MST" 249 | if 'ingest' in role: 250 | return "ING" 251 | else: 252 | return "UNK" 253 | 254 | def get_gc_stats(self, node_id, node_gc_stats): 255 | # check if this is a new node 256 | if node_id not in self.node_counters['gc']: 257 | # new so init counters and return no data 258 | self.node_counters['gc'][node_id] = {'old': 0, 'young': 0} 259 | self.node_counters['gc'][node_id]['old'] = node_gc_stats['old']['collection_count'] 260 | self.node_counters['gc'][node_id]['young'] = node_gc_stats['young']['collection_count'] 261 | return("-|-", "-|-") 262 | else: 263 | # existing node, so calculate the new deltas, update counters, and return results 264 | old_gc_count = node_gc_stats['old']['collection_count'] 265 | young_gc_count = node_gc_stats['young']['collection_count'] 266 | old_gc_delta = old_gc_count - self.node_counters['gc'][node_id]['old'] 267 | young_gc_delta = young_gc_count - self.node_counters['gc'][node_id]['young'] 268 | self.node_counters['gc'][node_id]['old'] = old_gc_count 269 | self.node_counters['gc'][node_id]['young'] = young_gc_count 270 | old_gc_results = "{0}|{0}ms".format(old_gc_delta, node_gc_stats['old']['collection_time_in_millis']) 271 | young_gc_results = "{0}|{0}ms".format(young_gc_delta, node_gc_stats['young']['collection_time_in_millis']) 272 | return(old_gc_results, young_gc_results) 273 | 274 | def get_fd_stats(self, node_id, current_evictions, current_tripped): 275 | # check if this is a new node 276 | if node_id not in self.node_counters['fd']: 277 | # new so init counters and return no data 278 | self.node_counters['fd'][node_id] = {'fde': 0, 'fdt': 0} 279 | self.node_counters['fd'][node_id]['fde'] = current_evictions 280 | self.node_counters['fd'][node_id]['fdt'] = current_tripped 281 | return("-|-") 282 | else: 283 | # existing node, so calc new deltas, update counters, and return results 284 | fde_delta = current_evictions - 
self.node_counters['fd'][node_id]['fde'] 285 | self.node_counters['fd'][node_id]['fde'] = current_evictions 286 | fdt_delta = current_tripped - self.node_counters['fd'][node_id]['fdt'] 287 | self.node_counters['fd'][node_id]['fdt'] = current_tripped 288 | return("{0}|{1}".format(fde_delta, fdt_delta)) 289 | 290 | def get_http_conns(self, node_id, http_conns): 291 | # check if this is a new node 292 | if node_id not in self.node_counters['hconn']: 293 | self.node_counters['hconn'][node_id] = http_conns['total_opened'] 294 | return ("{0}|-".format(http_conns['current_open'])) 295 | else: 296 | open_delta = http_conns['total_opened'] - self.node_counters['hconn'][node_id] 297 | self.node_counters['hconn'][node_id] = http_conns['total_opened'] 298 | return("{0}|{1}".format(http_conns['current_open'], open_delta)) 299 | 300 | def process_node_general(self, role, node_id, node): 301 | if node_id in self.new_nodes: 302 | # Flag that this is a node that joined the cluster this round 303 | node_name = node['name'] + "+" 304 | else: 305 | node_name = node['name'] 306 | if self.active_master == node_id: 307 | # Flag active master in role column 308 | node_role = role + "*" 309 | else: 310 | node_role = role 311 | return(NODES_TEMPLATE['general'].format(name=node_name, role=node_role)) 312 | 313 | def process_node_os(self, role, node_id, node): 314 | if 'cpu' in node['os'] and 'load_average' in node['os']['cpu']: 315 | # Elasticsearch 5.x+ move load average to cpu key 316 | node_load_avgs = [] 317 | for load_avg in node['os']['cpu']['load_average'].values(): 318 | node_load_avgs.append(load_avg) 319 | node_load_avg = "/".join("{0:.2f}".format(x) for x in node_load_avgs) 320 | else: 321 | # Pre Elasticsearch 5.x 322 | node_load_avg = node['os'].get('load_average') 323 | if isinstance(node_load_avg, list): 324 | node_load_avg = "/".join(str(x) for x in node_load_avg) 325 | elif isinstance(node_load_avg, float): 326 | # Elasticsearch 2.0-2.3 only return 1 load average, not the standard 5/10/15 min avgs 327 | node_load_avg = "{0:.2f}".format(node_load_avg) 328 | else: 329 | node_load_avg = 'N/A' 330 | 331 | if 'mem' in node['os']: 332 | node_used_mem = "{0}%".format(node['os']['mem']['used_percent']) 333 | else: 334 | node_used_mem = "N/A" 335 | return(NODES_TEMPLATE['os'].format(load_avg=node_load_avg, used_mem=node_used_mem)) 336 | 337 | def process_node_jvm(self, role, node_id, node): 338 | processed_node_jvm = {} 339 | processed_node_jvm['used_heap'] = "{0}%".format(node['jvm']['mem']['heap_used_percent']) 340 | processed_node_jvm['old_gc_sz'] = node['jvm']['mem']['pools']['old']['used'] 341 | node_gc_stats = node['jvm']['gc']['collectors'] 342 | processed_node_jvm['old_gc'], processed_node_jvm['young_gc'] = self.get_gc_stats(node_id, node_gc_stats) 343 | return(NODES_TEMPLATE['jvm'].format(**processed_node_jvm)) 344 | 345 | def process_node_threads(self, role, node_id, node): 346 | thread_segments = [] 347 | for pool in self.threadpools: 348 | if pool in node['thread_pool']: 349 | threads = "{0}|{1}|{2}".format(node['thread_pool'][pool]['active'], 350 | node['thread_pool'][pool]['queue'], 351 | node['thread_pool'][pool]['rejected']) 352 | thread_segments.append(NODES_TEMPLATE['threads'].format(threads=threads)) 353 | else: 354 | thread_segments.append(NODES_TEMPLATE['threads'].format(threads='-|-|-')) 355 | return(" ".join(thread_segments)) 356 | 357 | def process_node_fielddata(self, role, node_id, node): 358 | fielddata = self.get_fd_stats(node_id, 359 | node['indices']['fielddata']['evictions'], 
360 | node['breakers']['fielddata']['tripped']) 361 | return(NODES_TEMPLATE['fielddata'].format(fielddata=fielddata)) 362 | 363 | def process_node_connections(self, role, node_id, node): 364 | processed_node_conns = {} 365 | if node.get('http') is None: 366 | node['http'] = {u'total_opened': 0, u'current_open': 0} 367 | processed_node_conns['http_conn'] = self.get_http_conns(node_id, node['http']) 368 | processed_node_conns['transport_conn'] = node['transport']['server_open'] 369 | return(NODES_TEMPLATE['connections'].format(**processed_node_conns)) 370 | 371 | def process_node_data_nodes(self, role, node_id, node): 372 | processed_node_dn = {} 373 | # Data node specific metrics 374 | if role in ['DATA', 'ALL']: 375 | processed_node_dn['merge_time'] = node['indices']['merges']['total_time'] 376 | processed_node_dn['store_throttle'] = node['indices']['store']['size_in_bytes'] 377 | doc_count = node['indices']['docs']['count'] 378 | deleted_count = node['indices']['docs']['deleted'] 379 | if deleted_count > 0: 380 | processed_node_dn['docs'] = "{0}|{1}".format(doc_count, deleted_count) 381 | else: 382 | processed_node_dn['docs'] = str(doc_count) 383 | processed_node_dn['fs'] = self.get_disk_usage(node['fs']) 384 | else: 385 | processed_node_dn['merge_time'] = "-" 386 | processed_node_dn['store_throttle'] = "-" 387 | processed_node_dn['docs'] = "-" 388 | processed_node_dn['fs'] = "-" 389 | return(NODES_TEMPLATE['data_nodes'].format(**processed_node_dn)) 390 | 391 | def process_node(self, role, node_id, node): 392 | node_segments = [] 393 | for category in self.categories: 394 | category_func = getattr(self, 'process_node_' + category) 395 | node_segments.append(category_func(role, node_id, node)) 396 | return(" ".join(node_segments)) 397 | 398 | def process_role(self, role, nodes_stats): 399 | procs = [] 400 | for node_id in self.nodes_by_role[role]: 401 | if node_id not in nodes_stats['nodes']: 402 | # did not get any data on this node, likely it left the cluster 403 | # ...however it may have re-joined the cluster under a new node_id (such as a node restart) 404 | failed_node_name = self.node_names[node_id] 405 | new_nodes_by_name = {nodes_stats['nodes'][id]['name']: id for id in self.new_nodes} 406 | if failed_node_name in new_nodes_by_name: 407 | # ...found it! 
Remove the old node_id, we've already added the new node_id at this point 408 | new_node_id = new_nodes_by_name[failed_node_name] 409 | self.nodes_list.remove(node_id) 410 | self.node_names.pop(node_id) 411 | self.nodes_by_role[role].remove(node_id) 412 | else: 413 | failed_node = {} 414 | failed_node['name'] = failed_node_name + '-' 415 | failed_node['role'] = "({0})".format(role) # Role it had when we last saw this node in the cluster 416 | print self.colorize(NODES_FAILED_TEMPLATE.format(**failed_node), ESColors.GRAY) 417 | continue 418 | # make sure node's role hasn't changed 419 | current_role = self.get_role(node_id, nodes_stats) 420 | if current_role != role: 421 | # Role changed, update lists so output will be correct on next iteration 422 | self.nodes_by_role.setdefault(current_role, []).append(node_id) # add to new role 423 | self.nodes_by_role[role].remove(node_id) # remove from current role 424 | row = self.process_node(current_role, node_id, nodes_stats['nodes'][node_id]) 425 | if node_id in self.new_nodes: 426 | print self.colorize(row, ESColors.WHITE) 427 | else: 428 | print row 429 | 430 | def get_threads_headings(self): 431 | thread_segments = [] 432 | for pool in self.threadpools: 433 | thread_segments.append(NODES_TEMPLATE['threads'].format(threads=pool)) 434 | return(" ".join(thread_segments)) 435 | 436 | def format_headings(self): 437 | """Format both cluster and node headings once and then store for later output""" 438 | cluster_heading_segments = [] 439 | node_heading_segments = [] 440 | 441 | # cluster headings 442 | for category in self.cluster_categories: 443 | cluster_heading_segments.append(CLUSTER_TEMPLATE[category].format(**CLUSTER_HEADINGS)) 444 | self.cluster_headings = " ".join(cluster_heading_segments) 445 | 446 | # node headings 447 | for category in self.categories: 448 | if category == 'threads': 449 | node_heading_segments.append(self.get_threads_headings()) 450 | else: 451 | node_heading_segments.append(NODES_TEMPLATE[category].format(**NODE_HEADINGS)) 452 | self.node_headings = " ".join(node_heading_segments) 453 | 454 | def print_stats(self): 455 | # just run forever until ctrl-c 456 | while True: 457 | cluster_segments = [] 458 | cluster_health = self.es_client.cluster.health() 459 | nodes_stats = self.es_client.nodes.stats(human=True) 460 | self.active_master = self.es_client.cat.master(h="id").strip() # needed to remove trailing newline 461 | 462 | # Print cluster health 463 | cluster_health['timestamp'] = self.thetime() 464 | status = cluster_health['status'] 465 | for category in self.cluster_categories: 466 | cluster_segments.append(CLUSTER_TEMPLATE[category].format(**cluster_health)) 467 | cluster_health_formatted = " ".join(cluster_segments) 468 | print self.colorize(self.cluster_headings, ESColors.GRAY) 469 | print self.colorize(cluster_health_formatted, self.STATUS_COLOR[status]) 470 | 471 | # Nodes can join and leave cluster with each iteration -- in order to report on nodes 472 | # that have left the cluster, maintain a list grouped by role. 
473 | current_nodes_count = len(self.nodes_list) 474 | if current_nodes_count == 0: 475 | # First run, so we need to build the list of nodes by role 476 | for node_id in nodes_stats['nodes']: 477 | self.nodes_list.append(node_id) 478 | self.node_names[node_id] = nodes_stats['nodes'][node_id]['name'] 479 | node_role = self.get_role(node_id, nodes_stats) 480 | self.nodes_by_role.setdefault(node_role, []).append(node_id) 481 | else: 482 | # Check for new nodes that have joined the cluster 483 | self.new_nodes = list(set(nodes_stats['nodes']) - set(self.nodes_list)) 484 | if len(self.new_nodes) > 0: 485 | # At least one new node id found, so add to the list 486 | for node_id in self.new_nodes: 487 | self.nodes_list.append(node_id) 488 | self.node_names[node_id] = nodes_stats['nodes'][node_id]['name'] 489 | node_role = self.get_role(node_id, nodes_stats) 490 | self.nodes_by_role.setdefault(node_role, []).append(node_id) 491 | 492 | # Print node stats 493 | print self.colorize(self.node_headings, ESColors.GRAY) 494 | for role in self.nodes_by_role: 495 | self.process_role(role, nodes_stats) 496 | print "" # space out each run for readability 497 | time.sleep(self.sleep_interval) 498 | 499 | 500 | def main(): 501 | # get command line input 502 | description = 'Elasticstat is a utility for real-time performance monitoring of an Elasticsearch cluster from the command line' 503 | parser = ESArgParser(description=description, add_help=False) 504 | 505 | parser.add_argument('-h', 506 | '--host', 507 | default='localhost', 508 | dest='hostlist', 509 | help='Host in Elasticsearch cluster (or a comma-delimited list of hosts)') 510 | parser.add_argument('--port', 511 | dest='port', 512 | default=9200, 513 | help='HTTP Port (or include as host:port in HOSTLIST)') 514 | parser.add_argument('-u', 515 | '--username', 516 | dest='username', 517 | default=None, 518 | help='Username') 519 | parser.add_argument('-p', 520 | '--password', 521 | dest='password', 522 | nargs='?', 523 | const='PROMPT', 524 | default=None, 525 | help='Password (if USERNAME is specified but not PASSWORD, will prompt for password)') 526 | parser.add_argument('--ssl', 527 | dest='use_ssl', 528 | default=False, 529 | action='store_true', 530 | help='Connect using TLS/SSL') 531 | parser.add_argument('-c', 532 | '--categories', 533 | dest='categories', 534 | default='all', 535 | metavar='CATEGORY', 536 | nargs='+', 537 | help='Statistic categories to show [all or choose from {0}]'.format(', '.join(CATEGORIES[1:]))) 538 | parser.add_argument('-t', 539 | '--threadpools', 540 | dest='threadpools', 541 | default=DEFAULT_THREAD_POOLS, 542 | metavar='THREADPOOL', 543 | nargs='+', 544 | help='Threadpools to show') 545 | parser.add_argument('-C', 546 | '--no-color', 547 | dest='no_color', 548 | action='store_true', 549 | default=False, 550 | help='Display without ANSI color output') 551 | parser.add_argument('--no-pending-tasks', 552 | dest='no_pending_tasks', 553 | default=False, 554 | help='Disable display of pending tasks in cluster health (use for Elasticsearch