├── docker-compose.yml ├── Dockerfile.tmpl ├── LICENSE ├── README.md └── host-stats-logger.py /docker-compose.yml: -------------------------------------------------------------------------------- 1 | docker-host-stats: 2 | image: pitrho/docker-host-stats 3 | tty: true 4 | command: -f 30 -cmdn 5 | links: 6 | - "cadvisor:cadvisor" 7 | net: "container:cadvisor" 8 | volumes: 9 | - /proc:${PROCPATH}:ro 10 | 11 | cadvisor: 12 | image: google/cadvisor:v0.23.2 13 | labels: 14 | io.rancher.scheduler.global: "true" 15 | io.rancher.scheduler.affinity:host_label_ne: ${HOST_STATS_EXCLUDE_LABEL} 16 | io.rancher.sidekicks: docker-host-stats 17 | volumes: 18 | - /:/rootfs:ro 19 | - /var/run:/var/run:rw 20 | - /sys:/sys:ro 21 | - /var/lib/docker/:/var/lib/docker:ro 22 | ports: 23 | - "9090:8080" 24 | -------------------------------------------------------------------------------- /Dockerfile.tmpl: -------------------------------------------------------------------------------- 1 | # Report (log to stdout) host statistics 2 | # 3 | 4 | # Use phusion/baseimage as base image. 
5 | FROM phusion/baseimage:0.9.17 6 | MAINTAINER pitrho 7 | 8 | # Set up the environment 9 | # 10 | ENV DEBIAN_FRONTEND noninteractive 11 | 12 | # Install build deps 13 | # 14 | RUN apt-get update && apt-get -y -q install \ 15 | python \ 16 | python-dev \ 17 | python-distribute \ 18 | python-pip \ 19 | && apt-get clean && rm -rf /var/lib/apt/lists/* 20 | 21 | RUN pip install pyCLI==2.0.3 psutil==4.2.0 requests==2.10.0 python-json-logger==0.1.5 22 | 23 | # Copy logger script 24 | # 25 | COPY host-stats-logger.py /host-stats-logger.py 26 | 27 | # Default to reporting every 30 seconds 28 | # 29 | ENTRYPOINT ["python", "-u", "/host-stats-logger.py"] 30 | CMD ["-f", "30", "-cmdn"] 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Docker Host Stats Reporter 2 | Reports host usage information such as CPU, Memory, and Disk utilization. 3 | Reports to stdout so user can ship this information with other logs to an 4 | aggregator if desired. 5 | 6 | ## Usage 7 | Only requirement is to mount the `/proc` directory in Read-Only mode inside the 8 | container under `/proc_host` (the default value of `--procpath`). The default behavior will report CPU, 9 | Memory, Disk, and Network Utilization every 30 seconds (the image's default command is `-f 30 -cmdn`). 10 | 11 | docker run -v=/proc:/proc_host:ro pitrho/docker-host-stats 12 | 13 | ## Options 14 | User can optionally pass flags to specify what system stats to report and 15 | on what frequency. 16 | 17 | * -c : CPU Utilization (as percentage) 18 | * --combinedcpu : Report CPU as an average across cores instead of per-CPU basis. (Note: `host-stats-logger.py` does not currently define this flag; `-c` always reports both the per-CPU and the combined percentage.) 19 | * -m : Memory Utilization 20 | * -d : Disk Utilization 21 | * --diskpaths : Specific disk paths to report as comma separated list. 22 | Defaults to listing all disks from cAdvisor. 23 | If cAdvisor is not reachable, defaults to reporting '/' 24 | based on results from psutil. 25 | Note: this results in invalid results for non-root disks 26 | when this is run inside a docker container. Recommended use 27 | is to rely on cAdvisor in this case. 28 | * -n : Network Utilization 29 | * -f : Reporting frequency in seconds. Default: 60 seconds 30 | * -k : Optional key to use for printed dict. Default: 'host-stats' 31 | * --cadvisorurl : Base url for Cadvisor. Defaults to http://localhost:8080. Include port. 32 | * --cadvisorapi : API Version to use. 
Defaults to v1.3. 33 | * --procpath : Path to mounted /proc directory. Defaults to /proc_host 34 | * --asbytes : Report usage in bytes. Defaults to reporting in GB (excludes Network). 35 | * --pernic : Report network usage per NIC. Defaults to False. 36 | * --hostname : Specify a hostname to report. Defaults to using Rancher 37 | metadata if available or hostname from python interpreter if not. 38 | * --dotfriendly : Replace keys in the logged usage dict that are not dot-notation 39 | compatible. Currently that only means replacing `/` and `-` with `_` 40 | in the disk mount keys. Defaults to False. 41 | 42 | ## Examples 43 | 44 | Report only CPU and Memory every 10 seconds 45 | 46 | docker run -v=/proc:/proc_host:ro pitrho/docker-host-stats -cm -f 10 47 | 48 | Report CPU, Memory, and Disk Utilization under the key "FooBar" (instead of 49 | the default `host-stats` key). 50 | 51 | docker run -v=/proc:/proc_host:ro pitrho/docker-host-stats -cmd -k "FooBar" 52 | 53 | ## Use with cAdvisor 54 | 55 | It is recommended to run this alongside cAdvisor in order to have accurate 56 | disk usage reporting when more than one mount exists. The provided 57 | `docker-compose.yml` shows a sample configuration. 58 | 59 | This will work in the absence of cAdvisor and fall back to using psutil based 60 | on the mounted /proc directory of your host. This works except for one known 61 | case: disk usage for mounted partitions other than /. 62 | 63 | Note: 64 | * We are binding to the host machine on port 9090, not 8080. This is to avoid 65 | port conflict when using Rancher. 66 | * Be aware that cAdvisor currently does not have a way to disable the web 67 | interface. Therefore, ensure you do not have port 9090 (or whatever port you 68 | decide to bind to the host) accessible publicly unless you really want to. 69 | * For example, only expose that port over your private subnet so public 70 | traffic is not able to access the interface. 
#!/usr/bin/env python
""" host-stats-logger.py

Periodically log host usage statistics (CPU, memory, disk, network) to stdout
as JSON so they can be shipped to a log aggregator.

Disk figures come from cAdvisor when it is reachable; everything else (and
the disk fallback) comes from psutil reading the host's mounted /proc.
"""
import cli.log
import logging
import sys
import psutil
import time
import requests
import json
from pythonjsonlogger import jsonlogger
import socket


# Downgrade logging level of requests library
logging.getLogger('requests').setLevel(logging.ERROR)

# Set up logging specifically for use in a Docker container such that we
# log everything to stdout
root = logging.getLogger()
root.setLevel(logging.DEBUG)
logHandler = logging.StreamHandler(sys.stdout)
logHandler.setLevel(logging.DEBUG)
formatter = jsonlogger.JsonFormatter()
logHandler.setFormatter(formatter)
root.addHandler(logHandler)

# Seconds to wait for any single HTTP call. Without a timeout a hung cAdvisor
# or metadata endpoint would block the reporting loop indefinitely.
REQUEST_TIMEOUT = 5

# Rancher metadata endpoint; only resolvable when running under Rancher.
RANCHER_HOST_META = 'http://rancher-metadata/2015-12-19/self/host/{0}'


def to_gb(num_bytes):
    """ Turn bytes into GB and round to 2 decimal places.
    """
    return round(float(num_bytes) / 1000000000, 2)


def _scaled(num_bytes, asbytes):
    """ Return `num_bytes` untouched when `asbytes` is set, otherwise in GB.
    """
    return num_bytes if asbytes else to_gb(num_bytes)


def _fetch_json(url):
    """ GET `url` and return its decoded JSON body.

    Raises on connection failure, timeout, non-2xx status or invalid JSON;
    callers treat any of those as "cAdvisor is not available".
    """
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    return json.loads(r.content)


def cadvisor_disk_average(host_stats, device, asbytes):
    """ Return the total/used/free/percent used for specified device.

    `host_stats` is the decoded cAdvisor containers response; every entry of
    its 'stats' list is one sample. All samples whose 'filesystem' list
    contains `device` are averaged.

    Returns a dict with keys 'total', 'used', 'free' (bytes when `asbytes`
    is true, otherwise GB), 'percent' and 'status'. When no sample mentions
    `device` the numeric fields are 0 and 'status' explains why.
    """

    disk_usage = {
        'total': 0,
        'percent': 0,
        'used': 0,
        'free': 0,
        'status': 'OK'
    }

    capacity_total = 0
    usage_total = 0
    available_total = 0
    samples = 0
    # Tolerate a response without 'stats', or samples without 'filesystem'.
    for stat in (host_stats or {}).get('stats') or []:
        for mnt in stat.get('filesystem') or []:
            if mnt.get('device') == device:
                capacity_total += mnt['capacity']
                usage_total += mnt['usage']
                available_total += mnt['available']

                samples += 1

    if samples == 0:
        disk_usage['status'] = "No samples for provided mount path ..."
        return disk_usage

    # Floor division keeps byte counts integral on both Python 2 and 3.
    total = capacity_total // samples
    used = usage_total // samples
    free = available_total // samples

    # Percent is derived from the raw byte averages, *before* any GB
    # rounding: a filesystem smaller than ~5 MB rounds to 0.0 GB and would
    # otherwise divide by zero (and larger ones would lose precision).
    if total > 0:
        disk_usage['percent'] = round(float(used) / float(total) * 100, 2)

    disk_usage['total'] = _scaled(total, asbytes)
    disk_usage['used'] = _scaled(used, asbytes)
    disk_usage['free'] = _scaled(free, asbytes)

    return disk_usage


def _disk_usage(mount, host_stats, cadvisor_active, asbytes):
    """ Return the usage dict for one mount, from cAdvisor or from psutil.

    Never raises for a bad mount or a malformed cAdvisor sample; the problem
    is reported through the 'status' field so the logger keeps running.
    """
    disk_usage = {}

    try:
        if cadvisor_active:
            disk_usage = cadvisor_disk_average(host_stats, mount, asbytes)
        else:
            disk = psutil.disk_usage(mount)

            disk_usage['total'] = _scaled(disk.total, asbytes)
            disk_usage['percent'] = disk.percent
            disk_usage['used'] = _scaled(disk.used, asbytes)
            disk_usage['free'] = _scaled(disk.free, asbytes)
            disk_usage['status'] = 'OK'

    except OSError:
        disk_usage['status'] =\
            "Provided mount path does not exist ..."
    except (KeyError, TypeError, AttributeError):
        # A sample was missing capacity/usage/available or had an
        # unexpected shape.
        disk_usage = {'status': "Malformed cAdvisor data for mount ..."}

    return disk_usage


def _select_mounts(disk_paths, machine_stats, cadvisor_active):
    """ Decide which mounts/devices to report on for this cycle.

    Order of precedence: paths given on the command line, then cAdvisor's
    filesystem list, then '/'. cAdvisor device names are only used while
    cAdvisor is active, because psutil expects paths, not device names.
    """
    if disk_paths != "default":
        return [path.strip() for path in disk_paths.split(',')
                if path.strip()]

    if cadvisor_active and machine_stats is not None:
        return [fs['device']
                for fs in machine_stats.get('filesystems') or []]

    return ['/']


def _network_usage(pernic):
    """ Return the 'network' section of the report.

    With `pernic` psutil returns a dict of per-interface named tuples;
    without it a single named tuple, which is reported under 'allnic'.
    """
    net_usage = psutil.net_io_counters(pernic=pernic)

    if net_usage is None:
        interfaces = {}
    elif isinstance(net_usage, dict):
        interfaces = dict(
            (interface, dict(tup._asdict()))
            for interface, tup in net_usage.items()
        )
    else:
        interfaces = {'allnic': dict(net_usage._asdict())}

    return {
        'interfaces': interfaces,
        'status': "OK"
    }


def _resolve_host_name(requested):
    """ Return the host name to report.

    An explicit name wins. For 'auto', ask Rancher metadata and fall back to
    the interpreter's hostname when Rancher is absent, errors out, or
    returns an empty body.
    """
    if requested != 'auto':
        return requested

    try:
        r = requests.get(RANCHER_HOST_META.format("hostname"),
                         timeout=REQUEST_TIMEOUT)
        r.raise_for_status()  # do not use an error page as the host name
        name = r.text.strip()
        if name:
            return name
    except Exception:
        # Deliberate best effort: not running under Rancher is normal.
        pass

    return socket.gethostname()


@cli.log.CommandLineApp
def stats_logger(app):
    """ Log the configured host statistics every `frequency` seconds, forever.
    """
    asbytes = app.params.asbytes  # Report as bytes? Defaults to false, i.e. GB

    # Create API URLs
    # Stats is for last 1 minute's worth of machine usage stats every second
    cadvisor_stats = "{0}/api/{1}/containers"\
        .format(app.params.cadvisorurl, app.params.cadvisorapi)

    # Machine contains high level host statistics (static)
    cadvisor_machine = "{0}/api/{1}/machine"\
        .format(app.params.cadvisorurl, app.params.cadvisorapi)

    # Run this once when container starts to establish whether cAdvisor
    # is available at start time (primarily just for more informative logs).
    # This is re-run every cycle.
    try:
        _fetch_json(cadvisor_machine)
        cadvisor_active = True
    except Exception:
        cadvisor_active = False

    # Get meta details about current host. This is only available when running
    # using Rancher. In absence of Rancher, default to hostname from python
    # interpreter.
    host_name = _resolve_host_name(app.params.hostname)

    logging.info("**********************************")
    logging.info("*** Host Stats Reporter Config ***")
    logging.info("**********************************")
    logging.info("Version: 0.6.0")
    logging.info("Reporting Interval: {0}s".format(app.params.frequency))
    logging.info("Report CPU: {0}".format(app.params.cpu))
    logging.info("Report Memory: {0}".format(app.params.memory))
    logging.info("Report Disk: {0}".format(app.params.disk))
    logging.info("Reporting disk path: {0}".format(app.params.diskpaths))
    logging.info("Report Network: {0}".format(app.params.network))
    logging.info("Report per NIC: {0}".format(app.params.pernic))
    logging.info("Log Key: {0}".format(app.params.key))
    logging.info("/proc Path: {0}".format(app.params.procpath))
    logging.info("Report as GB: {0}".format(not asbytes))
    logging.info("cAdvisor Base: {0}".format(app.params.cadvisorurl))
    logging.info("cAdvisor API: {0}".format(app.params.cadvisorapi))
    logging.info("cAdvisor Active: {0}".format(cadvisor_active))
    logging.info("Host Name: {0}".format(host_name))
    logging.info("Dot-Friendly: {0}".format(app.params.dotfriendly))
    logging.info("**********************************")
    logging.info("")

    psutil.PROCFS_PATH = app.params.procpath  # Set path to /proc from host

    while True:
        log_msg = {}  # Will log as a dict string for downstream parsing.
        log_msg['hostname'] = host_name

        # Retrieve data from cAdvisor. Must re-try each cycle in case cAdvisor
        # goes down (or up). If down, this will attempt to get as much data
        # as possible using psutil.
        machine_stats = None
        host_stats = None
        try:
            machine_stats = _fetch_json(cadvisor_machine)
            host_stats = _fetch_json(cadvisor_stats)
            cadvisor_active = True
        except Exception:
            # `Exception`, not a bare except: Ctrl-C / SystemExit must still
            # be able to stop the logger.
            cadvisor_active = False

        # Add CPU Utilization
        #
        if app.params.cpu:
            cpu_per_cpu = psutil.cpu_percent(percpu=True)
            cpu_combined = psutil.cpu_percent(percpu=False)
            log_msg['cpu'] = {
                'percent_per_cpu': cpu_per_cpu,
                'percent_combined': cpu_combined,
                'status': 'OK'
            }

        # Add Memory Utilization
        #
        if app.params.memory:
            memory = psutil.virtual_memory()

            # We are not reporting any platform specific fields, such as
            # buffers / cached / shared on Linux/BSD
            log_msg['memory'] = {
                'total': _scaled(memory.total, asbytes),
                'available': _scaled(memory.available, asbytes),
                'percent': memory.percent,
                'used': _scaled(memory.used, asbytes),
                'free': _scaled(memory.free, asbytes),
                'status': 'OK'
            }

        # Add Disk Utilization
        #
        if app.params.disk:
            mounts = _select_mounts(
                app.params.diskpaths, machine_stats, cadvisor_active)

            log_msg['disk'] = {}
            for mount in mounts:
                mount_key = mount
                if app.params.dotfriendly:
                    # Replace '/' and '-' with '_' in mount name to use in key
                    mount_key = mount.replace('/', '_').replace('-', '_')
                log_msg['disk'][mount_key] = _disk_usage(
                    mount, host_stats, cadvisor_active, asbytes)

        # Add Network Utilization
        #
        if app.params.network:
            log_msg['network'] = _network_usage(app.params.pernic)

        # Create report dict using provided root key.
        report = {
            app.params.key: log_msg
        }

        logging.info("Reporting stats using cAdvisor: {0}"
                     .format(cadvisor_active), extra=report)  # Log usage ...

        time.sleep(app.params.frequency)  # Sleep ...


stats_logger.add_param(
    "-f",
    "--frequency",
    help="Update frequency for stats",
    default=60,
    type=int
)
stats_logger.add_param(
    "-c",
    "--cpu",
    required=False,
    help="Report CPU Utilization",
    action="store_true",
    default=False
)
stats_logger.add_param(
    "-m",
    "--memory",
    help="Report Memory",
    action="store_true",
    default=False
)
stats_logger.add_param(
    "-d",
    "--disk",
    help="Report Disk",
    action="store_true",
    default=False
)
stats_logger.add_param(
    "--diskpaths",
    help="Specific disk paths to report as comma separated list. "
         "Defaults to listing all disks from cAdvisor. "
         "If cAdvisor not reachable, defaults to '/'. "
         "based on results from psutil. "
         "Note: this results in invalid results for non-root disks depending "
         "on what is mounted internally to this container. Recommended use "
         "is to rely on cAdvisor.",
    default='default',
    type=str
)
stats_logger.add_param(
    "-n",
    "--network",
    help="Report Network",
    action="store_true",
    default=False
)
stats_logger.add_param(
    "--pernic",
    help="Report Network Utilization per NIC. Defaults to False.",
    action="store_true",
    default=False
)
stats_logger.add_param(
    "-k",
    "--key",
    help="Optional key to use for printed dict.",
    default="host-stats",
    type=str
)
stats_logger.add_param(
    "--procpath",
    help="Path to mounted /proc directory. Defaults to /proc_host",
    default="/proc_host",
    type=str
)
stats_logger.add_param(
    "--cadvisorurl",
    help="Base url for Cadvisor. Defaults to http://localhost:8080. Include port.",
    default="http://localhost:8080",
    type=str
)
stats_logger.add_param(
    "--cadvisorapi",
    help="API Version to use. Defaults to v1.3.",
    default="v1.3",
    type=str
)
stats_logger.add_param(
    "--asbytes",
    help="Report relevant usage in bytes instead of gigabytes.",
    action="store_true",
    default=False
)
stats_logger.add_param(
    "--hostname",
    help="Specify a hostname to report. Defaults to using Rancher metadata if available or hostname from python interpreter if not.",
    default="auto",
    type=str
)
stats_logger.add_param(
    "--dotfriendly",
    help="Replace keys in the logged usage dict that are not dot-notation compatible. Currently that only means replacing `/` with `_` in the disk mount keys. Defaults to False.",
    action="store_true",
    default=False
)

if __name__ == "__main__":
    stats_logger.run()