├── tests ├── __init__.py ├── test_constants.py ├── test_event_processor.py └── test_utils.py ├── garbagedog ├── __init__.py ├── constants.py ├── utils.py └── event_processor.py ├── requirements.txt ├── dev_requirements.txt ├── grafana-examples ├── grafana.png └── grafana-example.json ├── .travis.yml ├── test.sh ├── setup.py ├── .gitignore ├── bin └── garbagedog ├── README.md └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /garbagedog/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | datadog 2 | typing 3 | -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | mock 2 | mypy 3 | pycodestyle 4 | pytest 5 | pex 6 | -------------------------------------------------------------------------------- /grafana-examples/grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eero-inc/garbagedog/HEAD/grafana-examples/grafana.png -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.4" 4 | install: 5 | - pip install -r dev_requirements.txt 6 | - pip install -r requirements.txt 7 | script: 8 | - ./test.sh 9 | cache: pip 10 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo "Running 
def test_from_gc_line():
    """A DefNew collection record is classified as ``GCEventType.DEF_NEW``."""
    def_new_record = (
        "2015-05-26T14:45:37.987-0200: 151.126: [GC (Allocation Failure) 151.126: [DefNew: "
        "629119K->69888K(629120K), 0.0584157 secs] 1619346K->1273247K(2027264K), 0.0585007 secs] "
        "[Times: user=0.06 sys=0.00, real=0.06 secs]"
    )

    detected = GCEventType.from_gc_line(def_new_record)

    assert detected == GCEventType.DEF_NEW


def test_from_gc_line_unknown():
    """A line containing none of the known marker texts falls back to ``UNKNOWN``."""
    unrecognised_record = "2015-05-26T14:45:37.987-0200 Nothing Here"

    detected = GCEventType.from_gc_line(unrecognised_record)

    assert detected == GCEventType.UNKNOWN
def test_process_line():
    """Feeding a new timestamped line flushes the buffered record as a timing metric.

    ``_process_line`` receives the Full GC line as the incoming line and the
    DefNew record as the already-buffered ``previous_record``; the assertion
    checks that the buffered DefNew record is the one reported to dogstatsd.
    """
    buffered_def_new_record = (
        "2015-05-26T14:45:37.987-0200: 151.126: [GC (Allocation Failure) 151.126: "
        "[DefNew: 629119K->69888K(629120K), 0.0584157 secs] 1619346K->1273247K(2027264K), "
        "0.0585007 secs] [Times: user=0.06 sys=0.00, real=0.06 secs]"
    )

    incoming_full_gc_line = (
        "2012-04-04T19:08:23.054+0000: 511001.548: [Full GC 511001.549: [CMS2012-04-04T19:08:48.906+0000: "
        "511027.400: [CMS-concurrent-preclean: 51.957/52.341 secs] [Times: user=76.72 sys=0.15, "
        "real=52.34 secs] (concurrent mode failure): 18431999K->16174249K(18432000K), 106.0788490 secs] "
        "29491199K->16174249K(29491200K), [CMS Perm : 69005K->69005K(115372K)], 106.0801410 secs] "
        "[Times: user=106.01 sys=0.00, real=106.06 secs]"
    )

    processor = GCEventProcessor("localhost", "1234", None)
    # Replace the real dogstatsd call so the emitted metric can be inspected.
    processor.stats.timing = Mock()

    processor._process_line(incoming_full_gc_line, buffered_def_new_record)

    expected_call = call('garbagedog_gc_event_duration', 0.06, tags=['stw:True', 'event_type:DefNew'])
    processor.stats.timing.assert_has_calls([expected_call])
# Command-line entry point: parse the options, then hand either a log
# directory or stdin to a GCEventProcessor until interrupted.
parser = argparse.ArgumentParser(
    description='Parse JVM gc.logs and emit stats over dogstatsd',
    usage="garbagedog --log-dir /var/log/eero/",
)

parser.add_argument(
    '--tags',
    help='Extra datadog tags, comma separated; ie "application:actorcluster, version:2017.07.27"',
)

parser.add_argument(
    '--dogstatsd-host',
    default='localhost',
    help='dogstatsd host (default: %(default)s)',
)

parser.add_argument(
    '--dogstatsd-port',
    type=int,
    default=8125,
    help='dogstatsd port (default: %(default)s)',
)

parser.add_argument(
    '--verbose',
    action='store_true',
    help='Emit noisy messages on stdout',
)

parser.add_argument(
    '--log-dir',
    help='Read from this log dir instead of stdin',
)

parser.add_argument(
    '--glob-pattern',
    default="gc.log*",
    help='Glob pattern to select gc.log files (default: %(default)s)',
)

parser.add_argument(
    '--refresh-logfiles-seconds',
    type=int,
    default=60,
    help='How often to recheck --log-dir if there are no logfiles found '
         'or no new loglines have been written (default: %(default)s)',
)

parser.add_argument(
    '--sleep-seconds',
    type=int,
    default=1,
    help='How long to sleep between checking the logfile for new lines (default: %(default)s)',
)

parser.add_argument(
    "--version", "-v",
    action='store_true',
    help="Print version information",
)

args = parser.parse_args()

# --version short-circuits everything else.
if args.version:
    version = pkg_resources.require("garbagedog")[0].version
    print("garbagedog {}".format(version))
    sys.exit(0)

# All spaces are removed before splitting, so "a:1, b:2" yields ["a:1", "b:2"].
parsed_tags = args.tags.replace(' ', '').split(',') if args.tags else []

gc_event_processor = GCEventProcessor(args.dogstatsd_host, args.dogstatsd_port, parsed_tags, args.verbose)
try:
    if not args.log_dir:
        gc_event_processor.process_stdin()
    else:
        gc_event_processor.process_log_directory(
            args.log_dir,
            glob_pattern=args.glob_pattern,
            refresh_logfiles_seconds=args.refresh_logfiles_seconds,
            sleep_seconds=args.sleep_seconds,
        )
except KeyboardInterrupt:
    # Ctrl-C ends the tail loop without printing a traceback.
    pass
class GCEventType(Enum):
    """Kinds of GC log events this tool recognises.

    Each member value is ``(stats_name, gc_text, is_stop_the_world)``:

    * ``stats_name`` - the name stored for use in emitted stats
    * ``gc_text`` - substring whose presence in a log line identifies the event
    * ``is_stop_the_world`` - stop-the-world flag stored for the event

    ``UNKNOWN`` carries only a stats name, so its ``gc_text`` is ``None`` and
    it is never selected by substring matching.
    """

    # Declaration order matters: from_gc_line returns the first member whose
    # gc_text occurs in the line.
    UNKNOWN = "Unknown"
    FULL_GC = ("FullGC", "Full GC", True)
    CONCURRENT_MODE_FAILURE = ("concurrent_mode_failure", "(concurrent mode failure", True)
    PROMOTION_FAILED = ("promotion_failed", "(promotion failed)", True)
    PAR_NEW = ("ParNew", "ParNew", True)
    CMS_INITIAL_MARK = ("CMS_initial_mark", "CMS-initial-mark", True)
    CMS_CONCURRENT_MARK = ("CMS_concurrent_mark", "CMS-concurrent-mark", False)
    CMS_CONCURRENT_ABORTABLE_PRECLEAN = ("CMS_concurrent_abortable_preclean", "CMS-concurrent-abortable-preclean", False)
    CMS_CONCURRENT_PRECLEAN = ("CMS_concurrent_preclean", "CMS-concurrent-preclean", False)
    CMS_REMARK = ("CMS_remark", "CMS-remark", True)
    CMS_CONCURRENT_SWEEP = ("CMS_concurrent_sweep", "CMS-concurrent-sweep", False)
    CMS_CONCURRENT_RESET = ("CMS_concurrent_reset", "CMS-concurrent-reset", False)
    PS_YOUNG_GEN = ("PSYoungGen", "PSYoungGen", True)
    DEF_NEW = ("DefNew", "DefNew", True)

    def __init__(self, stats_name: str, gc_text: str = None, is_stop_the_world: bool = False) -> None:
        # Enum passes the member value (unpacked when it is a tuple) to __init__.
        self.stats_name = stats_name
        self.gc_text = gc_text
        self.is_stop_the_world = is_stop_the_world

    @classmethod
    def from_gc_line(cls, line: str) -> "GCEventType":
        """
        Classify a GC log line by the first event type whose marker text it contains

        :param line: Log line
        :return: Matching GC event type, or ``UNKNOWN`` when no marker text is found
        """
        candidates = (event for event in cls if event.gc_text and event.gc_text in line)
        return next(candidates, cls.UNKNOWN)
(concurrent mode failure): 18431999K->16174249K(18432000K), 106.0788490 secs] " \ 30 | "29491199K->16174249K(29491200K), [CMS Perm : 69005K->69005K(115372K)], 106.0801410 secs] " \ 31 | "[Times: user=106.01 sys=0.00, real=106.06 secs]" 32 | 33 | timestamp, size_info = parse_line_for_sizes(log_line) 34 | assert timestamp == datetime.datetime(2012, 4, 4, 19, 8, 23, 54000, tzinfo=datetime.timezone.utc) 35 | assert size_info == GCSizeInfo(young_begin_k=29491199, 36 | young_end_k=16174249, 37 | young_total_k=29491200, 38 | whole_heap_begin_k=69005, 39 | whole_heap_end_k=69005, 40 | whole_heap_total_k=115372) 41 | 42 | def test_parse_line_for_sizes_no_match(): 43 | log_line = "2015-05-26T14:45:37.987-0200: 151.126: Nothing Happened" 44 | 45 | assert parse_line_for_sizes(log_line) is None 46 | 47 | def test_gc_log_handler(tmpdir): 48 | 49 | gc_log = tmpdir.mkdir("logs").join("gc.log.1") 50 | gc_log.write("") 51 | 52 | with GCLogHandler(os.path.join(str(tmpdir), "logs/")) as gc_log_handler: 53 | log_line_generator = gc_log_handler.get_log_lines() 54 | gc_log.write("hello world") 55 | line = next(log_line_generator) 56 | assert line == "hello world" 57 | 58 | gc_log.write("foo", mode="a") 59 | line = next(log_line_generator) 60 | assert line == "foo" 61 | 62 | def test_gc_log_handler_newest_log(tmpdir): 63 | 64 | gc_log = tmpdir.mkdir("logs").join("gc.log.1") 65 | gc_log.write("") 66 | time.sleep(1) 67 | 68 | gc_log_2 = tmpdir.join("logs").join("gc.log.2") 69 | gc_log_2.write("") 70 | 71 | with GCLogHandler(os.path.join(str(tmpdir), "logs/")) as gc_log_handler: 72 | gc_log.write("foo") 73 | gc_log_2.write("bar") 74 | line = next(gc_log_handler.get_log_lines()) 75 | assert line == "bar" 76 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # garbagedog 🗑🐶 3 | Tail a JVM gc.log and emit stats over the dogstatsd 
[protocol](https://docs.datadoghq.com/developers/dogstatsd/). 4 | 5 | `garbagedog` monitors JVM gc logs and emits stats via dogstatsd. 6 | Normally, these will be received by a local agent such as [telegraf](https://github.com/influxdata/telegraf) or [datadog-agent](https://docs.datadoghq.com/agent/). 7 | 8 | You can use these stats to continuously monitor your GC performance (though active tuning is probably better left to more comprehensive tools). 9 | 10 | Does not support the G1 garbage collector. 11 | 12 | The log parsing logic is based on https://github.com/Netflix-Skunkworks/gcviz 13 | 14 | [![Build Status](https://travis-ci.com/eero-inc/garbagedog.svg?branch=master)](https://travis-ci.com/eero-inc/garbagedog) [![Total Alerts](https://img.shields.io/lgtm/alerts/g/eero-inc/garbagedog.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/eero-inc/garbagedog/alerts/) 15 | ## Installation 16 | 17 | `pip3 install git+ssh://git@github.com/eero-inc/garbagedog.git#egg=garbagedog` (requires python 3.4) 18 | 19 | or 20 | 21 | `wget https://github.com/eero-inc/garbagedog/releases/download/0.0.12/garbagedog-linux-gnu-0.0.12.pex` 22 | 23 | ## JVM Setup 24 | Run your JVM app with the following flags: 25 | ``` 26 | -Xloggc:/var/log/eero/gc.log 27 | -XX:+UseGCLogFileRotation 28 | -XX:GCLogFileSize=64M 29 | -XX:NumberOfGCLogFiles=2 30 | -XX:+PrintGCDetails 31 | -XX:+PrintGCDateStamps 32 | -XX:+PrintGCApplicationConcurrentTime 33 | -XX:+PrintGCApplicationStoppedTime 34 | -XX:+PrintTenuringDistribution 35 | -XX:+PrintPromotionFailure 36 | -XX:+PrintHeapAtGC 37 | -XX:+PrintGCCause 38 | ``` 39 | 40 | ## Usage 41 | ``` 42 | usage: garbagedog --log-dir /var/log/eero/ 43 | 44 | Parse JVM gc.logs and emit stats over dogstatsd 45 | 46 | optional arguments: 47 | -h, --help show this help message and exit 48 | --tags TAGS Extra datadog tags, comma separated; ie 49 | "application:actorcluster, version:2017.07.27" 50 | --dogstatsd-host DOGSTATSD_HOST 51 | dogstatsd host (default: 
localhost) 52 | --dogstatsd-port DOGSTATSD_PORT 53 | dogstatsd port (default: 8125) 54 | --verbose Emit noisy messages on stdout 55 | --log-dir LOG_DIR Read from this log dir instead of stdin 56 | --glob-pattern GLOB_PATTERN 57 | Glob pattern to select gc.log files (default: gc.log*) 58 | --refresh-logfiles-seconds REFRESH_LOGFILES_SECONDS 59 | How often to recheck --log-dir if there are no 60 | logfiles found or no new loglines have been written 61 | (default: 60) 62 | --sleep-seconds SLEEP_SECONDS 63 | How long to sleep between checking the logfile for new 64 | lines (default: 1) 65 | --version, -v Print version information 66 | ``` 67 | 68 | ## Stats 69 | 70 | Timing by event type: `garbagedog_gc_event_duration` 71 | 72 | Allocation rate: `garbagedog_allocation_rate_histogram` 73 | 74 | Promotion rate: `garbagedog_promotion_rate_histogram` 75 | 76 | Old gen GC frequency: `garbagedog_time_between_old_gc` 77 | 78 | Young gen GC frequency: `garbagedog_time_between_young_gc` 79 | 80 | ## Grafana Examples 81 | Example Graphs 82 | ![Grafana Graph Example](grafana-examples/grafana.png?raw=true "Grafana Graph Example") 83 | 84 | [See grafana json](grafana-examples/grafana-example.json) 85 | ## Development 86 | ### Running 87 | ``` 88 | git clone git@github.com:eero-inc/garbagedog.git 89 | cd garbagedog 90 | python3 -m venv ENV 91 | source ENV/bin/activate 92 | pip install -e . 
class GCLogHandler(object):

    def __init__(self,
                 log_directory: str,
                 glob_pattern: str = "gc.log*",
                 refresh_logfiles_seconds: int = 60,
                 sleep_seconds: int = 1,
                 verbose: bool = False) -> None:
        """
        Tail the newest GC log file found in `log_directory`. Works as a context manager and as an iterable:

            with GCLogHandler('/var/log/gc/') as gc_log_handler:
                for line in gc_log_handler:
                    print(line)

        When no new line has been seen for `refresh_logfiles_seconds`, the directory is re-scanned and the
        newest matching file is (re)opened, which is how rotated log files get picked up.

        :param log_directory: Directory to find GC logs
        :param glob_pattern: Pattern to match for garbage collection logs
        :param refresh_logfiles_seconds: How often (in seconds) to check for newer rotated log files
        :param sleep_seconds: How often (in seconds) to poll for new log lines
        :param verbose: If True, print extra info when log files are opened
        """
        # Configuration, stored verbatim.
        self.log_directory = log_directory
        self.glob_pattern = glob_pattern
        self.refresh_logfiles_seconds = refresh_logfiles_seconds
        self.sleep_seconds = sleep_seconds
        self.verbose = verbose

        # Tailing state. The epoch start time guarantees the first freshness check sees a stale handler.
        self.log_file = None  # type: TextIO
        self.last_new_line_seen = datetime.utcfromtimestamp(0)  # type: datetime
        self.previous_record = ""  # type: str

    def __enter__(self):
        # Open the newest log right away so lines written after this point are captured.
        self._load_newest_file()
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        if self.log_file:
            self.log_file.close()

    def __iter__(self):
        return self.get_log_lines()

    def get_log_lines(self) -> Generator:
        """
        Endless generator over new log lines. When the open file has nothing new it sleeps for `sleep_seconds`;
        when nothing new has been seen for more than `refresh_logfiles_seconds` it re-selects the newest log file;
        when no log file exists at all it sleeps for `refresh_logfiles_seconds` before looking again.

        :return: The next log line in GC logs
        """
        while True:
            now = datetime.now()

            # gc.logs rotate, so a long silence may mean we are tailing a file that is no longer written to
            idle_seconds = (now - self.last_new_line_seen).total_seconds()
            if idle_seconds > self.refresh_logfiles_seconds:
                self._load_newest_file()

            if not self.log_file:
                printv("No logfiles found in {}, sleeping for {} seconds"
                       .format(self.log_directory, self.refresh_logfiles_seconds), self.verbose)
                time.sleep(self.refresh_logfiles_seconds)
                continue

            line = self.log_file.readline()
            if not line:
                # Nothing new yet; poll again after a short pause.
                time.sleep(self.sleep_seconds)
                continue

            self.last_new_line_seen = now
            yield line

    def _load_newest_file(self) -> None:
        """
        Open the most recently changed file matching the glob pattern (by ``os.path.getctime``), positioned at
        its end, closing any previously open file. Sets ``log_file`` to None when nothing matches.
        """
        printv("", self.verbose)
        idle_seconds = (datetime.now() - self.last_new_line_seen).total_seconds()
        printv("Last line seen {} seconds ago!".format(idle_seconds), self.verbose)

        candidates = glob.glob(os.path.join(self.log_directory, self.glob_pattern))
        if not candidates:
            self.log_file = None
            return

        newest_log_name = max(candidates, key=os.path.getctime)
        if self.log_file:
            self.log_file.close()
        printv("Now reading from: {}!".format(newest_log_name), self.verbose)

        self.log_file = open(newest_log_name)
        # NOTE(review): seeking to the end skips anything already in the file, including lines written
        # between the last read and this reopen - confirm that is acceptable.
        self.log_file.seek(0, os.SEEK_END)
        self.last_new_line_seen = datetime.now()


def parse_line_for_times(line: str) -> Optional[Tuple[GCEventType, float]]:
    """
    Extract the event type and the ``real=`` duration from a log line

    :param line: Log line
    :return: Tuple of (event type, duration in seconds), or None when the line has no ``real=... secs]`` part
    """
    time_match = TIMES_REGEX.match(line)
    if not time_match:
        return None

    gctime_in_seconds = time_match.group(1)
    return GCEventType.from_gc_line(line), float(gctime_in_seconds)


def parse_line_for_sizes(line: str) -> Optional[Tuple[datetime, GCSizeInfo]]:
    """
    Extract the timestamp and the before/after/total sizes (in K) from a log line

    :param line: Log line
    :return: Tuple of (timestamp, size info object), or None when the line does not match SIZE_REGEX
    """
    size_match = SIZE_REGEX.match(line)
    # NOTE(review): THREE_ARROWS_REGEX is applied with match(), which anchors at the start of the line, so it
    # cannot succeed on a line that also starts with the timestamp SIZE_REGEX requires. tests/test_utils.py
    # expects sizes back from a three-arrow line, so this is left as is - confirm the intent.
    if size_match is None or THREE_ARROWS_REGEX.match(line):
        return None

    groups = size_match.groups()
    date_str = groups[0]  # groups[1] is the relative timestamp, which is not used
    timestamp = datetime.strptime(date_str, TIMEFORMAT)

    (young_begin, young_end, young_total,
     heap_begin, heap_end, heap_total) = (int(size) for size in groups[2:])

    size_info = GCSizeInfo(
        young_begin_k=young_begin,
        young_end_k=young_end,
        young_total_k=young_total,
        whole_heap_begin_k=heap_begin,
        whole_heap_end_k=heap_end,
        whole_heap_total_k=heap_total)
    return timestamp, size_info


def printv(line: str, verbose: bool) -> None:
    """Print `line` only when `verbose` is truthy."""
    if not verbose:
        return
    print(line)
class GCEventProcessor(object):
    """Parse JVM GC log records and emit timing, rate and frequency stats over dogstatsd."""

    def __init__(self,
                 dogstatsd_host: str,
                 dogstatsd_port: str,
                 extra_tags: Optional[List[str]],
                 verbose: bool = False) -> None:
        """
        Given a dogstatsd connection, provide an object for processing JVM garbage collector logs and emitting
        relevant events over dogstatsd. GC logs can be input via a log directory or STDIN.

        :param dogstatsd_host: dogstatsd connection host
        :param dogstatsd_port: dogstatsd connection port (passed straight through to the DogStatsd client)
        :param extra_tags: dogstatsd constant tags
        :param verbose: If True, print extra info when processing logs
        """
        self.stats = DogStatsd(host=dogstatsd_host, port=dogstatsd_port, constant_tags=extra_tags)
        self.verbose = verbose

        # State carried between records so that rates and frequencies can be derived.
        self.last_time_and_size_info = None  # type: Optional[Tuple[datetime, GCSizeInfo]]
        self.last_minor_time = None  # type: Optional[datetime]
        self.last_major_time = None  # type: Optional[datetime]

    def process_log_directory(self,
                              log_directory: str,
                              glob_pattern: str = "gc.log*",
                              refresh_logfiles_seconds: int = 60,
                              sleep_seconds: int = 1) -> None:
        """
        Given a directory of GC logs, generate datadog stats from log lines as they are added to the newest gc* log file

        :param log_directory: Directory to find GC logs
        :param glob_pattern: Pattern to match for garbage collection logs
        :param refresh_logfiles_seconds: How often (in seconds) to check for newer rotated log files
        :param sleep_seconds: How often (in seconds) to poll for new log lines
        """
        with GCLogHandler(log_directory,
                          glob_pattern=glob_pattern,
                          refresh_logfiles_seconds=refresh_logfiles_seconds,
                          sleep_seconds=sleep_seconds,
                          verbose=self.verbose) as log_handler:
            previous_record = ""
            for line in log_handler:
                previous_record = self._process_line(line, previous_record)

    def process_stdin(self) -> None:
        """
        Generate datadog stats from log lines from STDIN, until end of input
        """
        previous_record = ""
        # readline() returns "" only at EOF, which ends the loop.
        for inline in iter(sys.stdin.readline, ""):
            previous_record = self._process_line(inline, previous_record)

        # A record is only emitted when the *next* record starts, so the last one is still buffered at EOF.
        # Flush it here; otherwise the final GC event of the input is silently dropped.
        self._process_eventline(previous_record)

    def _process_for_frequency_stats(self, stripped_line: str) -> None:
        """
        Emit the time elapsed since the previous old-gen / young-gen collection, when the record starts with
        an absolute timestamp accepted by ABSOLUTE_TIME_REGEX.

        :param stripped_line: One complete GC record
        """
        line_time_match = ABSOLUTE_TIME_REGEX.match(stripped_line)
        if not line_time_match:
            return

        line_time = datetime.strptime(line_time_match.group(1), TIMEFORMAT)
        is_old_gen = (GCEventType.CMS_INITIAL_MARK.gc_text in stripped_line
                      or GCEventType.FULL_GC.gc_text in stripped_line)
        # NOTE(review): only ParNew and PSYoungGen count as young collections here; DefNew is not included.
        is_young_gen = (GCEventType.PAR_NEW.gc_text in stripped_line
                        or GCEventType.PS_YOUNG_GEN.gc_text in stripped_line)

        if is_old_gen:
            if self.last_major_time:
                elapsed = (line_time - self.last_major_time).total_seconds()
                self.stats.histogram("garbagedog_time_between_old_gc", elapsed)
            self.last_major_time = line_time
        elif is_young_gen:
            if self.last_minor_time:
                elapsed = (line_time - self.last_minor_time).total_seconds()
                self.stats.histogram("garbagedog_time_between_young_gc", elapsed)
            self.last_minor_time = line_time

    def _process_eventline(self, stripped_line: str) -> None:
        """
        Emit every stat that can be derived from one complete GC record: collection frequency, event duration,
        and allocation / promotion rates relative to the previous record that carried size information.

        :param stripped_line: One complete GC record; an empty string is ignored
        """
        if not stripped_line:
            return

        if self.verbose:
            print('.', end='', flush=True)

        self._process_for_frequency_stats(stripped_line)

        time_info = parse_line_for_times(stripped_line)
        if time_info:
            event_type, duration = time_info
            tags = ["stw:{}".format(event_type.is_stop_the_world), "event_type:{}".format(event_type.stats_name)]
            self.stats.timing("garbagedog_gc_event_duration", duration, tags=tags)

        time_and_size_info = parse_line_for_sizes(stripped_line)
        if not time_and_size_info:
            return

        timestamp, size_info = time_and_size_info
        if self.last_time_and_size_info:
            last_event_time, last_size_info = self.last_time_and_size_info
            elapsed = (timestamp - last_event_time).total_seconds()

            # Two records can share a timestamp (millisecond resolution) or arrive out of order; a rate over a
            # zero or negative interval is meaningless and dividing by zero would crash the processor.
            if elapsed > 0:
                # Allocation rate. The size fields are the "K" figures from the log, so this is K per second.
                kb_added = abs(size_info.young_begin_k - last_size_info.young_end_k)
                self.stats.histogram("garbagedog_allocation_rate_histogram", kb_added / elapsed)

                # Promotion rate: the part of the young-gen decrease not matched by a whole-heap decrease.
                young_decreased = abs(size_info.young_begin_k - size_info.young_end_k)
                total_decreased = abs(size_info.whole_heap_begin_k - size_info.whole_heap_end_k)
                if total_decreased < young_decreased:
                    promoted = abs(total_decreased - young_decreased)
                    self.stats.histogram("garbagedog_promotion_rate_histogram", promoted / elapsed)

        self.last_time_and_size_info = (timestamp, size_info)

    def _process_line(self, inline: str, previous_record: str) -> str:
        """
        Fold one raw log line into the record being accumulated, emitting the accumulated record as soon as
        the incoming line shows that a new record has started.

        :param inline: Raw line as read from the log (trailing whitespace is stripped here)
        :param previous_record: Record text accumulated so far
        :return: The record text to carry into the next call
        """
        stripped_line = inline.rstrip()

        conflated_relative = CONFLATED_RELATIVE_REGEX.match(stripped_line)
        conflated_absolute = CONFLATED_ABSOLUTE_REGEX.match(stripped_line)

        if ABSOLUTE_TIME_REGEX.match(stripped_line) or RELATIVE_TIME_REGEX.match(stripped_line):
            # The line opens a new record: the buffered one is complete.
            self._process_eventline(previous_record)
            previous_record = stripped_line
        elif conflated_relative:
            # Tail of the old record and head of a new one (relative timestamp) share this line.
            previous_record = previous_record + conflated_relative.group(1)
            self._process_eventline(previous_record)
            previous_record = conflated_relative.group(2)
        elif conflated_absolute:
            # Same, with an absolute timestamp starting the new record mid-line.
            previous_record = previous_record + conflated_absolute.group(1)
            self._process_eventline(previous_record)
            previous_record = conflated_absolute.group(2)
        else:
            # Continuation line: keep accumulating.
            previous_record = previous_record + " " + stripped_line

        return previous_record
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2017 Eero, Inc. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /grafana-examples/grafana-example.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_INFLUXDB_(TELEGRAF)", 5 | "label": "InfluxDB (telegraf)", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "influxdb", 9 | "pluginName": "InfluxDB" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "4.1.1" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "" 24 | }, 25 | { 26 | "type": "datasource", 27 | "id": "influxdb", 28 | "name": "InfluxDB", 29 | "version": "1.0.0" 30 | } 31 | ], 32 | "annotations": { 33 | "list": [] 34 | }, 35 | "editable": true, 36 | "gnetId": null, 37 | "graphTooltip": 0, 38 | "hideControls": false, 39 | "id": null, 40 | "links": [], 41 | "refresh": false, 42 | "rows": [ 43 | { 44 | "collapse": false, 45 | "height": 439, 46 | "panels": [ 47 | { 48 | "aliasColors": {}, 49 | "bars": true, 50 | "datasource": "${DS_INFLUXDB_(TELEGRAF)}", 
51 | "fill": 1, 52 | "id": 1, 53 | "interval": "1m", 54 | "legend": { 55 | "avg": false, 56 | "current": false, 57 | "max": false, 58 | "min": false, 59 | "show": true, 60 | "total": false, 61 | "values": false 62 | }, 63 | "lines": false, 64 | "linewidth": 1, 65 | "links": [], 66 | "nullPointMode": "null", 67 | "percentage": false, 68 | "pointradius": 5, 69 | "points": false, 70 | "renderer": "flot", 71 | "seriesOverrides": [], 72 | "span": 6, 73 | "stack": false, 74 | "steppedLine": false, 75 | "targets": [ 76 | { 77 | "alias": "Max $tag_event_type", 78 | "dsType": "influxdb", 79 | "groupBy": [ 80 | { 81 | "params": [ 82 | "$interval" 83 | ], 84 | "type": "time" 85 | }, 86 | { 87 | "params": [ 88 | "event_type" 89 | ], 90 | "type": "tag" 91 | }, 92 | { 93 | "params": [ 94 | "instance_layer" 95 | ], 96 | "type": "tag" 97 | }, 98 | { 99 | "params": [ 100 | "null" 101 | ], 102 | "type": "fill" 103 | } 104 | ], 105 | "measurement": "garbagedog_gc_event_duration", 106 | "policy": "default", 107 | "refId": "A", 108 | "resultFormat": "time_series", 109 | "select": [ 110 | [ 111 | { 112 | "params": [ 113 | "upper" 114 | ], 115 | "type": "field" 116 | }, 117 | { 118 | "params": [], 119 | "type": "max" 120 | } 121 | ] 122 | ], 123 | "tags": [ 124 | { 125 | "key": "stw", 126 | "operator": "=", 127 | "value": "True" 128 | }, 129 | { 130 | "condition": "AND", 131 | "key": "instance_layer", 132 | "operator": "=~", 133 | "value": "/^$layer$/" 134 | }, 135 | { 136 | "condition": "AND", 137 | "key": "host", 138 | "operator": "=~", 139 | "value": "/^$host$/" 140 | } 141 | ] 142 | } 143 | ], 144 | "thresholds": [], 145 | "timeFrom": null, 146 | "timeShift": null, 147 | "title": "Worst STW GC Event Durations", 148 | "tooltip": { 149 | "shared": true, 150 | "sort": 0, 151 | "value_type": "individual" 152 | }, 153 | "type": "graph", 154 | "xaxis": { 155 | "mode": "time", 156 | "name": null, 157 | "show": true, 158 | "values": [] 159 | }, 160 | "yaxes": [ 161 | { 162 | "format": "s", 
163 | "label": null, 164 | "logBase": 1, 165 | "max": "1", 166 | "min": "0", 167 | "show": true 168 | }, 169 | { 170 | "format": "short", 171 | "label": null, 172 | "logBase": 1, 173 | "max": null, 174 | "min": null, 175 | "show": true 176 | } 177 | ] 178 | }, 179 | { 180 | "aliasColors": {}, 181 | "bars": false, 182 | "datasource": "${DS_INFLUXDB_(TELEGRAF)}", 183 | "fill": 1, 184 | "id": 2, 185 | "interval": ">10s", 186 | "legend": { 187 | "avg": false, 188 | "current": false, 189 | "max": false, 190 | "min": false, 191 | "show": false, 192 | "total": false, 193 | "values": false 194 | }, 195 | "lines": true, 196 | "linewidth": 1, 197 | "links": [], 198 | "nullPointMode": "null", 199 | "percentage": false, 200 | "pointradius": 5, 201 | "points": false, 202 | "renderer": "flot", 203 | "seriesOverrides": [], 204 | "span": 6, 205 | "stack": false, 206 | "steppedLine": false, 207 | "targets": [ 208 | { 209 | "alias": "p99 $tag_event_type $tag_host", 210 | "dsType": "influxdb", 211 | "groupBy": [ 212 | { 213 | "params": [ 214 | "$interval" 215 | ], 216 | "type": "time" 217 | }, 218 | { 219 | "params": [ 220 | "event_type" 221 | ], 222 | "type": "tag" 223 | }, 224 | { 225 | "params": [ 226 | "host" 227 | ], 228 | "type": "tag" 229 | }, 230 | { 231 | "params": [ 232 | "previous" 233 | ], 234 | "type": "fill" 235 | } 236 | ], 237 | "measurement": "garbagedog_allocation_rate_histogram", 238 | "policy": "default", 239 | "refId": "A", 240 | "resultFormat": "time_series", 241 | "select": [ 242 | [ 243 | { 244 | "params": [ 245 | "99_percentile" 246 | ], 247 | "type": "field" 248 | }, 249 | { 250 | "params": [], 251 | "type": "max" 252 | } 253 | ] 254 | ], 255 | "tags": [ 256 | { 257 | "key": "instance_layer", 258 | "operator": "=~", 259 | "value": "/^$layer$/" 260 | }, 261 | { 262 | "condition": "AND", 263 | "key": "host", 264 | "operator": "=~", 265 | "value": "/^$host$/" 266 | } 267 | ] 268 | } 269 | ], 270 | "thresholds": [], 271 | "timeFrom": null, 272 | "timeShift": 
null, 273 | "title": "Allocation Rate", 274 | "tooltip": { 275 | "shared": true, 276 | "sort": 0, 277 | "value_type": "individual" 278 | }, 279 | "type": "graph", 280 | "xaxis": { 281 | "mode": "time", 282 | "name": null, 283 | "show": true, 284 | "values": [] 285 | }, 286 | "yaxes": [ 287 | { 288 | "format": "KBs", 289 | "label": null, 290 | "logBase": 1, 291 | "max": null, 292 | "min": "0", 293 | "show": true 294 | }, 295 | { 296 | "format": "short", 297 | "label": null, 298 | "logBase": 1, 299 | "max": null, 300 | "min": null, 301 | "show": true 302 | } 303 | ] 304 | } 305 | ], 306 | "repeat": null, 307 | "repeatIteration": null, 308 | "repeatRowId": null, 309 | "showTitle": false, 310 | "title": "Dashboard Row", 311 | "titleSize": "h6" 312 | }, 313 | { 314 | "collapse": false, 315 | "height": 338, 316 | "panels": [ 317 | { 318 | "aliasColors": {}, 319 | "bars": false, 320 | "datasource": "${DS_INFLUXDB_(TELEGRAF)}", 321 | "fill": 1, 322 | "id": 3, 323 | "legend": { 324 | "avg": false, 325 | "current": false, 326 | "max": false, 327 | "min": false, 328 | "show": true, 329 | "total": false, 330 | "values": false 331 | }, 332 | "lines": true, 333 | "linewidth": 1, 334 | "links": [], 335 | "nullPointMode": "null", 336 | "percentage": false, 337 | "pointradius": 5, 338 | "points": false, 339 | "renderer": "flot", 340 | "seriesOverrides": [], 341 | "span": 6, 342 | "stack": false, 343 | "steppedLine": false, 344 | "targets": [ 345 | { 346 | "dsType": "influxdb", 347 | "groupBy": [ 348 | { 349 | "params": [ 350 | "$interval" 351 | ], 352 | "type": "time" 353 | }, 354 | { 355 | "params": [ 356 | "previous" 357 | ], 358 | "type": "fill" 359 | } 360 | ], 361 | "measurement": "garbagedog_time_between_young_gc", 362 | "policy": "default", 363 | "refId": "A", 364 | "resultFormat": "time_series", 365 | "select": [ 366 | [ 367 | { 368 | "params": [ 369 | "1_percentile" 370 | ], 371 | "type": "field" 372 | }, 373 | { 374 | "params": [], 375 | "type": "min" 376 | } 377 | ] 378 
| ], 379 | "tags": [ 380 | { 381 | "key": "host", 382 | "operator": "=~", 383 | "value": "/^$host$/" 384 | }, 385 | { 386 | "condition": "AND", 387 | "key": "instance_layer", 388 | "operator": "=~", 389 | "value": "/^$layer$/" 390 | } 391 | ] 392 | } 393 | ], 394 | "thresholds": [], 395 | "timeFrom": null, 396 | "timeShift": null, 397 | "title": "Time Between Young GC", 398 | "tooltip": { 399 | "shared": true, 400 | "sort": 0, 401 | "value_type": "individual" 402 | }, 403 | "type": "graph", 404 | "xaxis": { 405 | "mode": "time", 406 | "name": null, 407 | "show": true, 408 | "values": [] 409 | }, 410 | "yaxes": [ 411 | { 412 | "format": "s", 413 | "label": null, 414 | "logBase": 1, 415 | "max": null, 416 | "min": "0", 417 | "show": true 418 | }, 419 | { 420 | "format": "short", 421 | "label": null, 422 | "logBase": 1, 423 | "max": null, 424 | "min": null, 425 | "show": true 426 | } 427 | ] 428 | }, 429 | { 430 | "aliasColors": {}, 431 | "bars": false, 432 | "datasource": "${DS_INFLUXDB_(TELEGRAF)}", 433 | "fill": 1, 434 | "id": 4, 435 | "legend": { 436 | "avg": false, 437 | "current": false, 438 | "max": false, 439 | "min": false, 440 | "show": true, 441 | "total": false, 442 | "values": false 443 | }, 444 | "lines": true, 445 | "linewidth": 1, 446 | "links": [], 447 | "nullPointMode": "null", 448 | "percentage": false, 449 | "pointradius": 5, 450 | "points": false, 451 | "renderer": "flot", 452 | "seriesOverrides": [], 453 | "span": 6, 454 | "stack": false, 455 | "steppedLine": false, 456 | "targets": [ 457 | { 458 | "dsType": "influxdb", 459 | "groupBy": [ 460 | { 461 | "params": [ 462 | "$interval" 463 | ], 464 | "type": "time" 465 | }, 466 | { 467 | "params": [ 468 | "previous" 469 | ], 470 | "type": "fill" 471 | } 472 | ], 473 | "measurement": "garbagedog_time_between_old_gc", 474 | "policy": "default", 475 | "refId": "A", 476 | "resultFormat": "time_series", 477 | "select": [ 478 | [ 479 | { 480 | "params": [ 481 | "lower" 482 | ], 483 | "type": "field" 484 | 
}, 485 | { 486 | "params": [], 487 | "type": "min" 488 | } 489 | ] 490 | ], 491 | "tags": [ 492 | { 493 | "key": "host", 494 | "operator": "=~", 495 | "value": "/^$host$/" 496 | }, 497 | { 498 | "condition": "AND", 499 | "key": "instance_layer", 500 | "operator": "=~", 501 | "value": "/^$layer$/" 502 | } 503 | ] 504 | } 505 | ], 506 | "thresholds": [], 507 | "timeFrom": null, 508 | "timeShift": null, 509 | "title": "Time Between Old GC", 510 | "tooltip": { 511 | "shared": true, 512 | "sort": 0, 513 | "value_type": "individual" 514 | }, 515 | "type": "graph", 516 | "xaxis": { 517 | "mode": "time", 518 | "name": null, 519 | "show": true, 520 | "values": [] 521 | }, 522 | "yaxes": [ 523 | { 524 | "format": "s", 525 | "label": null, 526 | "logBase": 1, 527 | "max": null, 528 | "min": "0", 529 | "show": true 530 | }, 531 | { 532 | "format": "short", 533 | "label": null, 534 | "logBase": 1, 535 | "max": null, 536 | "min": null, 537 | "show": true 538 | } 539 | ] 540 | } 541 | ], 542 | "repeat": null, 543 | "repeatIteration": null, 544 | "repeatRowId": null, 545 | "showTitle": false, 546 | "title": "Dashboard Row", 547 | "titleSize": "h6" 548 | } 549 | ], 550 | "schemaVersion": 14, 551 | "style": "dark", 552 | "tags": [], 553 | "templating": { 554 | "list": [ 555 | { 556 | "allValue": null, 557 | "current": {}, 558 | "datasource": "${DS_INFLUXDB_(TELEGRAF)}", 559 | "hide": 0, 560 | "includeAll": true, 561 | "label": null, 562 | "multi": false, 563 | "name": "layer", 564 | "options": [], 565 | "query": "SHOW TAG VALUES FROM \"garbagedog_time_between_young_gc\" WITH KEY=instance_layer", 566 | "refresh": 1, 567 | "regex": "", 568 | "sort": 0, 569 | "tagValuesQuery": "", 570 | "tags": [], 571 | "tagsQuery": "", 572 | "type": "query", 573 | "useTags": false 574 | }, 575 | { 576 | "allValue": null, 577 | "current": {}, 578 | "datasource": "${DS_INFLUXDB_(TELEGRAF)}", 579 | "hide": 0, 580 | "includeAll": true, 581 | "label": null, 582 | "multi": true, 583 | "name": "host", 584 | 
"options": [], 585 | "query": "SHOW TAG VALUES FROM \"garbagedog_time_between_young_gc\" WITH KEY=host", 586 | "refresh": 1, 587 | "regex": "", 588 | "sort": 0, 589 | "tagValuesQuery": "", 590 | "tags": [], 591 | "tagsQuery": "", 592 | "type": "query", 593 | "useTags": false 594 | } 595 | ] 596 | }, 597 | "time": { 598 | "from": "now-15m", 599 | "to": "now" 600 | }, 601 | "timepicker": { 602 | "refresh_intervals": [ 603 | "5s", 604 | "10s", 605 | "30s", 606 | "1m", 607 | "5m", 608 | "15m", 609 | "30m", 610 | "1h", 611 | "2h", 612 | "1d" 613 | ], 614 | "time_options": [ 615 | "5m", 616 | "15m", 617 | "1h", 618 | "6h", 619 | "12h", 620 | "24h", 621 | "2d", 622 | "7d", 623 | "30d" 624 | ] 625 | }, 626 | "timezone": "browser", 627 | "title": "Garbagedog Dashboard", 628 | "version": 25 629 | } --------------------------------------------------------------------------------