├── VERSION ├── lib └── luminol │ ├── VERSION │ ├── demo │ ├── requirements.txt │ └── src │ │ ├── start.py │ │ ├── rca.py │ │ └── static │ │ └── inversion.css │ ├── requirements.txt │ ├── src │ └── luminol │ │ ├── modules │ │ ├── __init__.py │ │ ├── correlation_result.py │ │ └── anomaly.py │ │ ├── algorithms │ │ ├── __init__.py │ │ ├── correlator_algorithms │ │ │ ├── all.py │ │ │ ├── __init__.py │ │ │ └── cross_correlator.py │ │ └── anomaly_detector_algorithms │ │ │ ├── all.py │ │ │ ├── default_detector.py │ │ │ ├── __init__.py │ │ │ ├── derivative_detector.py │ │ │ ├── absolute_threshold.py │ │ │ ├── diff_percent_threshold.py │ │ │ └── exp_avg_detector.py │ │ ├── exceptions.py │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── constants.py │ │ ├── tests │ │ └── run_tests.py │ │ ├── correlator.py │ │ └── anomaly_detector.py │ ├── MANIFEST.in │ └── setup.py ├── test ├── matplotlib │ ├── test4.csv │ ├── test2.csv │ └── test_matplotlib.py ├── __init__.py ├── httpdownload.html ├── test_diff.py ├── test_run_step.py ├── test_naarad_api.py ├── test_cluster_metric.py ├── test_httpdownload.py └── test_netstat_metric.py ├── src ├── naarad │ ├── graphing │ │ ├── __init__.py │ │ ├── plot_data.py │ │ ├── pygal_naarad.py │ │ └── dygraphs.py │ ├── metrics │ │ ├── __init__.py │ │ ├── procvmstat_metric.py │ │ ├── procmeminfo_metric.py │ │ ├── linkedin_android_rum_metric.py │ │ ├── proczoneinfo_metric.py │ │ ├── cluster_metric.py │ │ └── sar_metric.py │ ├── reporting │ │ └── __init__.py │ ├── run_steps │ │ ├── __init__.py │ │ ├── run_step.py │ │ └── local_cmd.py │ ├── resources │ │ ├── sorttable.js │ │ ├── default_report_footer.html │ │ ├── __init__.py │ │ ├── naarad.css │ │ ├── default_summary_content.html │ │ ├── default_report_header.html │ │ ├── default_diff_client_charting_page.html │ │ ├── default_client_charting_page.html │ │ └── default_summary_page.html │ ├── naarad_imports.py │ ├── sla.py │ ├── naarad_constants.py │ └── httpdownload.py └── .gitignore ├── MANIFEST.in ├── 
optional_requirements.txt ├── requirements.txt ├── examples ├── conf │ ├── config-gc-diff │ ├── config-top │ ├── config-procinterrupts │ ├── config-procvmstat │ ├── config-proczoneinfo │ ├── config-netstat │ ├── config-procmeminfo │ ├── config-gc-js │ ├── config-gc-svg │ ├── config-linkedin-android-rum │ ├── config-jmeter │ ├── config-sar-matplotlib │ ├── config-sar │ ├── config-others │ ├── config-inno │ ├── config-cluster │ ├── config-matplotlib │ ├── config-grpby │ ├── config-js │ ├── config-integration │ ├── config-gc │ └── config-gc-1 ├── screenshots │ ├── naarad_report.png │ ├── naarad-gc-screenshots.png │ ├── naarad-sar-screenshots.png │ └── naarad-jmeter-screenshots.png └── logs │ ├── lock.out │ ├── deadlock.out │ ├── netstat.tcp.out │ └── stats.out ├── setup.cfg ├── templates ├── config-gc ├── config-sar └── config-inno ├── bin ├── addDateStampToGC ├── naarad └── naarad_metric_collector.sh ├── setup.py ├── .gitignore ├── NOTICE └── README.md /VERSION: -------------------------------------------------------------------------------- 1 | 1.0.16 2 | -------------------------------------------------------------------------------- /lib/luminol/VERSION: -------------------------------------------------------------------------------- 1 | 0.2 2 | -------------------------------------------------------------------------------- /test/matplotlib/test4.csv: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/naarad/graphing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/naarad/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/naarad/reporting/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/naarad/run_steps/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/luminol/demo/requirements.txt: -------------------------------------------------------------------------------- 1 | flask -------------------------------------------------------------------------------- /lib/luminol/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'sgandhi' 2 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-include *.html *.js *.css VERSION requirements.txt -------------------------------------------------------------------------------- /lib/luminol/MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-include VERSION requirements.txt 2 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | # 
Ignore compiled python files in source 2 | *.pyc 3 | -------------------------------------------------------------------------------- /optional_requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib>=1.1.1 2 | pyparsing>=2.0.1 3 | pygal>=1.2.0 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.6.2 2 | argparse 3 | pytz>=2012c 4 | jinja2 5 | luminol 6 | -------------------------------------------------------------------------------- /examples/conf/config-gc-diff: -------------------------------------------------------------------------------- 1 | [GC] 2 | GCPause.sla=mean>5% p50>5 p90<5 3 | appstop.sla=mean>5% p50>5 p90<5% 4 | -------------------------------------------------------------------------------- /src/naarad/resources/sorttable.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInAttic/naarad/HEAD/src/naarad/resources/sorttable.js -------------------------------------------------------------------------------- /examples/screenshots/naarad_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInAttic/naarad/HEAD/examples/screenshots/naarad_report.png -------------------------------------------------------------------------------- /examples/conf/config-top: -------------------------------------------------------------------------------- 1 | [TOP-host1] 2 | infile=top.out 3 | PID=9609 16089 4 | COMMAND=firefox 5 | 6 | [GRAPH] 7 | graphing_library=matplotlib 8 | -------------------------------------------------------------------------------- /examples/screenshots/naarad-gc-screenshots.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LinkedInAttic/naarad/HEAD/examples/screenshots/naarad-gc-screenshots.png -------------------------------------------------------------------------------- /examples/screenshots/naarad-sar-screenshots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInAttic/naarad/HEAD/examples/screenshots/naarad-sar-screenshots.png -------------------------------------------------------------------------------- /examples/logs/lock.out: -------------------------------------------------------------------------------- 1 | 2012-03-30T22:34:30 mysql_thread_id lock_type waiting lock_wait_time time lock_mode db tbl index insert_intention special 2 | 3 | 4 | -------------------------------------------------------------------------------- /examples/screenshots/naarad-jmeter-screenshots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinkedInAttic/naarad/HEAD/examples/screenshots/naarad-jmeter-screenshots.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E111,E128 3 | max-line-length = 160 4 | exclude = MANIFEST.in,LICENSE,NOTICE,README.md,build,env,*requirements.txt,setup.cfg -------------------------------------------------------------------------------- /examples/conf/config-procinterrupts: -------------------------------------------------------------------------------- 1 | [PROCINTERRUPTS] 2 | infile=proc.interrupts.out 3 | CPUS=CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 4 | 5 | [GRAPH] 6 | graphing_library=matplotlib 7 | -------------------------------------------------------------------------------- /src/naarad/resources/default_report_footer.html: -------------------------------------------------------------------------------- 1 | 5 | 6 | 
-------------------------------------------------------------------------------- /examples/conf/config-procvmstat: -------------------------------------------------------------------------------- 1 | [PROCVMSTAT] 2 | hostname=localhost 3 | infile=procvmstat.out 4 | sub_metrics=nr_free_pages nr_inactive_anon 5 | 6 | [GRAPH] 7 | graphing_library=matplotlib 8 | -------------------------------------------------------------------------------- /src/naarad/resources/__init__.py: -------------------------------------------------------------------------------- 1 | import pkg_resources 2 | 3 | 4 | def get_dir(): 5 | """Return the location of resources for report""" 6 | return pkg_resources.resource_filename('naarad.resources', None) 7 | -------------------------------------------------------------------------------- /examples/conf/config-proczoneinfo: -------------------------------------------------------------------------------- 1 | [PROCZONEINFO] 2 | hostname=localhost 3 | infile=proczoneinfo.out 4 | sub_metrics=pages.min nr_free_pages 5 | zones=Node.0.zone.DMA 6 | 7 | [GRAPH] 8 | graphing_library=matplotlib 9 | -------------------------------------------------------------------------------- /templates/config-gc: -------------------------------------------------------------------------------- 1 | [GC] 2 | infile=gc.log 3 | sub_metrics=appstop alloc promo used0 used1 used commit0 commit1 commit gen0 gen0t gen0usr gen0sys gen1t cmsIM cmsRM cmsRS GCPause cmsCM cmsCP cmsCS cmsCR safept apptime 4 | 5 | [GRAPH] 6 | -------------------------------------------------------------------------------- /templates/config-sar: -------------------------------------------------------------------------------- 1 | [SAR-device] 2 | access=local 3 | infile=sar.device.out 4 | 5 | [SAR-cpuusage] 6 | access=local 7 | infile=sar.cpuusage.out 8 | 9 | [SAR-memory] 10 | access=local 11 | infile=sar.memory.out 12 | 13 | [GRAPH] 14 | 
-------------------------------------------------------------------------------- /examples/logs/deadlock.out: -------------------------------------------------------------------------------- 1 | 2012-03-30T22:34:30 mysql_thread_id timestring user hostname victim time undo_log_entries lock_structs query_text 2 | 2012-03-30T22:34:30 mysql_thread_id waiting lock_mode db tbl index special insert_intention 3 | 4 | 5 | -------------------------------------------------------------------------------- /examples/conf/config-netstat: -------------------------------------------------------------------------------- 1 | [NETSTAT-host1] 2 | hostname=localhost 3 | infile=netstat.tcp.out 4 | connections=host1.localdomain.com<->web1.remotedomain.com:https host1:48860<->email 5 | processes=/firefox 6 | 7 | [GRAPH] 8 | graphing_library=matplotlib 9 | -------------------------------------------------------------------------------- /examples/conf/config-procmeminfo: -------------------------------------------------------------------------------- 1 | [PROCMEMINFO] 2 | hostname=localhost 3 | infile=procmeminfo.out 4 | sub_metrics=MemTotal MemFree Active Inactive 5 | ts_start=2013-12-05 12:04:58 6 | ts_end=2013-12-05 12:14:58 7 | 8 | [GRAPH] 9 | graphing_library=matplotlib 10 | -------------------------------------------------------------------------------- /examples/conf/config-gc-js: -------------------------------------------------------------------------------- 1 | [GC] 2 | infile=gc.log 3 | sub_metrics=appstop alloc promo used0 used1 used commit0 commit1 commit gen0 gen0t gen0usr gen0sys gen1t cmsIM cmsRM cmsRS GCPause cmsCM cmsCP cmsCS cmsCR safept apptime 4 | access=local 5 | 6 | [GRAPH] 7 | graphing_library=js 8 | -------------------------------------------------------------------------------- /examples/conf/config-gc-svg: -------------------------------------------------------------------------------- 1 | [GC] 2 | infile=gc.log 3 | sub_metrics=appstop alloc promo used0 used1 used 
commit0 commit1 commit gen0 gen0t gen0usr gen0sys gen1t cmsIM cmsRM cmsRS GCPause cmsCM cmsCP cmsCS cmsCR safept apptime 4 | access=local 5 | 6 | [GRAPH] 7 | graphing_library=svg 8 | -------------------------------------------------------------------------------- /examples/conf/config-linkedin-android-rum: -------------------------------------------------------------------------------- 1 | [LINKEDINANDROIDRUM] 2 | infile=linkedin_android_rum.log 3 | sub_metrics=launch_time nus_update_time 4 | launch_time.sla=mean<5000 p50<5000 5 | access=local 6 | 7 | [GRAPH] 8 | graphs=LINKEDINANDROIDRUM.launch_time.all LINKEDINANDROIDRUM.nus_update_time.all 9 | -------------------------------------------------------------------------------- /examples/conf/config-jmeter: -------------------------------------------------------------------------------- 1 | [JMETER] 2 | infile=perf-result.xml 3 | aggregation_granularity=minute 4 | Overall_Summary.ResponseTime.sla=mean<1000 p95<20000 5 | anomaly_detection_metrics=Overall_Summary.ResponseTime 6 | 7 | [GRAPH] 8 | graphing_library=matplotlib 9 | graphs=JMETER.Overall_Summary.ResponseTime,JMETER.Overall_Summary.ErrorsPerSecond 10 | -------------------------------------------------------------------------------- /templates/config-inno: -------------------------------------------------------------------------------- 1 | [INNOTOP-R] 2 | infile=records.out 3 | access=local 4 | 5 | [INNOTOP-M] 6 | infile=replication.out 7 | access=local 8 | 9 | [INNOTOP-I] 10 | infile=iostat.out 11 | access=local 12 | 13 | [INNOTOP-C] 14 | infile=command.out 15 | access=local 16 | 17 | [INNOTOP-B] 18 | access=local 19 | infile=buffer.out 20 | 21 | [GRAPH] 22 | -------------------------------------------------------------------------------- /src/naarad/resources/naarad.css: -------------------------------------------------------------------------------- 1 | footer { 2 | padding: 40px 0; 3 | color: #999; 4 | text-align: center; 5 | background-color: #f9f9f9; 6 | 
border-top: 1px solid #e5e5e5; 7 | } 8 | 9 | .chart-area, .chart-label { 10 | width: 100%; 11 | height: 50%; 12 | margin: 0; 13 | } 14 | 15 | .chart-label { 16 | padding: 10px 0; 17 | text-align: center; 18 | } 19 | 20 | .chart-csv { 21 | text-align: right; 22 | } -------------------------------------------------------------------------------- /examples/conf/config-sar-matplotlib: -------------------------------------------------------------------------------- 1 | [SAR-device] 2 | #ignore=1 3 | access=local 4 | infile=sar.device.out 5 | devices=sda sdb 6 | options=tps rd_sec/s %util 7 | 8 | [SAR-cpuusage] 9 | ignore=1 10 | access=local 11 | infile=sar.cpuusage.out 12 | #ts_start=2012-02-23 22:05:00 13 | #ts_end=2012-02-23 23:00:00 14 | 15 | [SAR-memory] 16 | access=local 17 | infile=sar.memory.out 18 | 19 | [GRAPH] 20 | graphing_library=matplotlib 21 | -------------------------------------------------------------------------------- /examples/conf/config-sar: -------------------------------------------------------------------------------- 1 | [SAR-device] 2 | #ignore=1 3 | access=local 4 | infile=sar.device.out 5 | devices=sda sdb 6 | options=tps rd_sec/s %util 7 | anomaly_detection_metrics=sda.tps 8 | 9 | [SAR-cpuusage] 10 | #ignore=1 11 | access=local 12 | infile=sar.cpuusage.out 13 | #ts_start=2012-02-23 22:05:00 14 | #ts_end=2012-02-23 23:00:00 15 | 16 | [SAR-memory] 17 | access=local 18 | infile=sar.memory.out 19 | 20 | [GRAPH] 21 | graphing_library=js 22 | -------------------------------------------------------------------------------- /examples/conf/config-others: -------------------------------------------------------------------------------- 1 | [MYSQL] 2 | #ignore=1 3 | infile=/home/ritesh/mytopStats/mytop.Queries.out 4 | access=local 5 | columns=queries 6 | ylabels=qps 7 | titles=MySql qps 8 | sep=, 9 | 10 | [MYSQL-buffer] 11 | #ignore=1 12 | infile=/home/ritesh/mytopStats/mytop.BufferPool.out 13 | access=local 14 | columns=%free %dirty 15 | 
titles=MYSQL_Buffer MYSQL_Buffer 16 | ylabels=percent,percent 17 | sep=, 18 | 19 | [GRAPH] 20 | outdir=/home/ritesh/naarad-out/others 21 | graphing_library=js 22 | -------------------------------------------------------------------------------- /examples/conf/config-inno: -------------------------------------------------------------------------------- 1 | [INNOTOP-R] 2 | #ignore=1 3 | infile=records.out 4 | options=num_inserts queries_in_queue num_reads 5 | access=local 6 | 7 | [INNOTOP-M] 8 | #ignore=1 9 | infile=replication.out 10 | access=local 11 | 12 | [INNOTOP-I] 13 | #ignore=1 14 | infile=iostat.out 15 | access=local 16 | 17 | [INNOTOP-C] 18 | #ignore=1 19 | infile=command.out 20 | access=local 21 | 22 | [INNOTOP-B] 23 | #ignore=1 24 | access=local 25 | infile=buffer.out 26 | 27 | [GRAPH] 28 | graphing_library=dygraphs 29 | -------------------------------------------------------------------------------- /examples/conf/config-cluster: -------------------------------------------------------------------------------- 1 | [SAR-device-host1] 2 | hostname=host1 3 | infile=sar.device.out 4 | devices=sda sdb 5 | 6 | [SAR-cpuusage-host1] 7 | hostname=host1 8 | infile=sar.cpuusage.out 9 | 10 | [SAR-device-host2] 11 | hostname=host2 12 | infile=sar.device.out 13 | devices=sda sdb 14 | 15 | [SAR-cpuusage-host2] 16 | hostname=host2 17 | infile=sar.cpuusage.out 18 | 19 | [CLUSTER-1] 20 | aggr_hosts=host1 host2 21 | aggr_metrics=SAR-device.sda.await:raw,sum,avg,count SAR-cpuusage.all.%sys:avg 22 | 23 | [GRAPH] 24 | graphing_library=matplotlib 25 | -------------------------------------------------------------------------------- /examples/conf/config-matplotlib: -------------------------------------------------------------------------------- 1 | [GC] 2 | infile=gc.log 3 | sub_metrics=appstop alloc promo used0 used1 used commit0 commit1 commit gen0 gen0t gen0usr gen0sys cmsIM cmsRM cmsRS GCPause cmsCM cmsCP cmsCS cmsCR safept apptime 4 | GCPause.sla=mean<0.05 p50<0.05 p99<0.05 
5 | 6 | [SAR-device] 7 | infile=sar.device.out 8 | devices=sda sdb 9 | options=tps rd_sec/s %util 10 | 11 | [SAR-cpuusage] 12 | #ignore=1 13 | infile=sar.cpuusage.out 14 | 15 | [SAR-memory] 16 | infile=sar.memory.out 17 | 18 | [JMETER] 19 | infile=perf-result.xml 20 | 21 | [GRAPH] 22 | graphing_library=matplotlib 23 | -------------------------------------------------------------------------------- /examples/conf/config-grpby: -------------------------------------------------------------------------------- 1 | [TEST] 2 | access=local 3 | sep= 4 | # vmstat format - 5 | # 2013-12-02_11:20:59.53386 allocstall 785 6 | infile=sample.vm.grpby.out 7 | columns=type size 8 | ts_start=2013-12-02 11:20:48 9 | ts_end=2013-12-02 11:21:00 10 | important_sub_metrics=size 11 | groupby=type 12 | 13 | [TEST-Meminfo] 14 | # meminfo format - 15 | #2013-12-05_12:04:58.68382 MemTotal: 65899332 kB 16 | sep= 17 | infile=sample.vm.grpby.out 18 | columns=type size 19 | ts_start=2013-12-02 11:20:48 20 | ts_end=2013-12-02 11:21:00 21 | #important_sub_metrics=size 22 | groupby=type 23 | -------------------------------------------------------------------------------- /examples/conf/config-js: -------------------------------------------------------------------------------- 1 | [SAR-device] 2 | infile=sar.device.out 3 | devices=sda sdb 4 | options=tps rd_sec/s %util 5 | 6 | [SAR-cpuusage] 7 | #ignore=1 8 | infile=sar.cpuusage.out 9 | 10 | [SAR-memory] 11 | infile=sar.memory.out 12 | 13 | [INNOTOP-R] 14 | ignore=1 15 | infile=records.out 16 | options=num_inserts queries_in_queue num_reads 17 | 18 | [INNOTOP-M] 19 | ignore=1 20 | infile=replication.out 21 | 22 | [INNOTOP-I] 23 | ignore=1 24 | infile=iostat.out 25 | 26 | [INNOTOP-C] 27 | ignore=1 28 | infile=command.out 29 | 30 | [INNOTOP-B] 31 | ignore=1 32 | infile=buffer.out 33 | 34 | [JMETER] 35 | infile=perf-result.xml 36 | 37 | [GRAPH] 38 | graphing_library=js 39 | -------------------------------------------------------------------------------- 
/lib/luminol/src/luminol/algorithms/correlator_algorithms/all.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | © 2014 LinkedIn Corp. All rights reserved. 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | """ 12 | from luminol.algorithms.correlator_algorithms import * 13 | 14 | correlator_algorithms = { 15 | 'cross_correlator': cross_correlator.CrossCorrelator 16 | } 17 | -------------------------------------------------------------------------------- /lib/luminol/setup.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/python 2 | 3 | from setuptools import setup, find_packages 4 | 5 | with open('VERSION') as f: 6 | luminol_version = f.read().strip() 7 | 8 | with open('requirements.txt') as f: 9 | required = f.read().splitlines() 10 | 11 | setup(name="luminol", 12 | description='luminol is an anomaly detection and correlation library for timeseries data.', 13 | url='https://github.com/linkedin/naarad', 14 | author='Naarad Developers', 15 | author_email='naarad-dev@googlegroups.com', 16 | version=luminol_version, 17 | packages=['luminol', 'luminol.algorithms', 'luminol.modules', 'luminol.algorithms.anomaly_detector_algorithms', 18 | 'luminol.algorithms.correlator_algorithms'], 19 | package_dir={'': 'src'}, 20 | install_requires=required, 21 | license='Apache 2.0', 22 | ) 23 | -------------------------------------------------------------------------------- /bin/addDateStampToGC: -------------------------------------------------------------------------------- 1 | #! /usr/bin/awk -f 2 | BEGIN { 3 | if(ts_start == "") ts_start = "2015-01-01T00:00:00" 4 | "date -d \""ts_start"\" '+%s'" | getline file_start; 5 | FS=":"; 6 | OFS=": "; 7 | } 8 | { 9 | # Calculate the line datestamp 10 | line_timestamp=$1 11 | line_datestamp=file_start+int(line_timestamp); 12 | # Find the millisecond part of the line timestamp 13 | fraction=line_timestamp-int(line_timestamp) 14 | milliseconds=substr(fraction,3); 15 | if(length(milliseconds) == 1) { 16 | milliseconds = milliseconds "00"; 17 | } 18 | else if(length(milliseconds) == 2) { 19 | milliseconds = milliseconds "0"; 20 | } 21 | # The full datestamp for the line, including milliseconds and timezone 22 | datestamp=strftime("%Y-%m-%dT%H:%M:%S",line_datestamp) "." 
sprintf("%3.3d", milliseconds + 0.0) strftime("%z",line_datestamp) 23 | # The full line with datestamp at the start 24 | print datestamp, $0; 25 | } 26 | END { 27 | } 28 | 29 | -------------------------------------------------------------------------------- /src/naarad/resources/default_summary_content.html: -------------------------------------------------------------------------------- 1 | 2 |
3 |
4 |

{{ metric.label }} Summary

5 |
6 | 7 | 8 | 9 | {%- for header in metric_stats[0] %} 10 | 11 | {%- endfor %} 12 | 13 | 14 | 15 | {% for row in range(1,metric_stats|length) %} 16 | 17 | {%- for row_data in metric_stats[row] %} 18 | 19 | {%- endfor %} 20 | 21 | {%- endfor %} 22 | 23 | 24 |
{{ header }}
{{ row_data }}
25 |
26 |
27 |
28 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | 3 | from setuptools import setup, find_packages 4 | 5 | with open('VERSION') as f: 6 | naarad_version = f.read().strip() 7 | 8 | with open('requirements.txt') as f: 9 | required = f.read().splitlines() 10 | 11 | setup(name="naarad", 12 | description='Naarad is a Performance Analysis tool', 13 | url='https://github.com/linkedin/naarad', 14 | author='Naarad Developers', 15 | author_email='naarad-dev@googlegroups.com', 16 | version=naarad_version, 17 | packages=['naarad', 'naarad.metrics', 'naarad.graphing', 'naarad.reporting', 'naarad.run_steps', 'naarad.resources'], 18 | scripts=['bin/naarad', 'bin/PrintGCStats', 'bin/naarad_metric_collector.sh', 'bin/addDateStampToGC'], 19 | package_dir={'': 'src'}, 20 | package_data={'': ['src/naarad/resources/*.html']}, 21 | include_package_data=True, 22 | install_requires=required, 23 | license='Apache 2.0', 24 | ) 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | 24 | # PyInstaller 25 | # Usually these files are written by a python script from a template 26 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
27 | *.manifest 28 | *.spec 29 | 30 | # Installer logs 31 | pip-log.txt 32 | pip-delete-this-directory.txt 33 | 34 | # Unit test / coverage reports 35 | htmlcov/ 36 | .tox/ 37 | .coverage 38 | .cache 39 | nosetests.xml 40 | coverage.xml 41 | 42 | # Translations 43 | *.mo 44 | *.pot 45 | 46 | # Django stuff: 47 | *.log 48 | 49 | # Sphinx documentation 50 | docs/_build/ 51 | 52 | # PyBuilder 53 | target/ 54 | 55 | # Mac Files 56 | .DS_Store 57 | .AppleDouble 58 | .LSOverride 59 | 60 | # Temporary Files from Editors 61 | *.swp 62 | *.swo 63 | \#*\# 64 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/algorithms/anomaly_detector_algorithms/all.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | © 2014 LinkedIn Corp. All rights reserved. 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
11 | """ 12 | from luminol.algorithms.anomaly_detector_algorithms import * 13 | 14 | anomaly_detector_algorithms = { 15 | 'bitmap_detector': bitmap_detector.BitmapDetector, 16 | 'default_detector': default_detector.DefaultDetector, 17 | 'derivative_detector': derivative_detector.DerivativeDetector, 18 | 'exp_avg_detector': exp_avg_detector.ExpAvgDetector, 19 | 'absolute_threshold': absolute_threshold.AbsoluteThreshold, 20 | 'diff_percent_threshold': diff_percent_threshold.DiffPercentThreshold 21 | } 22 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/modules/correlation_result.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | © 2014 LinkedIn Corp. All rights reserved. 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | """ 12 | """ 13 | Correlation object 14 | """ 15 | 16 | 17 | class CorrelationResult(object): 18 | 19 | def __init__(self, shift, coefficient, shifted_coefficient): 20 | """ 21 | Construct a CorrelationResult object. 22 | :param int shift: the amount of shift where the coefficient is obtained. 23 | :param float coefficient: the correlation coefficient. 24 | :param float shifted_coefficient: the correlation coefficient with shift taken into account. 
25 | """ 26 | self.shift = shift 27 | self.coefficient = coefficient 28 | self.shifted_coefficient = shifted_coefficient 29 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/exceptions.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | © 2014 LinkedIn Corp. All rights reserved. 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | """ 12 | 13 | """ 14 | Exception Classes 15 | """ 16 | 17 | 18 | class AlgorithmNotFound(Exception): 19 | 20 | """ 21 | Raise when algorithm can not be found. 22 | """ 23 | pass 24 | 25 | 26 | class RequiredParametersNotPassed(Exception): 27 | 28 | """ 29 | Raise when algorithm can not be properly initialized because some required parameters are not passed in init. 30 | """ 31 | pass 32 | 33 | 34 | class InvalidDataFormat(Exception): 35 | 36 | """ 37 | Raise when data has invalid format. 38 | """ 39 | pass 40 | 41 | 42 | class NotEnoughDataPoints(Exception): 43 | 44 | """ 45 | Raise when there are not enough data points. 
46 | """ 47 | pass 48 | -------------------------------------------------------------------------------- /src/naarad/resources/default_report_header.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {%- if report_title %} 5 | {{ report_title }} 6 | {%- else %} 7 | naarad analysis report 8 | {%- endif %} 9 | {%- if custom_javascript_includes %} 10 | {%- for custom_javascript in custom_javascript_includes %} 11 | {%- if 'http' in custom_javascript %} 12 | 13 | {%- else %} 14 | 15 | {% endif %} 16 | {%- endfor %} 17 | {%- endif %} 18 | {%- if custom_stylesheet_includes %} 19 | {%- for custom_stylesheet in custom_stylesheet_includes %} 20 | {%- if 'http' in custom_stylesheet %} 21 | 22 | {%- else %} 23 | 24 | {%- endif %} 25 | {%- endfor %} 26 | {%- endif %} 27 | 33 | -------------------------------------------------------------------------------- /test/matplotlib/test2.csv: -------------------------------------------------------------------------------- 1 | 2012-02-23 22:33:06.697000,1.1880000 2 | 2012-02-23 22:46:44.711000,1.9470000 3 | 2012-02-23 22:48:29.134000,1.9590000 4 | 2012-02-23 22:49:31.614000,1.9850000 5 | 2012-02-23 22:50:00.583000,2.0130000 6 | 2012-02-23 22:50:40.552000,2.0650000 7 | 2012-02-23 22:51:15.093000,2.1490000 8 | 2012-02-23 22:51:47.879000,2.1750000 9 | 2012-02-23 22:52:17.464000,2.1530000 10 | 2012-02-23 22:53:03.168000,2.1030000 11 | 2012-02-23 22:53:46.611000,2.1340000 12 | 2012-02-23 22:54:16.462000,2.2560000 13 | 2012-02-23 22:54:51.640000,2.1170000 14 | 2012-02-23 22:55:23.744000,2.1230000 15 | 2012-02-23 22:55:57.479000,2.1850000 16 | 2012-02-23 22:56:30.667000,2.1850000 17 | 2012-02-23 22:57:14.028000,2.1440000 18 | 2012-02-23 22:57:50.580000,2.1040000 19 | 2012-02-23 22:58:25.150000,2.2810000 20 | 2012-02-23 22:58:55.941000,2.1370000 21 | 2012-02-23 22:59:21.211000,2.1990000 22 | 2012-02-23 22:59:49.952000,2.1760000 23 | 2012-02-23 23:00:17.426000,2.2670000 24 | 2012-02-23 
23:00:43.475000,2.1500000 25 | 2012-02-23 23:01:27.593000,2.2830000 26 | 2012-02-23 23:02:00.605000,2.2120000 27 | 2012-02-23 23:02:37.087000,2.2830000 28 | 2012-02-23 23:02:58.372000,2.2410000 29 | 2012-02-23 23:03:33.802000,2.2240000 30 | 2012-02-23 23:04:31.253000,2.1320000 31 | 2012-02-23 23:05:13.116000,2.1500000 32 | 2012-02-23 23:05:58.937000,2.0690000 33 | 2012-02-23 23:06:42.485000,2.1620000 34 | 2012-02-23 23:07:21.076000,2.0700000 35 | 2012-02-23 23:08:17.432000,2.1150000 36 | -------------------------------------------------------------------------------- /examples/conf/config-integration: -------------------------------------------------------------------------------- 1 | [GC] 2 | infile=gc.log 3 | sub_metrics=alloc promo used0 used1 used commit0 commit1 commit gen0 gen0t gen0usr gen0sys cmsIM cmsRM cmsRS GCPause cmsCM cmsCP cmsCS cmsCR safept apptime 4 | GCPause.sla=mean<0.05 p50<0.05 p99<0.05 5 | 6 | [SAR-device] 7 | infile=sar.device.out 8 | devices=sda sdb 9 | options=tps rd_sec/s %util 10 | 11 | [SAR-cpuusage-host1] 12 | hostname=host1 13 | infile=sar.cpuusage.out 14 | 15 | [SAR-memory] 16 | infile=sar.memory.out 17 | 18 | [SAR-cpuusage-host2] 19 | hostname=host2 20 | infile=sar.cpuusage.out 21 | 22 | [CLUSTER-1] 23 | aggr_hosts=host1 host2 24 | aggr_metrics=SAR-cpuusage.all.%sys:raw,sum,avg,count 25 | 26 | [JMETER] 27 | infile=perf-result.xml 28 | aggregation_granularity=minute 29 | Overall_Summary.ResponseTime.sla=mean<100000 p95<20000 30 | 31 | [PROCMEMINFO] 32 | hostname=localhost 33 | infile=procmeminfo.out 34 | sub_metrics=MemTotal MemFree Active Inactive 35 | ts_start=2013-12-05 12:04:58 36 | ts_end=2013-12-05 12:14:58 37 | 38 | [PROCVMSTAT] 39 | hostname=localhost 40 | infile=procvmstat.out 41 | sub_metrics=nr_free_pages nr_inactive_anon 42 | 43 | [PROCZONEINFO] 44 | hostname=localhost 45 | infile=proczoneinfo.out 46 | sub_metrics=pages.min nr_free_pages 47 | zones=Node.0.zone.DMA 48 | 49 | [GRAPH] 50 | graphing_library=matplotlib 51 | 
graphs=GC.GCPause,GC.alloc,GC.promo GC.GCPause,GC.gen0t,GC.gen0sys JMETER.Overall_Summary.ResponseTime,JMETER.Overall_Summary.qps,JMETER.Overall_Summary.ErrorsPerSecond 52 | -------------------------------------------------------------------------------- /src/naarad/graphing/plot_data.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | """ 17 | 18 | 19 | class PlotData: 20 | """Class to hold details of the metrics to be plotted""" 21 | def __init__(self, input_csv, csv_column, series_name, y_label, precision, graph_height, graph_width, graph_type, x_label=None, plot_label=None, 22 | highlight_regions=None): 23 | self.input_csv = input_csv 24 | self.csv_column = csv_column 25 | self.graph_title = series_name 26 | self.y_label = y_label 27 | self.precision = precision 28 | if graph_height is None: 29 | self.graph_height = 600 30 | else: 31 | self.graph_height = graph_height 32 | if graph_width is None: 33 | self.graph_width = 1200 34 | else: 35 | self.graph_width = graph_width 36 | self.graph_type = graph_type 37 | self.plot_label = plot_label 38 | self.x_label = x_label 39 | self.highlight_regions = highlight_regions 40 | return None 41 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2013 LinkedIn Corp. All rights reserved. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | 15 | List of other open-source software used/depended on by Naarad: 16 | 17 | PrintGCStats 18 | https://java.net/projects/printgcstats/ 19 | Copyright 2013 Oracle 20 | License: BSD 21 | 22 | dygraphs 23 | http://dygraphs.com/ 24 | Copyright 2011 Dan Vanderkam 25 | License: MIT 26 | 27 | numpy 28 | http://www.numpy.org/ 29 | Copyright 2005-2013 NumPy Developers 30 | License: BSD 31 | 32 | matplotlib 33 | http://matplotlib.org/ 34 | Copyright 2012-2013 Matplotlib Development Team 35 | License: PSF (http://matplotlib.org/users/license.html) 36 | 37 | pytz 38 | http://pytz.sourceforge.net/ 39 | Copyright 2008-2013 Stuart Bishop 40 | License: MIT 41 | 42 | sorttable.js 43 | http://www.kryogenix.org/code/browser/sorttable/#licence 44 | License: X11 License 45 | 46 | pygal 47 | http://pygal.org 48 | License: GNU LGPL v3+ (https://www.gnu.org/licenses/lgpl.html) 49 | 50 | bootstrap 51 | http://getbootstrap.com/ 52 | License: Apache License v2.0 (http://www.apache.org/licenses/LICENSE-2.0) -------------------------------------------------------------------------------- /test/httpdownload.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 19.1. HTMLParser — Simple HTML and XHTML parser — Python v2.7.5 documentation 6 | 7 | 8 | 17 | 18 | 19 | 20 | 21 | 22 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /bin/naarad: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | """ 4 | Copyright 2013 LinkedIn Corp. All rights reserved. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 
def main():
  """
  Command line entry point for naarad.

  Parses and validates CLI arguments, sets up logging, then either runs a
  diff between two existing report locations (--diff) or analyzes an input
  directory to produce a report. Exits with the run's status code when
  --exit_code is given.
  """
  args = Utils.get_argument_parser().parse_args()
  Utils.validate_arguments(args)
  logger = logging.getLogger('naarad')
  Utils.init_logging(logger, CONSTANTS.NAARAD_LOG, args.log)
  logger.info('Reports will be in %s', args.output_dir)
  if args.diff:
    # Compare two previously generated reports found at the given locations.
    status = Naarad().diff_reports_by_location(args.diff[0], args.diff[1], args.output_dir, config=args.config, args=args)
  else:
    # Analyze raw logs from input_dir and generate a report in output_dir.
    status = Naarad().analyze(args.input_dir, args.output_dir, config=args.config, skip_plots=args.no_plots, args=args)
  # NOTE(review): repeats the log line emitted before the run; presumably
  # intentional so the report location is visible at the end of a long run.
  logger.info('Reports will be in %s', args.output_dir)
  if args.exit_code:
    sys.exit(status)

if __name__ == '__main__':
  main()
class Anomaly(object):

    """
    A single detected anomaly period within a time series.
    """

    def __init__(self, start_timestamp, end_timestamp, anomaly_score, exact_timestamp):
        """
        Create an anomaly record.
        :param:start_timestamp: start time of the anomaly period.
        :param:end_timestamp: end time of the anomaly period.
        :param:anomaly_score: the score of the anomaly.
        :param:exact_timestamp: the timestamp within the period where the anomaly likely happened.
        """
        self.start_timestamp = start_timestamp
        self.end_timestamp = end_timestamp
        self.anomaly_score = anomaly_score
        self.exact_timestamp = exact_timestamp

    def get_time_window(self):
        """
        Return the anomaly period as a (start, end) tuple.
        :return tuple: the anomaly period boundaries.
        """
        window = (self.start_timestamp, self.end_timestamp)
        return window

    def __str__(self):
        """
        Human readable representation of the anomaly.
        :return: string
        """
        return "Anomaly from {0} to {1} with score {2}".format(self.start_timestamp, self.end_timestamp, self.anomaly_score)
class CorrelatorAlgorithm(object):

    """
    Abstract base class for correlator algorithms.

    Subclasses override _detect_correlation() and are expected to store
    their result in self.correlation_result.
    """
    def __init__(self, class_name, time_series_a, time_series_b):
        """
        Initializer
        :param class_name: name of the extending class.
        :param TimeSeries time_series_a: TimeSeries a.
        :param TimeSeries time_series_b: TimeSeries b.
        """
        self.class_name = class_name
        self.time_series_a = time_series_a
        self.time_series_b = time_series_b

    def _detect_correlation(self):
        """
        Compute the correlation between the two series.
        Must be implemented by subclasses.
        """
        raise NotImplementedError

    def get_correlation_result(self):
        """
        Return the result computed by a prior run().
        :return CorrelationResult: a CorrelationResult object represents the correlation result.
        """
        return self.correlation_result

    def run(self):
        """
        Execute the algorithm and return its result.
        :return CorrelationResult: a CorrelationResult object represents the correlation result.
        """
        self._detect_correlation()
        return self.correlation_result
class Luminol(object):

    def __init__(self, anomalies, correlations):
        """
        :param list anomalies: a list of `Anomaly` objects.
        `Anomaly` is defined in luminol.modules.anomaly.

        :param dict correlations: a dict represents correlated metrics(`TimeSeries` object) to each anomaly.
        each key-value pair looks like this:
        `Anomaly` --> [metric1, metric2, metric3 ...].
        """
        self.anomalies = anomalies
        self.correlations = correlations
        self._analyze_root_causes()

    # TODO(yaguo): Replace this with valid root cause analysis.
    def _analyze_root_causes(self):
        """
        Pick a root cause for every anomaly.
        For now the first correlated metric is taken as the root cause.
        """
        root_causes = {}
        for anomaly in self.anomalies:
            try:
                root_causes[anomaly] = self.correlations[anomaly][0]
            except IndexError:
                raise exceptions.InvalidDataFormat('luminol.luminol: dict correlations contains empty list.')
        self.causes = root_causes

    def get_root_causes(self):
        """
        Return the chosen root cause per anomaly.
        :return dict: root cause for each anomaly, or None when analysis has not populated them.
        """
        return getattr(self, 'causes', None)
class Run_Step(object):
  """
  Base class describing a "run step" executed around an analysis, such as a
  workload kickoff, a pre-run setup script, or a post-run script.
  """

  def __init__(self, run_type, run_cmd, call_type, run_order, run_rank, should_wait=True, kill_after_seconds=None):
    """
    Init method
    :param run_type: Type of run_step: "workload" only for now
    :param run_cmd: Details of command to be run. It could be a command or API call
    :param call_type: Kind of call -- local or remote
    :param run_order: When to run this w.r.t analysis. One of ('pre', 'in', 'post')
    :param run_rank: In what order to run this
    :param should_wait: Boolean whether naarad should wait for the run command to finish or not before moving on
    :param kill_after_seconds: Seconds for which the command should be run before being killed
    :return: None
    """
    self.run_type = run_type
    self.run_cmd = run_cmd
    self.call_type = call_type
    self.run_order = run_order
    self.run_rank = run_rank
    self.should_wait = should_wait
    self.kill_after_seconds = kill_after_seconds
    # No kill timer exists until one is scheduled.
    self.timer = None
def setup_module():
  """
  Build the module-level Diff object exercised by the tests below.
  """
  global diff_obj
  # NOTE(review): reports_list, report_name, output_directory,
  # resource_directory and resource_path are unused; the Diff below is
  # constructed from literals directly.
  reports_list = []
  report_name = 'diff'
  output_directory = ''
  resource_directory = ''
  resource_path = 'resource_path'
  naarad_reports = [NaaradReport('/tmp', None), NaaradReport('/tmp', None)]
  diff_obj = Diff(naarad_reports, 'diff', '/tmp', '/tmp', 'resources')


def test_collect_cdf_datasources():
  """
  Test whether collect_cdf_datasources works as expected
  :return: None
  """
  global diff_obj
  # Both reports share a.csv and b.csv; c.csv / d.csv are unique per side.
  diff_obj.reports[0].cdf_datasource = ['a.csv', 'b.csv', 'c.csv']
  diff_obj.reports[1].cdf_datasource = ['a.csv', 'b.csv', 'd.csv']
  return_code = diff_obj.collect_cdf_datasources()
  # Success: both reports are reduced to the common datasources only.
  assert return_code is True
  assert diff_obj.reports[0].cdf_datasource == diff_obj.reports[1].cdf_datasource
  assert len(diff_obj.reports[0].cdf_datasource) == 2
  assert diff_obj.reports[0].cdf_datasource[0] == 'a.csv'
  assert diff_obj.reports[0].cdf_datasource[1] == 'b.csv'
  # With one side empty there is no common datasource, so the call fails.
  diff_obj.reports[0].cdf_datasource = []
  diff_obj.reports[1].cdf_datasource = ['a.csv', 'b.csv', 'd.csv']
  return_code = diff_obj.collect_cdf_datasources()
  assert return_code is False
class DefaultDetector(AnomalyDetectorAlgorithm):

    """
    Default detector.
    Not configurable.

    Combines ExpAvgDetector and DerivativeDetector scores into one weighted
    anomaly score per timestamp.
    """
    def __init__(self, time_series, baseline_time_series=None):
        """
        Initializer
        :param TimeSeries time_series: a TimeSeries object.
        :param TimeSeries baseline_time_series: baseline TimeSeries.
        """
        # NOTE(review): AnomalyDetectorAlgorithm.__init__ is not called here,
        # so base-class attributes (class_name, time_series, ...) are never
        # set on this detector itself -- confirm this is intentional.
        self.exp_avg_detector = ExpAvgDetector(time_series, baseline_time_series)
        self.derivative_detector = DerivativeDetector(time_series, baseline_time_series)

    def _set_scores(self):
        """
        Set anomaly scores using a weighted sum.

        Stores the combined scores in self.anom_scores as a TimeSeries after
        denoising.
        """
        anom_scores_ema = self.exp_avg_detector.run()
        anom_scores_deri = self.derivative_detector.run()
        anom_scores = {}
        for timestamp in anom_scores_ema.timestamps:
            # Compute a weighted anomaly score.
            anom_scores[timestamp] = max(anom_scores_ema[timestamp],
                                         anom_scores_ema[timestamp] * DEFAULT_DETECTOR_EMA_WEIGHT + anom_scores_deri[timestamp] * (1 - DEFAULT_DETECTOR_EMA_WEIGHT))
            # If ema score is significant enough, take the bigger one of the weighted score and deri score.
            if anom_scores_ema[timestamp] > DEFAULT_DETECTOR_EMA_SIGNIFICANT:
                anom_scores[timestamp] = max(anom_scores[timestamp], anom_scores_deri[timestamp])
        self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))
21 |
22 |
23 |
24 | 30 |
31 |
32 | 38 |
39 |
40 |
41 |
42 |
43 | 44 |
45 |
46 |
47 |
48 |
49 | 50 |

#!/bin/bash
# Collect system metrics (top, sar, /proc snapshots, netstat) in the
# background for later analysis by naarad.  Optional $1 is the base output
# directory; results are written to <base>/sar-results-<date>.

PATH=$PATH:/sbin:/usr/sbin:/usr/local/sbin
# NOTE(review): NOW and TODAY appear unused below ($t is used instead).
NOW=`date +%s`
TODAY=`date +%Y-%m-%d`
# COUNT samples collected every INTERVAL seconds (450 x 2s = 15 minutes).
COUNT=450
INTERVAL=2
export PATH

t=`date +"%Y-%m-%d"`
if [ -z "$1" ]
then
  echo "No argument supplied, using current directory as base output directory."
  export RESULT="sar-results-$t"
else
  export RESULT="$1/sar-results-$t"
fi

mkdir -p $RESULT

#####################
# EVERY INTERVAL SECONDS
#####################
# Each collector is backgrounded so they all sample concurrently.
echo $t >> $RESULT/top.out &
top -b -c -n $COUNT -d $INTERVAL | grep -A 40 '^top' >> $RESULT/top.out &
sar -B $INTERVAL $COUNT >> $RESULT/sar.paging.out &
sar -d -p $INTERVAL $COUNT >> $RESULT/sar.device.out &
sar -R $INTERVAL $COUNT >> $RESULT/sar.memory.out &
sar -r $INTERVAL $COUNT >> $RESULT/sar.memutil.out &
sar -u ALL -P ALL $INTERVAL $COUNT >> $RESULT/sar.cpuusage.out &
sar -n DEV $INTERVAL $COUNT >> $RESULT/sar.network.out &
sar -W $INTERVAL $COUNT >> $RESULT/sar.swapping.out &
sar -m -P ALL $INTERVAL $COUNT >> $RESULT/sar.cpuhz.out &
sar -n EDEV $INTERVAL $COUNT >> $RESULT/sar.edev.out &
sar -n TCP $INTERVAL $COUNT >> $RESULT/sar.tcp.out &
sar -n ETCP $INTERVAL $COUNT >> $RESULT/sar.etcp.out &
sar -n SOCK $INTERVAL $COUNT >> $RESULT/sar.sock.out &
sar -w $INTERVAL $COUNT >> $RESULT/sar.switching.out &
sar -q $INTERVAL $COUNT >> $RESULT/sar.queue.out &

#####################
# EVERY INTERVAL SECONDS
#####################
# /proc and netstat snapshots: each line is prefixed with a timestamp via sed.
COUNT1=$COUNT
INTERVAL1=$INTERVAL
for ((i=1; i<= $COUNT1; i++)); do cat /proc/meminfo | sed "s/^/$(date +%Y-%m-%d\ %H:%M:%S.%05N)\t/" >> $RESULT/proc.meminfo.out; sleep $INTERVAL1 ; done &
for ((i=1; i<= $COUNT1; i++)); do cat /proc/vmstat | sed "s/^/$(date +%Y-%m-%d\ %H:%M:%S.%05N)\t/" >> $RESULT/proc.vmstat.out; sleep $INTERVAL1 ; done &
for ((i=1; i<= $COUNT1; i++)); do cat /proc/zoneinfo | sed "s/^/$(date +%Y-%m-%d\ %H:%M:%S.%05N)\t/" >> $RESULT/proc.zoneinfo.out; sleep $INTERVAL1 ; done &
for ((i=1; i<= $COUNT1; i++)); do cat /proc/interrupts | sed "s/^/$(date +%Y-%m-%d\ %H:%M:%S.%05N)\t/" >> $RESULT/proc.interrupts.out; sleep $INTERVAL1 ; done &
for ((i=1; i<= $COUNT1; i++)); do netstat -s | sed "s/^/$(date +%Y-%m-%d\ %H:%M:%S.%05N)\t/" >> $RESULT/netstat.out; sleep $INTERVAL1 ; done &
g1-pause-young.clear-ct g1-pause-young.other g1-pause-young.other.choose-cset g1-pause-young.other.ref-proc g1-pause-young.other.reg-enq g1-pause-young.other.free-cset g1-pause-mixed.parallel g1-pause-mixed.parallel.gcworkers g1-pause-mixed.parallel.ext-root-scanning.avg g1-pause-mixed.parallel.ext-root-scanning.max g1-pause-mixed.parallel.update-rs.avg g1-pause-mixed.parallel.update-rs.max g1-pause-mixed.parallel.update-rs.processed-buffers.avg g1-pause-mixed.parallel.update-rs.processed-buffers.max g1-pause-mixed.parallel.scan-rs.avg g1-pause-mixed.parallel.scan-rs.max g1-pause-mixed.parallel.object-copy-rs.avg g1-pause-mixed.parallel.object-copy-rs.max g1-pause-mixed.parallel.termination.avg g1-pause-mixed.parallel.termination.max g1-pause-mixed.parallel.gc-worker-other.avg g1-pause-mixed.parallel.gc-worker-other.max g1-pause-mixed.parallel.gc-worker-total.avg g1-pause-mixed.parallel.gc-worker-total.max g1-pause-mixed.parallel.gc-worker-end.avg g1-pause-mixed.parallel.gc-worker-end.max g1-pause-mixed.code-root-fixup g1-pause-mixed.clear-ct g1-pause-mixed.other g1-pause-mixed.other.choose-cset g1-pause-mixed.other.ref-proc g1-pause-mixed.other.reg-enq g1-pause-mixed.other.free-cset g1-pause-young.parallel.gc-worker-start.avg g1-pause-young.parallel.gc-worker-start.max g1-pause-mixed.parallel.gc-worker-start.avg g1-pause-mixed.parallel.gc-worker-start.max 4 | 5 | [GRAPH] 6 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | © 2014 LinkedIn Corp. All rights reserved. 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
def compute_ema(smoothing_factor, points):
    """
    Compute exponential moving average of a list of points.
    :param float smoothing_factor: the smoothing factor.
    :param list points: the data points.
    :return list: all ema in a list; empty when points is empty.
    """
    ema = []
    # The initial point has an ema equal to itself.
    if points:
        ema.append(points[0])
    for i in range(1, len(points)):
        ema.append(smoothing_factor * points[i] + (1 - smoothing_factor) * ema[i - 1])
    return ema


def read_csv(csv_name):
    """
    Read data from a csv file into a dictionary.
    :param str csv_name: path to a csv file.
    :return dict: a dictionary represents the data in file.
    :raise InvalidDataFormat: if csv_name is not a string.
    """
    data = {}
    if not isinstance(csv_name, (str, unicode)):
        raise exceptions.InvalidDataFormat('luminol.utils: csv_name has to be a string!')
    with open(csv_name, 'r') as csv_data:
        reader = csv.reader(csv_data, delimiter=',', quotechar='|')
        for row in reader:
            try:
                key = to_epoch(row[0])
                value = float(row[1])
                data[key] = value
            except ValueError:
                # Skip rows (e.g. headers) whose value is not numeric.
                pass
    return data


def to_epoch(t_str):
    """
    Convert a timestamp string to an epoch number.

    Numeric strings are returned as floats unchanged; datetime strings are
    parsed against constants.TIMESTAMP_STR_FORMATS and converted to epoch
    milliseconds.
    :param str t_str: a timestamp string.
    :return float: epoch number of the timestamp.
    :raise InvalidDataFormat: when the string matches no known format.
    """
    try:
        # Fast path: the string is already a plain number.
        return float(t_str)
    except (ValueError, TypeError):
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        for fmt in constants.TIMESTAMP_STR_FORMATS:
            try:
                t = datetime.datetime.strptime(t_str, fmt)
                return float(time.mktime(t.utctimetuple()) * 1000.0 + t.microsecond / 1000.0)
            except (ValueError, TypeError):
                pass
        raise exceptions.InvalidDataFormat
"""
# Integration test for the public naarad API: runs two live collection
# windows and exercises analyze/diff against the bundled example logs.

import ConfigParser
import os
import sys
import time
# Make the in-repo naarad package importable without installation.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'src')))
from naarad import Naarad
import naarad.naarad_constants as CONSTANTS
# Shared Naarad instance, created once per test module in setup_module().
naarad_obj = None


def setup_module():
    """Create the module-level Naarad instance used by the tests."""
    global naarad_obj
    naarad_obj = Naarad()


def test_naarad_apis():
    """
    Exercise the naarad API end to end: start/stop two test windows (one via a
    config-file path, one via a ConfigParser object), analyze the example
    logs, fetch SLA/stat data for both windows, and diff the two reports.
    :return: None
    """
    examples_directory = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'examples')
    config_file = os.path.join(os.path.join(examples_directory, 'conf'), 'config-gc')
    config_object = ConfigParser.ConfigParser()
    # Preserve option-name case (ConfigParser lower-cases keys by default).
    config_object.optionxform = str
    config_object.read(config_file)
    input_directory = os.path.join(examples_directory, 'logs')
    output_directory = 'test_api_temp'
    diff_output_directory = 'test_api_temp/diff_location'
    report1_location = 'test_api_temp/0'
    report2_location = 'test_api_temp/1'
    global naarad_obj
    # First window: signal_start accepts a config file path.
    test_id_1 = naarad_obj.signal_start(config_file)
    time.sleep(60)  # let a minute of data accumulate
    naarad_obj.signal_stop(test_id_1)
    # Second window: signal_start also accepts a ConfigParser object.
    test_id_2 = naarad_obj.signal_start(config_object)
    time.sleep(60)
    naarad_obj.signal_stop(test_id_2)
    if naarad_obj.analyze(input_directory, output_directory) != CONSTANTS.OK:
        print naarad_obj.get_failed_analyses()
    naarad_obj.get_sla_data(test_id_1)
    naarad_obj.get_stats_data(test_id_1)
    naarad_obj.get_sla_data(test_id_2)
    naarad_obj.get_stats_data(test_id_2)
    if naarad_obj.diff(test_id_1, test_id_2, None) != CONSTANTS.OK:
        print 'Error encountered during diff'
    # NOTE(review): here a truthy return is treated as an error, unlike diff()
    # above which is compared against CONSTANTS.OK -- confirm the intended
    # return convention of diff_reports_by_location.
    if naarad_obj.diff_reports_by_location(report1_location, report2_location, diff_output_directory, None):
        print 'Error encountered during diff'
    print 'Please inspect the generated reports manually'
--------------------------------------------------------------------------------
/lib/luminol/src/luminol/algorithms/anomaly_detector_algorithms/__init__.py:
--------------------------------------------------------------------------------
# coding=utf-8
"""
© 2014 LinkedIn Corp. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
"""
# Star import re-exports the luminol constants (e.g. DEFAULT_NOISE_PCT_THRESHOLD)
# used by this base class and the detector submodules.
from luminol.constants import *

__all__ = ['bitmap_detector', 'derivative_detector', 'exp_avg_detector', 'default_detector', 'absolute_threshold',
           'diff_percent_threshold']


class AnomalyDetectorAlgorithm(object):

    """
    Base Class for AnomalyDetector algorithm.
    Subclasses implement _set_scores() to populate self.anom_scores.
    """
    def __init__(self, class_name, time_series, baseline_time_series=None):
        """
        Initializer
        :param str class_name: extended class name.
        :param TimeSeries time_series: a TimeSeries object.
        :param TimeSeries baseline_time_series: baseline TimeSeries.
        """
        self.class_name = class_name
        self.time_series = time_series
        # Cache the series length for subclasses.
        self.time_series_length = len(time_series)
        self.baseline_time_series = baseline_time_series

    def run(self):
        """
        Run the algorithm to get anomalies.
        :return list: a list of Anomaly objects.
        """
        self._set_scores()
        return self.anom_scores

    def _denoise_scores(self, scores):
        """
        Denoise anomaly scores.
        Low anomaly scores could be noisy. The following two series will have a good
        correlation result without denoise:
        [0.08, 4.6, 4.6, 4.6, 1.0, 1.0]
        [0.0010, 0.0012, 0.0012, 0.0008, 0.0008]
        while the second series is pretty flat(suppose it has a max score of 100).
        :param dict scores: the scores to be denoised (modified in place).
        :return dict: the same dict, with scores below
            DEFAULT_NOISE_PCT_THRESHOLD * max zeroed out.
        """
        if scores:
            maximal = max(scores.values())
            # Skip the pass entirely when every score is zero.
            if maximal:
                for key in scores:
                    if scores[key] < DEFAULT_NOISE_PCT_THRESHOLD * maximal:
                        scores[key] = 0
        return scores

    # Need to be extended.
    def _set_scores(self):
        """
        Compute anomaly scores for the time series.
        Subclasses must assign self.anom_scores here.
        """
        raise NotImplementedError

    def get_scores(self):
        """
        Get anomaly scores for the time series.
        NOTE(review): only valid after run()/_set_scores() has populated
        self.anom_scores -- calling it earlier raises AttributeError.
        :return TimeSeries: a TimeSeries representation of the anomaly scores.
        """
        return self.anom_scores
--------------------------------------------------------------------------------
/src/naarad/naarad_imports.py:
--------------------------------------------------------------------------------
# coding=utf-8
"""
Copyright 2013 LinkedIn Corp. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
16 | """ 17 | 18 | 19 | def import_modules(modules_dict, is_class_type=True): 20 | return_dict = {} 21 | for module_name, module_string in modules_dict.items(): 22 | try: 23 | if is_class_type: 24 | file_name, class_name = module_string.rsplit('.', 1) 25 | mod = __import__(file_name, fromlist=[class_name]) 26 | return_dict[module_name] = getattr(mod, class_name) 27 | else: 28 | return_dict[module_name] = __import__(module_string, fromlist=[module_string]) 29 | except ImportError: 30 | pass 31 | return return_dict 32 | 33 | metric_imports_dict = { 34 | 'GC': 'naarad.metrics.gc_metric.GCMetric', 35 | 'INNOTOP': 'naarad.metrics.innotop_metric.INNOMetric', 36 | 'JMETER': 'naarad.metrics.jmeter_metric.JmeterMetric', 37 | 'LINKEDINANDROIDRUM': 'naarad.metrics.linkedin_android_rum_metric.LinkedInAndroidRumMetric', 38 | 'PROCVMSTAT': 'naarad.metrics.procvmstat_metric.ProcVmstatMetric', 39 | 'PROCMEMINFO': 'naarad.metrics.procmeminfo_metric.ProcMeminfoMetric', 40 | 'PROCZONEINFO': 'naarad.metrics.proczoneinfo_metric.ProcZoneinfoMetric', 41 | 'PROCINTERRUPTS': 'naarad.metrics.procinterrupts_metric.ProcInterruptsMetric', 42 | 'SAR': 'naarad.metrics.sar_metric.SARMetric', 43 | 'TOP': 'naarad.metrics.top_metric.TopMetric', 44 | 'NETSTAT': 'naarad.metrics.netstat_metric.NetstatMetric' 45 | } 46 | 47 | graphing_imports_dict = { 48 | 'matplotlib': 'naarad.graphing.matplotlib_naarad', 49 | 'svg': 'naarad.graphing.pygal_naarad' 50 | } 51 | 52 | aggregate_metric_imports_dict = { 53 | 'CLUSTER': 'naarad.metrics.cluster_metric.ClusterMetric' 54 | } 55 | 56 | reporting_imports_dict = { 57 | 'report': 'naarad.reporting.report.Report' 58 | } 59 | 60 | metric_classes = import_modules(metric_imports_dict) 61 | 62 | graphing_modules = import_modules(graphing_imports_dict, is_class_type=False) 63 | 64 | aggregate_metric_classes = import_modules(aggregate_metric_imports_dict) 65 | 66 | reporting_modules = import_modules(reporting_imports_dict) 67 | 
-------------------------------------------------------------------------------- /examples/logs/netstat.tcp.out: -------------------------------------------------------------------------------- 1 | 2014-04-14 12:09:01.67581 Active Internet connections (w/o servers) 2 | 2014-04-14 12:09:01.67581 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name 3 | 2014-04-14 12:09:01.67581 tcp 0 500 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox 4 | 2014-04-14 12:09:01.67581 tcp 120 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox 5 | 2014-04-14 12:09:03.76251 Active Internet connections (w/o servers) 6 | 2014-04-14 12:09:03.76251 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name 7 | 2014-04-14 12:09:03.76251 tcp 0 200 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox 8 | 2014-04-14 12:09:03.76251 tcp 330 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox 9 | 2014-04-14 12:09:05.84302 Active Internet connections (w/o servers) 10 | 2014-04-14 12:09:05.84302 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name 11 | 2014-04-14 12:09:05.84302 tcp 0 345 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox 12 | 2014-04-14 12:09:05.84302 tcp 440 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox 13 | 2014-04-14 12:09:07.91455 Active Internet connections (w/o servers) 14 | 2014-04-14 12:09:07.91455 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name 15 | 2014-04-14 12:09:07.91455 tcp 0 0 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox 16 | 2014-04-14 12:09:07.91455 tcp 1550 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox 17 | 2014-04-14 12:09:09.98031 Active Internet connections (w/o servers) 18 | 2014-04-14 12:09:09.98031 Proto Recv-Q Send-Q Local 
Address Foreign Address State PID/Program name 19 | 2014-04-14 12:09:09.98031 tcp 0 564 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox 20 | 2014-04-14 12:09:09.98031 tcp 20 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox 21 | 2014-04-14 12:09:12.05993 Active Internet connections (w/o servers) 22 | 2014-04-14 12:09:12.05993 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name 23 | 2014-04-14 12:09:12.05993 tcp 0 234 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox 24 | 2014-04-14 12:09:12.05993 tcp 3245 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox 25 | -------------------------------------------------------------------------------- /examples/conf/config-gc-1: -------------------------------------------------------------------------------- 1 | [GC] 2 | infile=g1.log 3 | sub_metrics=appstop alloc promo used0 used1 used commit0 commit1 commit gen0 gen0t gen0usr gen0sys gen0real gen1t cmsIM cmsRM cmsRS GCPause cmsCM cmsCP cmsCS cmsCR safept apptime used0AfterGC used1AfterGC usedAfterGC g1-pause-young g1-pause-mixed g1-pause-remark g1-pause-cleanup g1-pause-remark.ref-proc g1-pause-young.parallel g1-pause-young.parallel.gcworkers g1-pause-young.parallel.ext-root-scanning.avg g1-pause-young.parallel.ext-root-scanning.max g1-pause-young.parallel.update-rs.avg g1-pause-young.parallel.update-rs.max g1-pause-young.parallel.update-rs.processed-buffers.avg g1-pause-young.parallel.update-rs.processed-buffers.max g1-pause-young.parallel.scan-rs.avg g1-pause-young.parallel.scan-rs.max g1-pause-young.parallel.object-copy-rs.avg g1-pause-young.parallel.object-copy-rs.max g1-pause-young.parallel.termination.avg g1-pause-young.parallel.termination.max g1-pause-young.parallel.gc-worker-other.avg g1-pause-young.parallel.gc-worker-other.max g1-pause-young.parallel.gc-worker-total.avg g1-pause-young.parallel.gc-worker-total.max 
g1-pause-young.parallel.gc-worker-end.avg g1-pause-young.parallel.gc-worker-end.max g1-pause-young.code-root-fixup g1-pause-young.clear-ct g1-pause-young.other g1-pause-young.other.choose-cset g1-pause-young.other.ref-proc g1-pause-young.other.reg-enq g1-pause-young.other.free-cset g1-pause-mixed.parallel g1-pause-mixed.parallel.gcworkers g1-pause-mixed.parallel.ext-root-scanning.avg g1-pause-mixed.parallel.ext-root-scanning.max g1-pause-mixed.parallel.update-rs.avg g1-pause-mixed.parallel.update-rs.max g1-pause-mixed.parallel.update-rs.processed-buffers.avg g1-pause-mixed.parallel.update-rs.processed-buffers.max g1-pause-mixed.parallel.scan-rs.avg g1-pause-mixed.parallel.scan-rs.max g1-pause-mixed.parallel.object-copy-rs.avg g1-pause-mixed.parallel.object-copy-rs.max g1-pause-mixed.parallel.termination.avg g1-pause-mixed.parallel.termination.max g1-pause-mixed.parallel.gc-worker-other.avg g1-pause-mixed.parallel.gc-worker-other.max g1-pause-mixed.parallel.gc-worker-total.avg g1-pause-mixed.parallel.gc-worker-total.max g1-pause-mixed.parallel.gc-worker-end.avg g1-pause-mixed.parallel.gc-worker-end.max g1-pause-mixed.code-root-fixup g1-pause-mixed.clear-ct g1-pause-mixed.other g1-pause-mixed.other.choose-cset g1-pause-mixed.other.ref-proc g1-pause-mixed.other.reg-enq g1-pause-mixed.other.free-cset g1-pause-young.parallel.gc-worker-start.avg g1-pause-young.parallel.gc-worker-start.max g1-pause-mixed.parallel.gc-worker-start.avg g1-pause-mixed.parallel.gc-worker-start.max g1-eden-occupancy-before-gc g1-eden-capacity-before-gc g1-eden-occupancy-after-gc g1-eden-capacity-after-gc g1-survivor-before-gc g1-survivor-after-gc g1-heap-occupancy-before-gc g1-heap-capacity-before-gc g1-heap-occupancy-after-gc g1-heap-capacity-after-gc g1-young-cpu.sys g1-young-cpu.usr g1-young-cpu.real g1-mixed-cpu.usr g1-mixed-cpu.sys g1-mixed-cpu.real 4 | 5 | [GRAPH] 6 | -------------------------------------------------------------------------------- /src/naarad/graphing/pygal_naarad.py: 
--------------------------------------------------------------------------------
# coding=utf-8
"""
Copyright 2013 LinkedIn Corp. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import datetime
import pygal
import os
import logging

logger = logging.getLogger('naarad.graphing.pygal_naarad')


def convert_to_date(date_str):
    """Parse a 'YYYY-mm-dd HH:MM:SS' string into a datetime."""
    return datetime.datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')


def curate_plot_list(plots):
    """
    Drop plots whose input csv is missing or empty.
    :param list plots: plot definitions, each with an input_csv attribute.
    :return list: the same list with unplottable entries removed.
    """
    delete_nodes = []
    for plot in plots:
        if os.path.exists(plot.input_csv):
            if not os.path.getsize(plot.input_csv):
                logger.warning("%s file is empty. No plot corresponding to this file will be generated", plot.input_csv)
                delete_nodes.append(plot)
        else:
            logger.warning("%s file does not exist. No plot corresponding to this file will be generated", plot.input_csv)
            delete_nodes.append(plot)
    # Remove after the walk so the list is not mutated while being iterated.
    for node in delete_nodes:
        plots.remove(node)
    return plots


def graph_data(list_of_plots, output_directory, resource_path, output_filename):
    """
    Render the given plots into one pygal DateY svg chart plus a .div stub.
    :return tuple: (True, path to the generated .div file).
    """
    date_plot = pygal.DateY(x_label_rotation=20, height=500, width=1200, legend_at_bottom=True, style=pygal.style.BlueStyle)
    for plot in list_of_plots:
        plot_data = []
        with open(plot.input_csv, 'r') as csv_data:
            for line in csv_data:
                line_data = line.strip('\n').split(',')
                # Timestamps may or may not carry fractional seconds.
                if '.' in line_data[0]:
                    plot_data.append((datetime.datetime.strptime(line_data[0], '%Y-%m-%d %H:%M:%S.%f'), float(line_data[1])))
                else:
                    plot_data.append((datetime.datetime.strptime(line_data[0], '%Y-%m-%d %H:%M:%S'), float(line_data[1])))
        date_plot.add(plot.graph_title, plot_data)
    date_plot.render_to_file(os.path.join(output_directory, output_filename + '.svg'))
    with open(os.path.join(output_directory, output_filename + '.div'), 'w') as div_file:
        # NOTE(review): the markup that embeds the rendered svg appears to
        # have been stripped from this copy of the file during extraction --
        # restore the original write() argument from upstream before relying
        # on the generated .div.
        div_file.write('')
    return True, os.path.join(output_directory, output_filename + '.div')


def graph_data_on_the_same_graph(list_of_plots, output_directory, resource_path, output_filename):
    """
    graph_data_on_the_same_graph: put a list of plots on the same graph: currently it supports CDF
    """
    logger.warning('graph_data_on_the_same_graph is currently not supported in pygal')
    return False, None
--------------------------------------------------------------------------------
/lib/luminol/src/luminol/algorithms/anomaly_detector_algorithms/derivative_detector.py:
--------------------------------------------------------------------------------
# coding=utf-8
"""
© 2014 LinkedIn Corp. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
"""
import numpy

from luminol import utils
from luminol.algorithms.anomaly_detector_algorithms import AnomalyDetectorAlgorithm
from luminol.constants import *
from luminol.modules.time_series import TimeSeries


class DerivativeDetector(AnomalyDetectorAlgorithm):

    '''
    Derivative Algorithm.
    This method is the derivative version of Method 1.
    Instead of data point value, it uses the derivative of the data point.
    '''
    def __init__(self, time_series, baseline_time_series=None, smoothing_factor=None):
        """
        Initializer
        :param TimeSeries time_series: a TimeSeries object.
        :param TimeSeries baseline_time_series: baseline TimeSeries.
        :param float smoothing_factor: smoothing factor.
        """
        super(DerivativeDetector, self).__init__(self.__class__.__name__, time_series, baseline_time_series)
        self.smoothing_factor = (smoothing_factor or DEFAULT_DERI_SMOOTHING_FACTOR)
        # Materialize the (timestamp, value) pairs once for index-based access.
        self.time_series_items = self.time_series.items()

    def _compute_derivatives(self):
        """
        Compute derivatives of the time series.
        Stores one absolute derivative per data point in self.derivatives.
        """
        derivatives = []
        for i, (timestamp, value) in enumerate(self.time_series_items):
            if i > 0:
                pre_item = self.time_series_items[i - 1]
                pre_timestamp = pre_item[0]
                pre_value = pre_item[1]
                td = timestamp - pre_timestamp
                # Guard against duplicate timestamps (zero time delta).
                derivative = (value - pre_value) / td if td != 0 else value - pre_value
                derivative = abs(derivative)
                derivatives.append(derivative)
        # First timestamp is assigned the same derivative as the second timestamp.
        if derivatives:
            derivatives.insert(0, derivatives[0])
        self.derivatives = derivatives

    def _set_scores(self):
        """
        Compute anomaly scores for the time series.
        Score = deviation of each derivative from the derivatives' exponential
        moving average, normalized by the standard deviation when non-zero.
        """
        anom_scores = {}
        self._compute_derivatives()
        derivatives_ema = utils.compute_ema(self.smoothing_factor, self.derivatives)
        for i, (timestamp, value) in enumerate(self.time_series_items):
            anom_scores[timestamp] = abs(self.derivatives[i] - derivatives_ema[i])
        stdev = numpy.std(anom_scores.values())
        if stdev:
            for timestamp in anom_scores.keys():
                anom_scores[timestamp] /= stdev
        self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))
--------------------------------------------------------------------------------
/lib/luminol/src/luminol/algorithms/anomaly_detector_algorithms/absolute_threshold.py:
--------------------------------------------------------------------------------
# coding=utf-8
"""
© 2014 LinkedIn Corp. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | """ 12 | 13 | from luminol import utils, exceptions 14 | from luminol.algorithms.anomaly_detector_algorithms import AnomalyDetectorAlgorithm 15 | from luminol.constants import * 16 | from luminol.modules.time_series import TimeSeries 17 | 18 | 19 | class AbsoluteThreshold(AnomalyDetectorAlgorithm): 20 | """ 21 | Anomalies are those data points that are above a pre-specified threshold value. 22 | This algorithm does not take baseline time series. 23 | """ 24 | def __init__(self, time_series, absolute_threshold_value_upper=None, absolute_threshold_value_lower=None, 25 | baseline_time_series=None): 26 | """ 27 | Initialize algorithm, check all required args are present 28 | 29 | :param time_series: The current time series dict to run anomaly detection on 30 | :param absolute_threshold_value_upper: Time series values above this are considered anomalies 31 | :param absolute_threshold_value_lower: Time series values below this are considered anomalies 32 | :param baseline_time_series: A no-op for now 33 | :return: 34 | """ 35 | super(AbsoluteThreshold, self).__init__(self.__class__.__name__, time_series, baseline_time_series) 36 | self.absolute_threshold_value_upper = absolute_threshold_value_upper 37 | self.absolute_threshold_value_lower = absolute_threshold_value_lower 38 | if not self.absolute_threshold_value_lower and not self.absolute_threshold_value_upper: 39 | raise exceptions.RequiredParametersNotPassed('luminol.algorithms.anomaly_detector_algorithms.absolute_threshold: ' 40 | 'Either absolute_threshold_value_upper or ' 41 | 'absolute_threshold_value_lower needed') 42 | 43 | def _set_scores(self): 44 | """ 45 | Compute anomaly scores for the time series 
46 | This algorithm just takes the diff of threshold with current value as anomaly score 47 | """ 48 | anom_scores = {} 49 | for timestamp, value in self.time_series.items(): 50 | anom_scores[timestamp] = 0.0 51 | if self.absolute_threshold_value_upper and value > self.absolute_threshold_value_upper: 52 | anom_scores[timestamp] = value - self.absolute_threshold_value_upper 53 | if self.absolute_threshold_value_lower and value < self.absolute_threshold_value_lower: 54 | anom_scores[timestamp] = self.absolute_threshold_value_lower - value 55 | self.anom_scores = TimeSeries(self._denoise_scores(anom_scores)) 56 | -------------------------------------------------------------------------------- /src/naarad/sla.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | """ 17 | 18 | import logging 19 | 20 | logger = logging.getLogger('naarad.sla') 21 | 22 | 23 | class SLA(object): 24 | 25 | supported_sla_types = ('lt', '<', 'gt', '>', 'eq', '=') 26 | 27 | def __init__(self, metric, sub_metric, stat_name, threshold, sla_type): 28 | if sla_type not in self.supported_sla_types: 29 | logger.error('Unsupported sla type passed : ' + sla_type) 30 | return None 31 | self.metric = metric 32 | self.sub_metric = sub_metric 33 | self.stat_name = stat_name 34 | self.sla_type = sla_type 35 | self.is_processed = False 36 | self.threshold = None 37 | self.display = None 38 | if '%' in threshold: 39 | self.threshold = float(threshold.translate(None, '%')) 40 | self.display = '%' 41 | else: 42 | self.threshold = float(threshold) 43 | self.display = '' 44 | self.sla_passed = None 45 | self.stat_value = None 46 | 47 | def __str__(self): 48 | return "{0} of {1}, threshold: {2}, sla_type: {3}, sla_passed: {4}, display: {5}".format(self.stat_name, self.sub_metric, self.threshold, self.sla_type, 49 | self.sla_passed, self.display) 50 | 51 | def get_csv_repr(self): 52 | return "{0},{1},{2},{3},{4},{5}".format(self.sub_metric, self.stat_name, self.threshold, self.sla_type, self.stat_value, self.sla_passed) 53 | 54 | def check_sla_passed(self, stat_value): 55 | if self.sla_type in ('lt', '<'): 56 | self.grade_lt(stat_value) 57 | elif self.sla_type in ('gt', '>'): 58 | self.grade_gt(stat_value) 59 | elif self.sla_type in ('eq', '='): 60 | self.grade_eq(stat_value) 61 | else: 62 | logger.error('sla type is unsupported') 63 | self.stat_value = stat_value 64 | return self.sla_passed 65 | 66 | def grade_lt(self, stat_value): 67 | self.is_processed = True 68 | if stat_value >= self.threshold: 69 | self.sla_passed = False 70 | else: 71 | self.sla_passed = True 72 | 73 | def grade_gt(self, stat_value): 74 | self.is_processed = True 75 | if stat_value <= self.threshold: 76 | self.sla_passed = False 77 | else: 78 | self.sla_passed = True 79 | 80 | def 
grade_eq(self, stat_value): 81 | self.is_processed = True 82 | if stat_value == self.threshold: 83 | self.sla_passed = True 84 | else: 85 | self.sla_passed = False 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Naarad # 2 | 3 | ## What is Naarad? ## 4 | 5 | Naarad is a framework for performance analysis & rating of sharded & stateful 6 | services. 7 | 8 | ## Why Naarad? ## 9 | 10 | Use-cases: 11 | * Scalability / Headroom Rating 12 | * Continuous Integration ( Performance ) 13 | * Performance Investigation 14 | 15 | Naarad is a highly configurable system analysis tool that parses and plots 16 | timeseries data for better visual correlation. It can be used for performance 17 | analysis of your service/application. You collect data for the metrics you 18 | want to monitor and: 19 | 20 | * Naarad parses JVM Garbage Collection (GC), System/Network (SAR), MySQL 21 | (Innotop), Jmeter (JTL/XML) logs, VMStat, ZoneInfo, and MemInfo 22 | * Naarad reads other metrics you have pre-processed and written in CSV format 23 | * Naarad plots the metrics you specify 24 | 25 | The power of Naarad is in its configurablity. You can use it to glance at 26 | various metrics and then choose the important metrics to plot to visually 27 | correlate the metrics together. An example use-case is when your application's 28 | throughput dropped, you want to know if it was because of some GC activity or a 29 | spike in CPU usage or disk I/O. Naarad can help you investigate such issue 30 | better. 31 | 32 | ## Features ## 33 | 34 | * Configurable input format, so you can specify which metrics to inspect. GC, 35 | SAR and Innotop logs supported currently, with support for more metrics coming 36 | in near future. 37 | * Logs for the supported metrics are parsed by Naarad. 38 | * Also supports generic metric logs in csv format. 
* Pick 'n Choose which metrics to plot together for visual correlation.
* Html report with all the plots for a visual inspection of your application's
performance profile.
* Grading support.
* Diff support. Ability to diff two naarad reports. Reports generated with
naarad version < 1.0.5 are not supported for diff functionality.

## How is it Different? ##

Many tools and frameworks like Zenoss, rrdtool, etc have solved the use-case of
metric collection, parsing and plotting. Naarad has an overlap in functionality
with these tools, but the main advantage of naarad is in its flexibility, which
lets it be a powerful tool for performance investigations. Naarad users are
performance experts who need to look for 'needle in a haystack'. Naarad was
built to support this use-case.

## Installation ##

1. Check out Naarad code:

```
git clone https://github.com/linkedin/naarad.git
```

2. Make sure you have Python (2.6 or 2.7), [pip][pipinstall] and `awk`.
3. Install the necessary Python libraries using PIP.

```
cd naarad; pip install -r requirements.txt
```

4. For problems in installation, check out our [troubleshooting wiki][trouble]

# More Details #

Please check out our [wiki][gitwiki] page for more details on Naarad's usage,
supported metrics etc.

[pipinstall]: http://www.pip-installer.org/en/latest/installing.html
[trouble]: https://github.com/linkedin/naarad/wiki/Troubleshooting
[gitwiki]: https://github.com/linkedin/naarad/wiki
--------------------------------------------------------------------------------
/lib/luminol/src/luminol/constants.py:
--------------------------------------------------------------------------------
# coding=utf-8
"""
© 2014 LinkedIn Corp. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
"""
"""
Constants to use for luminol
"""

"""
Detector Constants
"""
# Indicate which algorithm to use to calculate anomaly scores.
ANOMALY_DETECTOR_ALGORITHM = 'bitmap_detector'

# Indicate which algorithm to use to get refined maximal score within each anomaly.
ANOMALY_DETECTOR_REFINE_ALGORITHM = 'exp_avg_detector'

# Default percent threshold value on anomaly score above which is considered an anomaly.
DEFAULT_SCORE_PERCENT_THRESHOLD = 0.2

# Constants for BitmapDetector.
# Window sizes as percentiles of the whole data length.
DEFAULT_BITMAP_LEADING_WINDOW_SIZE_PCT = 0.2 / 16

DEFAULT_BITMAP_LAGGING_WINDOW_SIZE_PCT = 0.2 / 16

DEFAULT_BITMAP_MINIMAL_POINTS_IN_WINDOWS = 50

DEFAULT_BITMAP_MAXIMAL_POINTS_IN_WINDOWS = 200

# Chunk size.
# Data points form chunks and frequencies of similar chunks are used to determine anomaly scores.
DEFAULT_BITMAP_CHUNK_SIZE = 2

DEFAULT_BITMAP_PRECISION = 4

# Constants for ExpAvgDetector.
DEFAULT_EMA_SMOOTHING_FACTOR = 0.2

DEFAULT_EMA_WINDOW_SIZE_PCT = 0.2

# Constants for DerivativeDetector.
DEFAULT_DERI_SMOOTHING_FACTOR = 0.2

ANOMALY_THRESHOLD = {
    'exp_avg_detector': 3,
    'default_detector': 3
}

# Percentage threshold on anomaly score below which is considered noise.
DEFAULT_NOISE_PCT_THRESHOLD = 0.001

# The score weight default detector uses.
DEFAULT_DETECTOR_EMA_WEIGHT = 0.65

# The default minimal ema score for the default detector to use weighted score.
DEFAULT_DETECTOR_EMA_SIGNIFICANT = 0.94

"""
Correlator Constants
"""
CORRELATOR_ALGORITHM = 'cross_correlator'

# Since anomalies take time to propagate between two different timeseries,
# similar irregularities may happen close in time but not exactly at the same point in time.
# To take this into account, when correlating, we allow a "shift room".
DEFAULT_ALLOWED_SHIFT_SECONDS = 60

# The threshold above which is considered "correlated".
DEFAULT_CORRELATE_THRESHOLD = 0.7

# The impact of shift on shifted correlation coefficient.
DEFAULT_SHIFT_IMPACT = 0.05

# Accepted input timestamp formats, tried in order by utils.to_epoch.
TIMESTAMP_STR_FORMATS = [
    '%Y%m%d_%H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y%m%d %H:%M:%S',
    '%Y-%m-%d_%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M:%S.%f%z',
    '%Y%m%dT%H:%M:%S',
    '%Y-%m-%d_%H:%M:%S.%f',
    '%Y%m%d_%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%d %H:%M:%S',
    '%Y%m%dT%H:%M:%S.%f',
    '%H:%M:%S',
    '%Y%m%d %H:%M:%S.%f']
--------------------------------------------------------------------------------
/lib/luminol/demo/src/start.py:
--------------------------------------------------------------------------------
import csv
import datetime
import os
import sys
import time
import urllib

from flask import Flask, jsonify, render_template, request

# Make the in-repo luminol package importable without installation.
sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'src'))

from luminol import utils, anomaly_detector, correlator
from rca import RCA

app = Flask(__name__)

# Directory the demo reads input csv series from, and where score csvs go.
DATA_PATH = 'static/data/'
SCORE_FILE_PATH = 'static/'


@app.route('/')
def index():
    """Serve the demo landing page."""
    return render_template('index.html')

@app.route('/get_selection')
def get_selection():
    """Return the names of the csv files available under DATA_PATH."""
    fs = list()
    for f in os.listdir(DATA_PATH):
        if f.endswith('.csv'):
            fs.append(f)
    return jsonify(selection=fs)


@app.route('/detect')
def luminoldetect():
    """
    Run anomaly detection on the time series whose path arrives in the
    'ts_path' query parameter; respond with the anomalies and per-point scores.
    """
    # NOTE(review): urllib.unquote and iteritems() below are Python 2 only;
    # this demo is not Python 3 compatible as written.
    ts = urllib.unquote(request.args.get('ts_path')[1:])
    my_detector = anomaly_detector.AnomalyDetector(ts)
    score = my_detector.get_all_scores().values

    # Create a new csv file that contains both values and scores.
    anom_scores = list()
    for i, (timestamp, value) in enumerate(my_detector.time_series.iteritems()):
        # Timestamps are treated as milliseconds; convert to seconds for localtime.
        t_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp / 1000))
        anom_scores.append([t_str, value, score[i]])
    write_csv(anom_scores, ts.split('/')[-1])

    anoms = my_detector.get_anomalies()
    result = list()
    for anom in anoms:
        # Flatten each Anomaly object into a list of its attribute values.
        entry = list()
        anom_dict = anom.__dict__
        for key in anom_dict:
            entry.append(anom_dict[key])
        result.append(entry)
    return jsonify(anom=result, anom_score=anom_scores)


@app.route('/correlate')
def luminolanalyze():
    """
    Correlate the first series in the comma-separated 'ts_paths' query
    parameter against all the remaining ones via RCA.
    """
    ts = urllib.unquote(request.args.get('ts_paths'))
    ts = ts.split(",")
    matrix = ts.pop(0)
    matrices = list()
    for t in ts:
        matrices.append(t)
    myluminol = RCA(matrix, matrices)
    result = myluminol.output_by_name
    return jsonify(anom=result)


@app.route('/find_correlation_list')
def findCorrelationListPerAnomaly():
    """
    For each anomaly in the series named by the 'ts' query parameter, rank
    every other csv series under DATA_PATH by its correlation with it.
    """
    ts = urllib.unquote(request.args.get('ts')[1:])
    all_ts = os.listdir(DATA_PATH)
    matrices = list()
    for t in all_ts:
        t = DATA_PATH + t
        if t.endswith('.csv') and t != ts:
            matrices.append(t)
    myluminol = RCA(ts, matrices)
    result = myluminol.output
    r = list()
    for t in result:
        l = result[t]
        data = list()
        for entry in l:
            # entry layout (see RCA._analyze): (start_t, end_t, correlation dict, name).
            data.append([entry[3]] + entry[2].values())
        data_sorted = sorted(data, key=lambda k: (-k[1], k[2], -k[3]))
        r.append([t, data_sorted])
    return jsonify(anom=r)


def write_csv(rows, name):
    """Write rows out as SCORE_FILE_PATH/name in csv format."""
    with open(SCORE_FILE_PATH + name, 'w+') as fp:
        writer = csv.writer(fp)
        writer.writerows(rows)


def to_epoch(anom):
    """Convert every timestamp inside a list of anomaly tuples to epoch time."""
    r = list()
    for a in anom:
        cur = list()
        for t in a:
            cur.append(utils.to_epoch(t))
        r.append(cur)
    return r

if __name__ == "__main__":
    app.debug = True
    app.run(host='0.0.0.0')
class DiffPercentThreshold(AnomalyDetectorAlgorithm):
    """
    In this algorithm, anomalies are those data points that are above a percentage threshold as compared to the baseline.
    This algorithm assumes that time_series and baseline_time_series are perfectly aligned, meaning that:
    a) every timestamp that exists in time_series also exists in baseline_time_series
    b) lengths of both time series are same
    """
    def __init__(self, time_series, baseline_time_series, percent_threshold_upper=None, percent_threshold_lower=None):
        """
        :param time_series: current time series
        :param baseline_time_series: baseline time series
        :param percent_threshold_upper: If time_series is larger than baseline_time_series by this percent, then its
        an anomaly
        :param percent_threshold_lower: If time_series is smaller than baseline_time_series by this percent, then its
        an anomaly
        """
        super(DiffPercentThreshold, self).__init__(self.__class__.__name__, time_series, baseline_time_series)
        self.percent_threshold_upper = percent_threshold_upper
        self.percent_threshold_lower = percent_threshold_lower
        # At least one bound must be supplied, otherwise no point could ever score.
        if not self.percent_threshold_upper and not self.percent_threshold_lower:
            raise exceptions.RequiredParametersNotPassed('luminol.algorithms.anomaly_detector_algorithms.diff_percent_threshold: \
            Either percent_threshold_upper or percent_threshold_lower needed')

    def _set_scores(self):
        """
        Compute anomaly scores for the time series
        This algorithm just takes the diff of threshold with current value as anomaly score
        """
        anom_scores = {}
        for i, (timestamp, value) in enumerate(self.time_series.items()):

            # NOTE(review): the baseline is indexed positionally (by i), not by
            # timestamp — this leans on the perfect-alignment assumption in the
            # class docstring; confirm TimeSeries supports positional indexing.
            baseline_value = self.baseline_time_series[i]

            if baseline_value > 0:
                diff_percent = 100 * (value - baseline_value) / baseline_value
            elif value > 0:
                # Baseline is zero or negative while the current value is
                # positive: score it as a full 100% increase.
                diff_percent = 100.0
            else:
                diff_percent = 0.0

            # Default score is zero; a threshold breach below overwrites it.
            anom_scores[timestamp] = 0.0
            if self.percent_threshold_upper and diff_percent > 0 and diff_percent > self.percent_threshold_upper:
                anom_scores[timestamp] = diff_percent
            # NOTE(review): for this branch to mean "smaller by this percent",
            # percent_threshold_lower appears to need a negative value (e.g. -20);
            # a positive value would flag every below-baseline point. Confirm
            # against callers.
            if self.percent_threshold_lower and diff_percent < 0 and diff_percent < self.percent_threshold_lower:
                anom_scores[timestamp] = -1 * diff_percent

        self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))
class Local_Cmd(Run_Step):
  """
  Run step that executes a command on the local machine.

  Most likely used when the workload is launched from the same host that runs
  naarad. The command's stdout/stderr is streamed into the naarad log, and the
  wall-clock start/end timestamps are recorded so the metric analysis phase
  can infer the time window to examine.
  """

  def __init__(self, run_type, run_cmd, call_type, run_order, run_rank, should_wait=True, kill_after_seconds=None):
    Run_Step.__init__(self, run_type, run_cmd, call_type, run_order, run_rank, should_wait, kill_after_seconds)
    self.process = None
    # Initialize to None so run() can safely test "if self.timer" even when no
    # kill_after_seconds timeout was requested. Previously this attribute was
    # only created inside run() when kill_after_seconds was set, so the
    # unconditional "if self.timer:" check raised AttributeError for every run
    # without a timeout.
    self.timer = None

  def run(self):
    """
    Run the command, infer time period to be used in metric analysis phase.
    :return: None
    """
    cmd_args = shlex.split(self.run_cmd)
    logger.info('Local command RUN-STEP starting with rank %d', self.run_rank)
    logger.info('Running subprocess command with following args: ' + str(cmd_args))

    # Infer time period for analysis. Assume same timezone between client and servers.
    self.ts_start = time.strftime("%Y-%m-%d %H:%M:%S")
    try:
      self.process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=1)
      if self.kill_after_seconds:
        self.timer = Timer(self.kill_after_seconds, self.kill)
        self.timer.start()
      # Using 2nd method here to stream output:
      # http://stackoverflow.com/questions/2715847/python-read-streaming-input-from-subprocess-communicate
      for line in iter(self.process.stdout.readline, b''):
        logger.info(line.strip())
      self.process.communicate()
    except KeyboardInterrupt:
      logger.warning('Handling keyboard interrupt (Ctrl-C)')
      self.kill()
    if self.timer:
      self.timer.cancel()
    self.ts_end = time.strftime("%Y-%m-%d %H:%M:%S")
    logger.info('subprocess finished')
    logger.info('run_step started at ' + self.ts_start + ' and ended at ' + self.ts_end)

  def kill(self):
    """
    If run_step needs to be killed, this method will be called.
    Sends SIGTERM first, then escalates to SIGKILL if the subprocess has not
    exited within CONSTANTS.SECONDS_TO_KILL_AFTER_SIGTERM seconds.
    :return: None
    """
    try:
      logger.info('Trying to terminating run_step...')
      self.process.terminate()
      time_waited_seconds = 0
      while self.process.poll() is None and time_waited_seconds < CONSTANTS.SECONDS_TO_KILL_AFTER_SIGTERM:
        time.sleep(0.5)
        time_waited_seconds += 0.5
      if self.process.poll() is None:
        self.process.kill()
        logger.warning('Waited %d seconds for run_step to terminate. Killing now....', CONSTANTS.SECONDS_TO_KILL_AFTER_SIGTERM)
    # "except OSError as e" works on Python 2.6+ as well as Python 3; the old
    # comma form ("except OSError, e") is a hard syntax error under Python 3.
    except OSError as e:
      logger.error('Error while trying to kill the subprocess: %s', e)
import logging
import os
import glob
import sys
from nose import *

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'src')))

import naarad.graphing.matplotlib_naarad as mpl_n
from naarad.graphing.plot_data import PlotData as PlotData
logger = logging.getLogger('naarad')


def validate(filename):
  """Return True only if filename exists and is non-empty."""
  if os.path.exists(filename):
    if not os.path.getsize(filename):
      return False
    else:
      return True
  else:
    return False


def plot(list_of_plots, output_file, scenario_name, validation):
  """Graph list_of_plots and assert the produced PNG matches expectation."""
  mpl_n.graph_data(list_of_plots, '.', output_file)
  assert validate(output_file + '.png') == validation


def init_logging(log_level):
  """Configure the naarad logger with a fresh file handler and a console handler."""
  log_file = 'test_matplotlib.log'
  # clear the log file
  with open(log_file, 'w'):
    pass
  numeric_level = getattr(logging, log_level.upper(), None) if log_level else logging.INFO
  if not isinstance(numeric_level, int):
    raise ValueError('Invalid log level: %s' % log_level)
  logger.setLevel(logging.DEBUG)
  fh = logging.FileHandler(log_file)
  fh.setLevel(logging.DEBUG)
  ch = logging.StreamHandler()
  ch.setLevel(numeric_level)
  formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  fh.setFormatter(formatter)
  ch.setFormatter(formatter)
  logger.addHandler(fh)
  logger.addHandler(ch)


def setup_matplotlib_tests():
  """Initialize logging and delete PNG output left over from earlier runs."""
  init_logging('INFO')
  png_list = glob.glob('test*.png')
  for png in png_list:
    os.remove(png)
    logger.info('Deleting : %s', png)


# Test Cases
# Pass the setup function itself to with_setup. The previous code called it
# (setup_matplotlib_tests()), which ran the setup once at import time and
# handed nose its return value None instead of a setup callable, so nose
# never ran the setup before the generated tests.
@with_setup(setup_matplotlib_tests)
def test_generator():
  # Test Data
  plot1 = PlotData('test1.csv', 1, 'GC Commit', 'MB', None, 600, 1200, 'line')
  plot2 = PlotData('test2.csv', 1, 'GC CMS Pause', 'seconds', None, 600, 1200, 'line')
  plot3 = PlotData('test3.csv', 1, 'GC Promo', 'bps', None, 600, 1200, 'line')
  plot4 = PlotData('test4.csv', 1, 'GC Promo', 'bps', None, 600, 1200, 'line')
  plot5 = PlotData('test5.csv', 1, 'GC Promo', 'bps', None, 600, 1200, 'line')
  yield plot, [plot1], 'test1', 'single plot with all valid csv', True
  yield plot, [plot1, plot2], 'test2', 'dual plot with all valid csv', True
  yield plot, [plot1, plot2, plot3], 'test3', 'multi plot with all valid csv', True
  yield plot, [plot4], 'test4', 'single plot with 1 empty csv', False
  yield plot, [plot4, plot1], 'test5', 'dual plot with 1 empty csv', True
  yield plot, [plot1, plot4, plot3], 'test6', 'multi plot with 1 empty csv', True
  yield plot, [plot5], 'test7', 'single plot with 1 non-existent csv', False
  yield plot, [plot5, plot1], 'test8', 'dual plot with 1 non-existent csv', True
  yield plot, [plot1, plot5, plot3], 'test9', 'multi plot with 1 non-existent csv', True
"""

from collections import defaultdict
import os
import sys

from luminol import utils, exceptions
from luminol.anomaly_detector import AnomalyDetector
from luminol.correlator import Correlator
from luminol.modules.correlation_result import CorrelationResult
from luminol.modules.time_series import TimeSeries


class RCA(object):
    """
    Root-cause analysis helper: detect anomalies in one metric and, for each
    anomaly, correlate the metric against a set of related metrics inside an
    extended time window around the anomaly.
    """
    def __init__(self, metrix, related_metrices):
        """
        Initializer
        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
        :param list related_metrixes: a list of time series.
        """
        self.metrix = self._load(metrix)
        self.anomaly_detector = AnomalyDetector(metrix)
        self.related_metrices = related_metrices
        self.anomalies = self.anomaly_detector.get_anomalies()
        # Runs the full analysis eagerly; results land in self.output and
        # self.output_by_name.
        self._analyze()

    def _load(self, metrix):
        """
        Load time series.
        :param timeseries: a TimeSeries, a dictionary or a path to a csv file(str).
        :return TimeSeries: a TimeSeries object.
        """
        if isinstance(metrix, TimeSeries):
            return metrix
        if isinstance(metrix, dict):
            return TimeSeries(metrix)
        return TimeSeries(utils.read_csv(metrix))

    def _analyze(self):
        """
        Analyzes if a matrix has anomalies.
        If any anomaly is found, determine if the matrix correlates with any other matrixes.
        To be implemented.
        """
        output = defaultdict(list)
        output_by_name = defaultdict(list)
        scores = self.anomaly_detector.get_all_scores()

        if self.anomalies:
            for anomaly in self.anomalies:
                metrix_scores = scores
                start_t, end_t = anomaly.get_time_window()
                t = anomaly.exact_timestamp

                # Compute extended start timestamp and extended end timestamp.
                # NOTE(review): under Python 2 this is integer division; a
                # zero-width window falls back to a 30-unit room below.
                room = (end_t - start_t) / 2
                if not room:
                    room = 30
                extended_start_t = start_t - room
                extended_end_t = end_t + room
                metrix_scores_cropped = metrix_scores.crop(extended_start_t, extended_end_t)

                # Adjust the two timestamps if not enough data points are included.
                # Widens the window by one room on each side per iteration until
                # the crop holds at least two points.
                while len(metrix_scores_cropped) < 2:
                    extended_start_t = extended_start_t - room
                    extended_end_t = extended_end_t + room
                    metrix_scores_cropped = metrix_scores.crop(extended_start_t, extended_end_t)

                # Correlate with other metrics
                for entry in self.related_metrices:
                    try:
                        entry_correlation_result = Correlator(self.metrix, entry, time_period=(extended_start_t, extended_end_t),
                                                              use_anomaly_score=True).get_correlation_result()
                        record = extended_start_t, extended_end_t, entry_correlation_result.__dict__, entry
                        record_by_name = extended_start_t, extended_end_t, entry_correlation_result.__dict__
                        output[t].append(record)
                        output_by_name[entry].append(record_by_name)
                    except exceptions.NotEnoughDataPoints:
                        # Skip related metrics that have too little data in the
                        # extended window rather than failing the whole analysis.
                        pass

        self.output = output
        self.output_by_name = output_by_name
37 |
38 |
39 |
40 | 46 |
47 |
48 | 54 |
55 |
56 |
57 | 58 |
59 | 60 | 61 |
62 |
63 | 64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 | 73 |
74 |
75 |

Link to This Page

76 |
77 |
78 |
79 | 80 |
81 |
82 |
83 |
84 |
85 |
86 | -------------------------------------------------------------------------------- /src/naarad/graphing/dygraphs.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | """ 17 | 18 | import os 19 | import random 20 | import logging 21 | 22 | logger = logging.getLogger('naarad.graphing.dygraphs') 23 | 24 | 25 | def graph_csv(output_directory, resource_path, csv_file, plot_title, output_filename, y_label=None, precision=None, graph_height="600", graph_width="1500"): 26 | """ Single metric graphing function """ 27 | if not os.path.getsize(csv_file): 28 | return False, "" 29 | y_label = y_label or plot_title 30 | div_id = str(random.random()) 31 | div_string = "
" % (div_id, graph_width, graph_height)
  # NOTE(review): the dygraphs <script> template that should populate
  # script_string is not visible in this view of the file — confirm against
  # the full source before relying on the generated div.
  script_string = """"""

  with open(os.path.join(output_directory, output_filename + '.div'), 'w') as div_file:
    div_file.write(div_string + script_string)
  # TODO(ritesh): Also generate PNGs if someone needs them separately
  return True, os.path.join(output_directory, output_filename + '.div')


def graph_data(list_of_plots, output_directory, resource_path, output_filename):
  """
  Graph the first plot in list_of_plots as a dygraphs .div file.
  Returns (success, path_to_div_file); (False, None) when no plots are given.
  """
  if len(list_of_plots) > 0:
    plot = list_of_plots[0]
    success, div_file = graph_csv(output_directory=output_directory, resource_path=resource_path, csv_file=plot.input_csv, plot_title=plot.graph_title,
                                  output_filename=output_filename, y_label=plot.y_label, precision=None, graph_height=plot.graph_height,
                                  graph_width=plot.graph_width)
    if len(list_of_plots) > 1:
      # Extra plots are intentionally dropped — dygraphs support is single-plot.
      logger.warning('dygraph module currently does not support co-relation of multiple plots. Only plotting %s', plot.graph_title)
    return success, div_file
  else:
    return False, None


def graph_data_on_the_same_graph(list_of_plots, output_directory, resource_path, output_filename):
  """
  graph_data_on_the_same_graph: put a list of plots on the same graph: currently it supports CDF
  """
  logger.warning('graph_data_on_the_same_graph is currently not supported in dygraph')
  return False, None
135.21 19.3 1 1 0 0 0 0 0 6.2 6.2 10 | 135.32 19.31 1 1 0 0 0 0 0 6.2 6.2 11 | 135.41 19.32 1 1 0 0 0 0 0 6.2 6.2 12 | 135.49 19.33 1 1 0 0 0 0 0 6.2 6.2 13 | 135.61 19.35 1 1 0 0 0 0 0 6.2 6.2 14 | 135.72 19.37 1 1 0 0 0 0 0 6.2 6.2 15 | 135.83 19.38 1 1 0 0 0 0 0 6.2 6.2 16 | 135.93 19.4 1 1 0 0 0 0 0 6.2 6.2 17 | 136.03 19.41 1 1 0 0 0 0 0 6.2 6.2 18 | 136.13 19.43 1 1 0 0 0 0 0 6.2 6.2 19 | 136.24 19.44 1 1 0 0 0 0 0 6.2 6.2 20 | 136.34 19.46 1 1 0 0 0 0 0 6.2 6.2 21 | 136.44 19.47 1 1 0 0 0 0 0 6.2 6.2 22 | 136.54 19.48 1 1 0 0 0 0 0 6.2 6.2 23 | 136.65 19.5 1 1 0 0 0 0 0 6.2 6.2 24 | 136.68 19.5 1 1 0 0 0 0 0 6.2 6.2 25 | 136.79 19.52 1 1 0 0 0 0 0 6.2 6.1 26 | 136.92 19.54 1 1 0 0 0 0 0 6.2 6.2 27 | 136.99 19.55 1 1 0 0 0 0 0 6.2 6.2 28 | 137.11 19.57 1 1 0 0 0 0 0 6.2 6.2 29 | 137.22 19.58 1 1 0 0 0 0 0 6.2 6.2 30 | 137.34 19.6 1 1 0 0 0 0 0 6.2 6.2 31 | 137.46 19.62 1 1 0 0 0 0 0 6.2 6.2 32 | 137.56 19.63 1 1 0 0 0 0 0 6.2 6.2 33 | 137.67 19.65 1 1 0 0 0 0 0 6.2 6.2 34 | 137.77 19.66 1 1 0 0 0 0 0 6.2 6.2 35 | 137.87 19.67 1 1 0 0 0 0 0 6.2 6.2 36 | 137.97 19.69 1 1 0 0 0 0 0 6.2 6.2 37 | 138.07 19.7 1 1 0 0 0 0 0 6.2 6.2 38 | 138.18 19.72 1 1 0 0 0 0 0 6.2 6.2 39 | 138.28 19.73 1 1 0 0 0 0 0 6.2 6.2 40 | 138.38 19.75 1 1 0 0 0 0 0 6.2 6.2 41 | 138.48 19.76 1 1 0 0 0 0 0 6.2 6.2 42 | 138.58 19.78 1 1 0 0 0 0 0 6.2 6.2 43 | 138.68 19.79 1 1 0 0 0 0 0 6.2 6.2 44 | 138.79 19.81 1 1 0 0 0 0 0 6.2 6.2 45 | 138.89 19.82 1 1 0 0 0 0 0 6.2 6.2 46 | 138.99 19.83 1 1 0 0 0 0 0 6.2 6.2 47 | 139.09 19.85 1 1 0 0 0 0 0 6.2 6.2 48 | 139.19 19.86 1 1 0 0 0 0 0 6.2 6.2 49 | 139.3 19.88 1 1 0 0 0 0 0 6.2 6.2 50 | 139.38 19.89 1 1 0 0 0 0 0 6.2 6.2 51 | 139.5 19.91 1 1 0 0 0 0 0 6.2 6.2 52 | 139.6 19.92 1 1 0 0 0 0 0 6.2 6.2 53 | 139.71 19.94 1 1 0 0 0 0 0 6.2 6.2 54 | 139.81 19.95 1 1 0 0 0 0 0 6.2 6.2 55 | 139.91 19.97 1 1 0 0 0 0 0 6.2 6.2 56 | 140.01 19.98 1 1 0 0 0 0 0 6.2 6.2 57 | 140.11 19.99 1 1 0 0 0 0 0 6.2 6.2 58 | 140.22 20.01 1 1 0 0 0 0 0 6.2 6.2 59 | 140.32 
20.02 1 1 0 0 0 0 0 6.2 6.2 60 | 140.42 20.04 1 1 0 0 0 0 0 6.2 6.2 61 | 140.52 20.05 1 1 0 0 0 0 0 6.2 6.2 62 | 140.62 20.07 1 1 0 0 0 0 0 6.2 6.2 63 | 140.72 20.08 1 1 0 0 0 0 0 6.2 6.2 64 | 140.83 20.1 1 1 0 0 0 0 0 6.2 6.2 65 | 140.93 20.11 1 1 0 0 0 0 0 6.2 6.2 66 | 141.03 20.13 1 1 0 0 0 0 0 6.2 6.2 67 | 141.13 20.14 1 1 0 0 0 0 0 6.2 6.2 68 | 141.23 20.15 1 1 0 0 0 0 0 6.2 6.2 69 | 141.34 20.17 1 1 0 0 0 0 0 6.2 6.2 70 | 141.42 20.18 1 1 0 0 0 0 0 6.2 6.2 71 | 141.53 20.2 1 1 0 0 0 0 0 6.2 6.2 72 | 141.64 20.21 1 1 0 0 0 0 0 6.2 6.2 73 | 141.73 20.23 1 1 0 0 0 0 0 6.2 6.2 74 | 141.84 20.24 1 1 0 0 0 0 0 6.2 6.2 75 | 141.95 20.26 1 1 0 0 0 0 0 6.2 6.2 76 | 142.05 20.27 1 1 0 0 0 0 0 6.2 6.2 77 | 142.15 20.29 1 1 0 0 0 0 0 6.2 6.2 78 | 142.25 20.3 1 1 0 0 0 0 0 6.2 6.2 79 | 142.36 20.31 1 1 0 0 0 0 0 6.2 6.2 80 | 142.46 20.33 1 1 0 0 0 0 0 6.2 6.2 81 | 142.56 20.34 1 1 0 0 0 0 0 6.2 6.2 82 | 142.66 20.36 1 1 0 0 0 0 0 6.2 6.2 83 | 142.76 20.37 1 1 0 0 0 0 0 6.2 6.2 84 | 142.87 20.39 1 1 0 0 0 0 0 6.2 6.2 85 | 142.97 20.4 1 1 0 0 0 0 0 6.2 6.2 86 | 143.07 20.42 1 1 0 0 0 0 0 6.2 6.2 87 | 143.17 20.43 1 1 0 0 0 0 0 6.2 6.1 88 | 143.28 20.45 1 1 0 0 0 0 0 6.2 6.2 89 | 143.37 20.46 1 1 0 0 0 0 0 6.2 6.2 90 | 143.48 20.48 1 1 0 0 0 0 0 6.2 6.2 91 | 143.58 20.49 1 1 0 0 0 0 0 6.2 6.2 92 | 143.68 20.5 1 1 0 0 0 0 0 6.2 6.2 93 | 143.79 20.52 1 1 0 0 0 0 0 6.2 6.2 94 | 143.89 20.53 1 1 0 0 0 0 0 6.2 6.2 95 | 143.99 20.55 1 1 0 0 0 0 0 6.2 6.2 96 | 144.09 20.56 1 1 0 0 0 0 0 6.2 6.2 97 | 144.17 20.57 1 1 0 0 0 0 0 6.2 6.2 98 | 144.18 20.57 1 1 0 0 0 0 0 6.2 6.2 99 | 144.18 20.58 1 1 0 0 0 0 0 6.2 6.2 100 | 144.27 20.59 1 1 0 0 0 0 0 6.2 6.2 101 | 144.28 20.59 1 1 0 0 0 0 0 6.2 5.4 102 | 103 | 104 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/algorithms/anomaly_detector_algorithms/exp_avg_detector.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 
| © 2014 LinkedIn Corp. All rights reserved. 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | """ 12 | import numpy 13 | 14 | from luminol import utils 15 | from luminol.algorithms.anomaly_detector_algorithms import AnomalyDetectorAlgorithm 16 | from luminol.constants import * 17 | from luminol.modules.time_series import TimeSeries 18 | 19 | 20 | class ExpAvgDetector(AnomalyDetectorAlgorithm): 21 | 22 | """ 23 | Exponential Moving Average. 24 | This method uses a data point's deviation from the exponential moving average of a lagging window 25 | to determine its anomaly score. 26 | """ 27 | def __init__(self, time_series, baseline_time_series=None, smoothing_factor=None, use_lag_window=False, lag_window_size=None): 28 | """ 29 | Initializer 30 | :param TimeSeries time_series: a TimeSeries object. 31 | :param TimeSeries baseline_time_series: baseline TimeSeries. 32 | :param float smoothing_factor: smoothing factor for computing exponential moving average. 33 | :param int lag_window_size: lagging window size. 34 | """ 35 | super(ExpAvgDetector, self).__init__(self.__class__.__name__, time_series, baseline_time_series) 36 | self.use_lag_window = use_lag_window 37 | self.smoothing_factor = smoothing_factor if smoothing_factor > 0 else DEFAULT_EMA_SMOOTHING_FACTOR 38 | self.lag_window_size = lag_window_size if lag_window_size else int(self.time_series_length * DEFAULT_EMA_WINDOW_SIZE_PCT) 39 | self.time_series_items = self.time_series.items() 40 | 41 | def _compute_anom_score(self, lag_window_points, point): 42 | """ 43 | Compute anomaly score for a single data point. 
44 | Anomaly score for a single data point(t,v) equals: abs(v - ema(lagging window)). 45 | :param list lag_window_points: values in the lagging window. 46 | :param float point: data point value. 47 | :return float: the anomaly score. 48 | """ 49 | ema = utils.compute_ema(self.smoothing_factor, lag_window_points)[-1] 50 | return abs(point - ema) 51 | 52 | def _compute_anom_data_using_window(self): 53 | """ 54 | Compute anomaly scores using a lagging window. 55 | """ 56 | anom_scores = {} 57 | values = self.time_series.values 58 | stdev = numpy.std(values) 59 | for i, (timestamp, value) in enumerate(self.time_series_items): 60 | if i < self.lag_window_size: 61 | anom_score = self._compute_anom_score(values[:i + 1], value) 62 | else: 63 | anom_score = self._compute_anom_score(values[i - self.lag_window_size: i + 1], value) 64 | if stdev: 65 | anom_scores[timestamp] = anom_score / stdev 66 | else: 67 | anom_scores[timestamp] = anom_score 68 | self.anom_scores = TimeSeries(self._denoise_scores(anom_scores)) 69 | 70 | def _compute_anom_data_decay_all(self): 71 | """ 72 | Compute anomaly scores using a lagging window covering all the data points before. 73 | """ 74 | anom_scores = {} 75 | values = self.time_series.values 76 | ema = utils.compute_ema(self.smoothing_factor, values) 77 | stdev = numpy.std(values) 78 | for i, (timestamp, value) in enumerate(self.time_series_items): 79 | anom_score = abs((value - ema[i]) / stdev) if stdev else value - ema[i] 80 | anom_scores[timestamp] = anom_score 81 | self.anom_scores = TimeSeries(self._denoise_scores(anom_scores)) 82 | 83 | def _set_scores(self): 84 | """ 85 | Compute anomaly scores for the time series. 86 | Currently uses a lagging window covering all the data points before. 
87 | """ 88 | if self.use_lag_window: 89 | self._compute_anom_data_using_window() 90 | self._compute_anom_data_decay_all() 91 | -------------------------------------------------------------------------------- /test/test_cluster_metric.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | """ 17 | 18 | import os 19 | import nose 20 | import sys 21 | import uuid 22 | import shutil 23 | import time 24 | import sys 25 | 26 | # add the path of ~/naarad/src; the testing py is under ~/naarad/test 27 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'src'))) 28 | import naarad.utils 29 | from naarad.metrics.sar_metric import SARMetric 30 | from naarad.metrics.cluster_metric import ClusterMetric 31 | 32 | # the temporary directory for testing, will remove it after done. 33 | tmp_dir = '' 34 | 35 | 36 | def setup(): 37 | create_tmp_dir() 38 | 39 | 40 | def teardown(): 41 | delete_tmp_dir() 42 | 43 | 44 | def create_tmp_dir(): 45 | '''create a unique tmp dir to hold the downloaded local files''' 46 | ''' if the tmp dir grenerated already exists, then simply return''' 47 | ''' the user simply try again to generate another unique tmp dir''' 48 | global tmp_dir 49 | tmp_dir = os.path.join('./', 'tmp' + '.' 
def setup():
  """nose setup hook: create the temporary working directory."""
  create_tmp_dir()


def teardown():
  """nose teardown hook: remove the temporary working directory."""
  delete_tmp_dir()


def create_tmp_dir():
  '''create a unique tmp dir to hold the downloaded local files'''
  ''' if the tmp dir generated already exists, then simply return'''
  ''' the user simply try again to generate another unique tmp dir'''
  global tmp_dir
  tmp_dir = os.path.join('./', 'tmp' + '.' + str(uuid.uuid4()))  # ./tmp.'randomstring'
  if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)
  else:
    print "the path of %s already exists, please try again." % tmp_dir
    return


def delete_tmp_dir():
  '''delete the tmp directory'''
  global tmp_dir
  shutil.rmtree(tmp_dir)


def test_clustermetric():
  """
  End-to-end check of ClusterMetric.collect(): aggregate two SARMetric
  instances and verify the expected aggregate csv files are written.
  """
  # construct 2 SARMetric
  metric1 = SARMetric('SAR-cpuusage-host1', 'sar.cpuusage.out', 'host1', '.', 'logs', 'SAR-cpuusage-host1', None, None, {}, None, None)
  metric1.csv_column_map['logs/SAR-cpuusage-host1.all.percent-sys.csv'] = 'all.%sys'
  metric1.column_csv_map['all.%sys'] = 'logs/SAR-cpuusage-host1.all.percent-sys.csv'

  metric2 = SARMetric('SAR-cpuusage-host2', 'sar.cpuusage.out', 'host2', '.', 'logs', 'SAR-cpuusage-host2', None, None, {}, None, None)
  metric2.csv_column_map['logs/SAR-cpuusage-host2.all.percent-sys.csv'] = 'all.%sys'
  metric2.column_csv_map['all.%sys'] = 'logs/SAR-cpuusage-host2.all.percent-sys.csv'

  # construct a ClusterMetric
  aggregate_metrics = 'SAR-cpuusage.all.percent-sys:raw,avg,sum,count'
  section = 'CLUSTER-cpuusage-1'
  label = 'CLUSTER-cpuusage-1'
  resource_path = 'resources'
  rule_strings = {}
  output_directory = tmp_dir
  aggregate_hosts = 'host1 host2'
  other_options = {}
  ts_start = None
  ts_end = None
  metrics = [metric1, metric2]

  cur_metric = ClusterMetric(section, aggregate_hosts, aggregate_metrics, metrics, output_directory, resource_path, label, ts_start, ts_end, rule_strings,
                             None, None)

  # create sub-directory of resource_path
  sub_dir = os.path.join(output_directory, resource_path)
  if not os.path.exists(sub_dir):
    os.makedirs(sub_dir)

  # the only method to test; it will write to the directory the final csv files;
  cur_metric.collect()

  # check the existence of the output files
  functions = aggregate_metrics.split(':')
  prefix = functions[0].split('.')  # 'SAR-cpuusage.all.percent-sys'
  prefix[0] = section
  prefix = '.'.join(prefix)  # CLUSTER-cpuusage-1.all.percent-sys

  for func in functions[1].split(','):  # 'raw,avg,sum,count'
    file_name = prefix + '.' + func + '.csv'
    file_path = os.path.join(sub_dir, file_name)
    # print 'file to check = ' + file_path  # resources/CLUSTER-cpuusage-1.all.percent-sys.raw.csv
    assert os.path.exists(file_path)

if __name__ == '__main__':
  test_clustermetric()
36 |
37 |
38 |
39 | {%- for metric in metric_list %} 40 | {%- if metric.status == 1 %} 41 |
42 |
43 |

{{ metric.label }} Failed SLA Summary

44 |
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | {%- for sla in metric.sla_list %} 55 | {%- if sla.sla_passed == False %} 56 | 57 | 58 | 59 | 60 | 61 | {%- endif %} 62 | {%- endfor %} 63 | 64 | 65 |
sub_metricsla rulevalue
{{ sla.sub_metric }}{{ sla.stat_name }} {{ sla.sla_type }} {{ sla.threshold}}{{ sla.stat_value }}
66 |
67 |
68 |
69 | {%- endif %} 70 | {%- endfor %} 71 |
72 |
73 | {%- if summary_html_content %} 74 | 75 | 89 | 90 | {{ summary_html_content }} 91 | {%- endif %} 92 | {%- if overlaid_plot_content %} 93 |
94 |
95 |

Overlaid Metric Plots

96 | {{ overlaid_plot_content }} 97 |
98 |
99 | {%- endif %} 100 |
101 |
102 | 103 | -------------------------------------------------------------------------------- /src/naarad/metrics/procvmstat_metric.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | """ 17 | 18 | from collections import defaultdict 19 | import datetime 20 | import gc 21 | import logging 22 | import os 23 | import re 24 | import numpy 25 | from naarad.metrics.metric import Metric 26 | import naarad.utils 27 | 28 | logger = logging.getLogger('naarad.metrics.ProcVmstatMetric') 29 | 30 | 31 | class ProcVmstatMetric(Metric): 32 | """ 33 | logs of /proc/vmstat 34 | The raw log file is assumed to have a timestamp prefix of all lines. E.g. 
in the format of "2013-01-02 03:55:22.13456 compact_fail 36" 35 | The log lines can be generated by 'cat /proc/vmstat | sed "s/^/$(date +%Y-%m-%d\ %H:%M:%S.%05N)\t/" ' 36 | """ 37 | 38 | def __init__(self, metric_type, infile_list, hostname, aggr_metrics, output_directory, resource_path, label, ts_start, ts_end, 39 | rule_strings, important_sub_metrics, anomaly_detection_metrics, **other_options): 40 | Metric.__init__(self, metric_type, infile_list, hostname, aggr_metrics, output_directory, resource_path, label, ts_start, ts_end, 41 | rule_strings, important_sub_metrics, anomaly_detection_metrics) 42 | 43 | self.sub_metrics = None 44 | # in particular, Section can specify a subset of all rows (default has 86 rows): "sub_metrics=nr_free_pages nr_inactive_anon" 45 | 46 | for (key, val) in other_options.iteritems(): 47 | setattr(self, key, val.split()) 48 | 49 | self.sub_metric_description = { 50 | 'nr_free_pages': 'Number of free pages', 51 | 'nr_inactive_anon': 'Number of inactive anonymous pages', 52 | 'nr_active_anon': 'Number of active anonymous pages', 53 | 'nr_inactive_file': 'Number of inactive file pages', 54 | 'nr_active_file': 'Number of active file pages', 55 | } 56 | 57 | def parse(self): 58 | """ 59 | Parse the vmstat file 60 | :return: status of the metric parse 61 | """ 62 | file_status = True 63 | for input_file in self.infile_list: 64 | file_status = file_status and naarad.utils.is_valid_file(input_file) 65 | if not file_status: 66 | return False 67 | status = True 68 | data = {} # stores the data of each column 69 | for input_file in self.infile_list: 70 | logger.info('Processing : %s', input_file) 71 | timestamp_format = None 72 | with open(input_file) as fh: 73 | for line in fh: 74 | words = line.split() # [0] is day; [1] is seconds; [2] is field name; [3] is value 75 | if len(words) < 3: 76 | continue 77 | ts = words[0] + " " + words[1] 78 | if not timestamp_format or timestamp_format == 'unknown': 79 | timestamp_format = 
naarad.utils.detect_timestamp_format(ts) 80 | if timestamp_format == 'unknown': 81 | continue 82 | ts = naarad.utils.get_standardized_timestamp(ts, timestamp_format) 83 | if self.ts_out_of_range(ts): 84 | continue 85 | col = words[2] 86 | # if sub_metrics is specified, only process those specified in config. 87 | if self.sub_metrics and col not in self.sub_metrics: 88 | continue 89 | self.sub_metric_unit[col] = 'pages' # The unit of the metric. For /proc/vmstat, they are all in pages 90 | if col in self.column_csv_map: 91 | out_csv = self.column_csv_map[col] 92 | else: 93 | out_csv = self.get_csv(col) # column_csv_map[] is assigned in get_csv() 94 | data[out_csv] = [] 95 | data[out_csv].append(ts + "," + words[3]) 96 | # post processing, putting data in csv files; 97 | for csv in data.keys(): 98 | self.csv_files.append(csv) 99 | with open(csv, 'w') as fh: 100 | fh.write('\n'.join(data[csv])) 101 | return status 102 | -------------------------------------------------------------------------------- /lib/luminol/demo/src/static/inversion.css: -------------------------------------------------------------------------------- 1 | /* 2 | © 2013 LinkedIn Corp. All rights reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 4 | 5 | Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 6 | */ 7 | html, body { 8 | background-color: #f0f0f0; 9 | } 10 | 11 | .topbar { border-top: 5px solid #009cc7; height: 60px; } 12 | 13 | .topbar-inner, .topbar .navbar-inner { 14 | height: 60px; 15 | background-image: -khtml-gradient(linear, left top, left bottom, from(#474747), to(#363636)); 16 | background-image: -moz-linear-gradient(top, #474747, #363636); 17 | background-image: -ms-linear-gradient(top, #474747, #363636); 18 | background-image: -webkit-gradient(linear, left top, left bottom, color-stop(0%, #474747), color-stop(100%, #363636)); 19 | background-image: -webkit-linear-gradient(top, #474747, #363636); 20 | background-image: -o-linear-gradient(top, #474747, #363636); 21 | background-image: linear-gradient(top, #474747, #363636); 22 | filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#474747', endColorstr='#363636', GradientType=0); 23 | -moz-box-shadow: 1px 2px 5px 0px rgba(0, 0, 0, 0.2); 24 | -webkit-box-shadow: 1px 2px 5px 0px rgba(0, 0, 0, 0.2); 25 | box-shadow: 1px 2px 5px 0px rgba(0, 0, 0, 0.2); 26 | } 27 | 28 | .topbar-content { margin: 0 -20px; } 29 | 30 | .topbar a.brand, .navbar .navbar-text a { 31 | color: #009cc7; 32 | } 33 | .topbar .brand { 34 | padding: 18px 20px 22px 60px; 35 | font-weight: bold; 36 | background: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAYAAABzenr0AAABPUlEQVR42u2WwcmEMBCF04F2oGV4U7AAKxBvHrwoNmALdqAdWIJWoJ5F0A60g1me4JLN6rI//CZ7cGCQkMH3mfgmYa7rUpqmNI4jyQpoQRPaLAxDUhXQZn3fKwOANiPFcQqwLAvVdU3TNMkH6LqOdF0nxtiWRVHIBTBN8ym+J6CkAYjiyKZp5AHwy7/nVW45BMDX8hB5nst3wbquG8g8z2psqLQPxHFMtm2/5Kd5jBHoG57nPbdun0NP+ROA4zhvPyEfeDE/hzF6xZF7kJqmndr4XwAMwzgV52suA/g2sUWXAcC2ZVluzsHzqJdkWXYdQFVVLzUQE2uCILgOQAwst1gjuukGuAFugN8FSJJkg+BTPA0/ze8XW7FmPzV//z4gFaBtW2Xi0Ga+7ysDgDazLIuiKKJhGKQJQwua0H4AE8Dyy4Wbjx0AAAAASUVORK5CYII=) no-repeat 20px 12px; 37 | } 38 | 39 | .topbar .login { padding: 10px 0; } 40 | 41 | .topbar .nav > li > a { padding: 
20px 10px 21px; } 42 | .topbar .nav > li.active > a { 43 | padding: 19px 9px 20px; 44 | border: 1px solid #2b2b2b; 45 | background: #333 !important; 46 | } 47 | .topbar .nav > li.active > a:hover { border-color: #2b2b2b; } 48 | .topbar .nav > li.active > a span { border-bottom: 1px solid #009cc7; } 49 | 50 | .topbar .dropdown-menu { top: 60px; } 51 | 52 | .container > .content { 53 | border: 1px solid #cfcfcf; 54 | background-color: #fff; 55 | padding: 20px; 56 | margin: 0 -21px; /* negative indent the amount of the padding to maintain the grid system */ 57 | } 58 | 59 | /* table contextual styles backported from Bootstrap 3 */ 60 | .table > thead > tr > td.success, 61 | .table > tbody > tr > td.success, 62 | .table > tfoot > tr > td.success, 63 | .table > thead > tr > th.success, 64 | .table > tbody > tr > th.success, 65 | .table > tfoot > tr > th.success, 66 | .table > thead > tr.success > td, 67 | .table > tbody > tr.success > td, 68 | .table > tfoot > tr.success > td, 69 | .table > thead > tr.success > th, 70 | .table > tbody > tr.success > th, 71 | .table > tfoot > tr.success > th { 72 | background-color: #dff0d8; 73 | } 74 | 75 | .table-hover > tbody > tr > td.success:hover, 76 | .table-hover > tbody > tr > th.success:hover, 77 | .table-hover > tbody > tr.success:hover > td, 78 | .table-hover > tbody > tr.success:hover > th { 79 | background-color: #d0e9c6; 80 | } 81 | 82 | .table > thead > tr > td.danger, 83 | .table > tbody > tr > td.danger, 84 | .table > tfoot > tr > td.danger, 85 | .table > thead > tr > th.danger, 86 | .table > tbody > tr > th.danger, 87 | .table > tfoot > tr > th.danger, 88 | .table > thead > tr.danger > td, 89 | .table > tbody > tr.danger > td, 90 | .table > tfoot > tr.danger > td, 91 | .table > thead > tr.danger > th, 92 | .table > tbody > tr.danger > th, 93 | .table > tfoot > tr.danger > th { 94 | background-color: #f2dede; 95 | } 96 | .table-hover > tbody > tr > td.danger:hover, 97 | .table-hover > tbody > tr > th.danger:hover, 98 | 
.table-hover > tbody > tr.danger:hover > td, 99 | .table-hover > tbody > tr.danger:hover > th { 100 | background-color: #ebcccc; 101 | } 102 | 103 | -------------------------------------------------------------------------------- /test/test_httpdownload.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | """ 17 | 18 | import nose 19 | from nose.plugins.attrib import attr 20 | import os 21 | import shutil 22 | import sys 23 | import time 24 | import uuid 25 | 26 | # add the path of ~/naarad/src; the testing py is under ~/naarad/test 27 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'src'))) 28 | 29 | import naarad.httpdownload 30 | 31 | # the port of local http server 32 | port_test = 8011 33 | 34 | # the temporary directory for testing, will remove it after done. 
35 | tmp_dir = '' 36 | 37 | # the testing download file (will be hosted from local http server) 38 | test_input_file = 'bin/naarad' 39 | 40 | 41 | def setup(): 42 | start_http_server() 43 | create_tmp_dir() 44 | 45 | 46 | def teardown(): 47 | kill_http_server() 48 | delete_tmp_dir() 49 | 50 | 51 | def start_http_server(): 52 | '''start a local http server for testing''' 53 | global port_test 54 | command = 'python -m SimpleHTTPServer %s &' % port_test 55 | os.system(command) 56 | time.sleep(1) 57 | 58 | 59 | def kill_http_server(): 60 | '''After testing, kill the local http server''' 61 | command = 'pkill -f SimpleHTTPServer' 62 | os.system(command) 63 | time.sleep(1) 64 | 65 | 66 | def create_tmp_dir(): 67 | '''create a unique tmp dir to hold the downloaded local files''' 68 | ''' if the tmp dir grenerated already exists, then simply return''' 69 | ''' the user simply try again to generate another unique tmp dir''' 70 | global tmp_dir 71 | tmp_dir = os.path.join('./tmp/', str(uuid.uuid4())) 72 | if not os.path.exists(tmp_dir): 73 | os.makedirs(tmp_dir) 74 | else: 75 | print "the path of %s already exists, please try again." % tmp_dir 76 | return 77 | 78 | 79 | def delete_tmp_dir(): 80 | '''delete the tmp directory''' 81 | global tmp_dir 82 | shutil.rmtree(tmp_dir) 83 | 84 | 85 | @attr('local') 86 | def test_list_of_urls_no_output(): 87 | ''' list of abosulute urls with no output file name''' 88 | global tmp_dir 89 | url = "http://localhost:8011/bin/naarad" 90 | outdir = tmp_dir 91 | 92 | if os.path.exists(os.path.join(outdir, "naarad")): 93 | os.remove(os.path.join(outdir, "naarad")) 94 | 95 | output_file = naarad.httpdownload.download_url_single(url, outdir) 96 | 97 | assert os.path.exists(output_file), "File of %s does not exist! 
" % output_file 98 | 99 | if os.path.exists(os.path.join(outdir, "naarad")): 100 | os.remove(os.path.join(outdir, "naarad")) 101 | 102 | 103 | @attr('local') 104 | def test_list_of_urls_with_output(): 105 | ''' list of abosulute urls with output file name given''' 106 | global tmp_dir 107 | 108 | url = "http://localhost:8011/bin/naarad" 109 | outfile = "naarad.tmp" 110 | outdir = tmp_dir 111 | 112 | if os.path.exists(os.path.join(outdir, "1a.html")): 113 | os.remove(os.path.join(outdir, "1a.html")) 114 | 115 | output_file = naarad.httpdownload.download_url_single(url, outdir, outfile) 116 | 117 | assert os.path.exists(output_file), "File of %s does not exist! " % output_file 118 | 119 | if os.path.exists(os.path.join(outdir, "1a.html")): 120 | os.remove(os.path.join(outdir, "1a.html")) 121 | 122 | 123 | @attr('local') 124 | def test_regex_urls(): 125 | '''a seeding url, and a regex expression of urls ''' 126 | global tmp_dir 127 | seed_url = "http://localhost:8011/test/httpdownload.html" 128 | outdir = tmp_dir 129 | regex = ".*" 130 | 131 | output_files = [] 132 | output_files = naarad.httpdownload.download_url_regex(seed_url, outdir, regex) 133 | 134 | print output_files 135 | print 'abc' 136 | output_file = os.path.join(outdir, 'test_httpdownload.pyc') 137 | assert os.path.exists(output_file), "File of %s does not exist! " % output_file 138 | output_file = os.path.join(outdir, 'test_httpdownload.py') 139 | assert os.path.exists(output_file), "File of %s does not exist! " % output_file 140 | -------------------------------------------------------------------------------- /src/naarad/metrics/procmeminfo_metric.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 
7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | """ 17 | 18 | from collections import defaultdict 19 | import datetime 20 | import gc 21 | import logging 22 | import os 23 | import re 24 | import numpy 25 | from naarad.metrics.metric import Metric 26 | import naarad.utils 27 | 28 | logger = logging.getLogger('naarad.metrics.ProcMeminfoMetric') 29 | 30 | 31 | class ProcMeminfoMetric(Metric): 32 | """ 33 | logs of /proc/vmstat 34 | The raw log file is assumed to have a timestamp prefix of all lines. E.g. in the format of "2013-01-02 03:55:22.13456 compact_fail 36" 35 | The log lines can be generated by 'cat /proc/vmstat | sed "s/^/$(date +%Y-%m-%d\ %H:%M:%S.%05N)\t/" ' 36 | """ 37 | 38 | sub_metrics = None 39 | 40 | def __init__(self, metric_type, infile_list, hostname, aggr_metrics, output_directory, resource_path, label, ts_start, ts_end, 41 | rule_strings, important_sub_metrics, anomaly_detection_metrics, **other_options): 42 | Metric.__init__(self, metric_type, infile_list, hostname, aggr_metrics, output_directory, resource_path, label, ts_start, ts_end, 43 | rule_strings, important_sub_metrics, anomaly_detection_metrics) 44 | 45 | # in particular, Section can specify a subset of all rows (default has 43 rows): "sub_metrics=nr_free_pages nr_inactive_anon" 46 | for (key, val) in other_options.iteritems(): 47 | setattr(self, key, val.split()) 48 | 49 | self.sub_metric_description = { 50 | 'MemTotal': 'Total memory in KB', 51 | 'MemFree': 'Total free memory in KB', 52 | 'Buffers': 'Size of buffers in KB', 53 | 'Cached': 'Size of page cache in KB', 54 | } 55 | 56 | def parse(self): 57 | 
""" 58 | Parse the vmstat file 59 | :return: status of the metric parse 60 | """ 61 | file_status = True 62 | for input_file in self.infile_list: 63 | file_status = file_status and naarad.utils.is_valid_file(input_file) 64 | if not file_status: 65 | return False 66 | status = True 67 | data = {} # stores the data of each column 68 | for input_file in self.infile_list: 69 | logger.info('Processing : %s', input_file) 70 | timestamp_format = None 71 | with open(input_file) as fh: 72 | for line in fh: 73 | words = line.split() # [0] is day; [1] is seconds; [2] is field name:; [3] is value [4] is unit 74 | if len(words) < 3: 75 | continue 76 | ts = words[0] + " " + words[1] 77 | if not timestamp_format or timestamp_format == 'unknown': 78 | timestamp_format = naarad.utils.detect_timestamp_format(ts) 79 | if timestamp_format == 'unknown': 80 | continue 81 | ts = naarad.utils.get_standardized_timestamp(ts, timestamp_format) 82 | if self.ts_out_of_range(ts): 83 | continue 84 | col = words[2].strip(':') 85 | # only process sub_metrics specified in config. 
86 | if self.sub_metrics and col not in self.sub_metrics: 87 | continue 88 | # add unit to metric description; most of the metrics have 'KB'; a few others do not have unit, they are in number of pages 89 | if len(words) > 4 and words[4]: 90 | unit = words[4] 91 | else: 92 | unit = 'pages' 93 | self.sub_metric_unit[col] = unit 94 | # stores the values in data[] before finally writing out 95 | if col in self.column_csv_map: 96 | out_csv = self.column_csv_map[col] 97 | else: 98 | out_csv = self.get_csv(col) # column_csv_map[] is assigned in get_csv() 99 | data[out_csv] = [] 100 | data[out_csv].append(ts + "," + words[3]) 101 | # post processing, putting data in csv files; 102 | for csv in data.keys(): 103 | self.csv_files.append(csv) 104 | with open(csv, 'w') as fh: 105 | fh.write('\n'.join(sorted(data[csv]))) 106 | return status 107 | -------------------------------------------------------------------------------- /src/naarad/naarad_constants.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | """ 17 | 18 | NAARAD_LOG = 'naarad.log' 19 | 20 | # Metric Constants 21 | DEFAULT_SUMMARY_STATS = ['mean', 'std', 'p50', 'p75', 'p90', 'p95', 'p99', 'min', 'max'] 22 | important_sub_metrics_import = { 23 | 'GC': ('GCPause', 'used', 'cmsIM', 'cmsCM', 'gen0t', 'g1-pause-young', 'g1-pause-mixed', 'g1-pause-remark', 'g1-pause-cleanup'), 24 | 'LINKEDINANDROIDRUM': ('launch_time', 'nus_update_time'), 25 | 'SAR-cpuusage': ('%sys', '%usr'), 26 | 'SAR-device': ('%util', 'await'), 27 | 'JMETER': ('Overall_Summary.ResponseTime', 'Overall_Summary.DataThroughput', 'Overall_Summary.qps') 28 | } 29 | device_type_metrics = ('SAR-cpuusage', 'SAR-cpuhz', 'SAR-device', 'SAR-dev', 'SAR-edev', 'SAR-network') 30 | # Status CODES 31 | OK = 0 32 | SLA_FAILED = 1 33 | COLLECT_FAILED = 2 34 | PARSE_FAILED = 3 35 | GRAPH_FAILED = 4 36 | REPORT_FAILED = 5 37 | ERROR = 6 38 | INVALID_CONFIG = 7 39 | 40 | # Report Constants 41 | RESOURCE_PATH = 'resources' 42 | DEFAULT_REPORT_TITLE = 'naarad analysis report' 43 | STYLESHEET_INCLUDES = ['bootstrap.min.css', 'naarad.css'] 44 | JAVASCRIPT_INCLUDES = ['jquery-1.11.2.min.js', 'dygraph-combined.js', 'bootstrap.min.js', 'sorttable.js', 'naarad.js'] 45 | PLOTS_CSV_LIST_FILE = 'list.txt' 46 | CDF_PLOTS_CSV_LIST_FILE = 'cdf_list.txt' 47 | STATS_CSV_LIST_FILE = 'stats.txt' 48 | SUMMARY_REPORT_FILE = 'summary_report.html' 49 | CLIENT_CHARTING_FILE = 'report.html' 50 | DIFF_REPORT_FILE = 'diff_report.html' 51 | METRIC_REPORT_SUFFIX = '_report.html' 52 | TEMPLATE_HEADER = 'default_report_header.html' 53 | TEMPLATE_FOOTER = 'default_report_footer.html' 54 | TEMPLATE_SUMMARY_CONTENT = 'default_summary_content.html' 55 | TEMPLATE_SUMMARY_PAGE = 'default_summary_page.html' 56 | TEMPLATE_METRIC_PAGE = 'default_metric_page.html' 57 | TEMPLATE_CLIENT_CHARTING = 'default_client_charting_page.html' 58 | TEMPLATE_DIFF_CLIENT_CHARTING = 'default_diff_client_charting_page.html' 59 | TEMPLATE_DIFF_PAGE = 'default_diff_page.html' 60 | SUBMETRIC_HEADER = 'sub_metric' 
61 | 62 | # Graphing constants 63 | DEFAULT_GRAPHING_LIBRARY = 'matplotlib' 64 | 65 | # Matplotlib Constants 66 | COLOR_PALETTE = ['black', 'steelblue', 'm', 'red', 'cyan', 'g', 'orange', 'gray'] 67 | SUBPLOT_BOTTOM_OFFSET = 0.1 68 | SUBPLOT_LEFT_OFFSET = 0.05 69 | SUBPLOT_RIGHT_OFFSET = 0.95 70 | SUBPLOT_TOP_OFFSET = 0 71 | X_TICKS_FONTSIZE = 8 72 | X_TICKS_DATEFORMAT = '%H:%M:%S' 73 | Y_AXIS_OFFSET = 0.06 74 | Y_LABEL_FONTSIZE = 10 75 | Y_TICKS_FONTSIZE = 8 76 | ZOOM_FACTOR = 0.02 77 | HIGHLIGHT_COLOR = 'red' 78 | HIGHLIGHT_ALPHA = 0.25 79 | 80 | 81 | # LinkedIn_Android_RUM Constants 82 | LIA_TIMING_NAME = 'timingName' 83 | LIA_TIMING_VALUE = 'timingValue' 84 | LIA_START = 'start' 85 | LIA_APP_ON_CREATE = 'linkedin_android_app_oncreate_time' 86 | LIA_NUS_UPDATE = 'linkedin_android_nus_update_time' 87 | LIA_LONG = 'long' 88 | LIA_NATIVE_TIMINGS = 'nativeTimings' 89 | LIA_ARRAY = 'array' 90 | 91 | 92 | # Narrad Exit Code 93 | SLA_FAILURE = 1 94 | 95 | # RUN STEPS constants 96 | PRE_ANALYSIS_RUN = 'pre' 97 | DURING_ANALYSIS_RUN = 'in' 98 | POST_ANALYSIS_RUN = 'post' 99 | RUN_TYPE_WORKLOAD = 'workload' 100 | SECONDS_TO_KILL_AFTER_SIGTERM = 5 101 | 102 | # Auto Discover Metrics 103 | SUPPORTED_FILENAME_MAPPING = { 104 | 'gc.log': 'GC', 105 | 'perf-results.xml': 'JMETER', 106 | 'perf-result.xml': 'JMETER', 107 | 'proc.vmstat.out': 'PROCVMSTAT', 108 | 'procvmstat.out': 'PROCVMSTAT', 109 | 'proc.meminfo.out': 'PROCMEMINFO', 110 | 'procmeminfo.out': 'PROCMEMINFO', 111 | 'proc.zoneinfo.out': 'PROCZONEINFO', 112 | 'proczoneinfo.out': 'PROCZONEINFO', 113 | 'proc.interrupts.out': 'PROCINTERRUPTS', 114 | 'sar.cpuhz.out': 'SAR-cpuhz', 115 | 'sar.cpuusage.out': 'SAR-cpuusage', 116 | 'sar.device.out': 'SAR-device', 117 | 'sar.edev.out': 'SAR-edev', 118 | 'sar.etcp.out': 'SAR-etcp', 119 | 'sar.memory.out': 'SAR-memory', 120 | 'sar.memutil.out': 'SAR-memutil', 121 | 'sar.network.out': 'SAR-network', 122 | 'sar.paging.out': 'SAR-paging', 123 | 'sar.queue.out': 'SAR-queue', 124 | 
'sar.sock.out': 'SAR-sock', 125 | 'sar.swapping.out': 'SAR-swapping', 126 | 'sar.switching.out': 'SAR-switching', 127 | 'sar.tcp.out': 'SAR-tcp', 128 | 'top.out': 'TOP' 129 | } 130 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/algorithms/correlator_algorithms/cross_correlator.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | © 2014 LinkedIn Corp. All rights reserved. 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | """ 12 | from luminol.algorithms.correlator_algorithms import CorrelatorAlgorithm 13 | from luminol.constants import * 14 | from luminol.modules.correlation_result import CorrelationResult 15 | 16 | 17 | class CrossCorrelator(CorrelatorAlgorithm): 18 | 19 | """ 20 | Method 1: CrossCorrelation algorithm. 21 | Ideas come from Paul Bourke(http://paulbourke.net/miscellaneous/correlate/). 22 | """ 23 | def __init__(self, time_series_a, time_series_b, max_shift_seconds=None, shift_impact=None): 24 | """ 25 | Initializer 26 | :param TimeSeries time_series_a: TimeSeries a. 27 | :param TimeSeries time_series_b: TimeSeries b. 28 | :param int max_shift_milliseconds: allowed maximal shift seconds. 29 | :param time_period: if given, correlate the data inside the time period only. 
30 | """ 31 | super(CrossCorrelator, self).__init__(self.__class__.__name__, time_series_a, time_series_b) 32 | self.shift_impact = shift_impact or DEFAULT_SHIFT_IMPACT 33 | if max_shift_seconds is not None: 34 | self.max_shift_milliseconds = max_shift_seconds 35 | else: 36 | self.max_shift_milliseconds = DEFAULT_ALLOWED_SHIFT_SECONDS * 1000 37 | 38 | def _detect_correlation(self): 39 | """ 40 | Detect correlation by computing correlation coefficients for all allowed shift steps, 41 | then take the maximum. 42 | """ 43 | correlations = [] 44 | shifted_correlations = [] 45 | self.time_series_a.normalize() 46 | self.time_series_b.normalize() 47 | a, b = self.time_series_a.align(self.time_series_b) 48 | a_values, b_values = a.values, b.values 49 | a_avg, b_avg = a.average(), b.average() 50 | a_stdev, b_stdev = a.stdev(), b.stdev() 51 | n = len(a) 52 | denom = a_stdev * b_stdev * n 53 | # Find the maximal shift steps according to the maximal shift seconds. 54 | allowed_shift_step = self._find_allowed_shift(a.timestamps) 55 | if allowed_shift_step: 56 | shift_upper_bound = allowed_shift_step 57 | shift_lower_bound = -allowed_shift_step 58 | else: 59 | shift_upper_bound = 1 60 | shift_lower_bound = 0 61 | for delay in range(shift_lower_bound, shift_upper_bound): 62 | delay_in_seconds = a.timestamps[abs(delay)] - a.timestamps[0] 63 | if delay < 0: 64 | delay_in_seconds = -delay_in_seconds 65 | s = 0 66 | for i in range(n): 67 | j = i + delay 68 | if j < 0 or j >= n: 69 | continue 70 | else: 71 | s += ((a_values[i] - a_avg) * (b_values[j] - b_avg)) 72 | r = s / denom if denom != 0 else s 73 | correlations.append([delay_in_seconds, r]) 74 | # Take shift into account to create a "shifted correlation coefficient". 
75 | if self.max_shift_milliseconds: 76 | shifted_correlations.append(r * (1 + float(delay_in_seconds) / self.max_shift_milliseconds * self.shift_impact)) 77 | else: 78 | shifted_correlations.append(r) 79 | max_correlation = list(max(correlations, key=lambda k: k[1])) 80 | max_shifted_correlation = max(shifted_correlations) 81 | max_correlation.append(max_shifted_correlation) 82 | self.correlation_result = CorrelationResult(*max_correlation) 83 | 84 | def _find_allowed_shift(self, timestamps): 85 | """ 86 | Find the maximum allowed shift steps based on max_shift_milliseconds. 87 | param list timestamps: timestamps of a time series. 88 | """ 89 | init_ts = timestamps[0] 90 | residual_timestamps = map(lambda ts: ts - init_ts, timestamps) 91 | n = len(residual_timestamps) 92 | return self._find_first_bigger(residual_timestamps, self.max_shift_milliseconds, 0, n) 93 | 94 | def _find_first_bigger(self, timestamps, target, lower_bound, upper_bound): 95 | """ 96 | Find the first element in timestamps whose value is bigger than target. 97 | param list values: list of timestamps(epoch number). 98 | param target: target value. 99 | param lower_bound: lower bound for binary search. 100 | param upper_bound: upper bound for binary search. 101 | """ 102 | while lower_bound < upper_bound: 103 | pos = lower_bound + (upper_bound - lower_bound) / 2 104 | if timestamps[pos] > target: 105 | upper_bound = pos 106 | else: 107 | lower_bound = pos + 1 108 | return pos 109 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/tests/run_tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | """ 4 | © 2014 LinkedIn Corp. All rights reserved. 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 
7 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | """ 13 | import os 14 | import sys 15 | import unittest 16 | 17 | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) 18 | 19 | from luminol import exceptions 20 | from luminol import Luminol 21 | from luminol.anomaly_detector import AnomalyDetector 22 | from luminol.correlator import Correlator 23 | from luminol.modules.time_series import TimeSeries 24 | 25 | 26 | class TestCorrelator(unittest.TestCase): 27 | 28 | def setUp(self): 29 | self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0.5, 5: 1, 6: 1, 7: 1, 8: 0} 30 | self.s2 = {0: 0, 1: 0.5, 2: 1, 3: 1, 4: 1, 5: 0, 6: 0, 7: 0, 8: 0} 31 | self.s3 = {0: 0, 1: 0.5, 2: 1, 3: 1, 4: 1, 5: 0} 32 | self.correlator1 = Correlator(self.s1, self.s2) 33 | self.correlator2 = Correlator(self.s1, self.s3) 34 | 35 | def test_use_anomaly_score(self): 36 | """ 37 | Test if use_anomaly_score works as expected. 38 | """ 39 | correlator1 = Correlator(self.s1, self.s2, use_anomaly_score=True) 40 | self.assertNotEqual(self.correlator1.get_correlation_result().coefficient, correlator1.get_correlation_result().coefficient) 41 | 42 | def test_cross_correlation(self): 43 | """ 44 | Test if CrossCorrelation algorithm gives same results as expected. 45 | """ 46 | self.assertEqual(self.correlator1.get_correlation_result().coefficient, self.correlator2.get_correlation_result().coefficient) 47 | self.assertEqual(self.correlator1.get_correlation_result().shift, self.correlator2.get_correlation_result().shift) 48 | 49 | def test_if_correlate(self): 50 | """ 51 | Test if function is_correlated gives same result as function get_correlation_result 52 | when there is a correlation. 
53 | """ 54 | self.assertEqual(True, self.correlator2.is_correlated() is not None) 55 | self.assertEqual(self.correlator2.get_correlation_result(), self.correlator2.is_correlated()) 56 | 57 | def test_algorithm(self): 58 | """ 59 | Test if optional parameter algorithm works as expected. 60 | """ 61 | self.assertRaises(exceptions.AlgorithmNotFound, lambda: Correlator(self.s1, self.s2, algorithm_name='NotValidAlgorithm')) 62 | correlator = Correlator(self.s1, self.s2, algorithm_name='cross_correlator') 63 | self.assertEqual(self.correlator2.get_correlation_result().coefficient, correlator.get_correlation_result().coefficient) 64 | self.assertEqual(self.correlator2.get_correlation_result().shift, correlator.get_correlation_result().shift) 65 | 66 | def test_algorithm_params(self): 67 | """ 68 | Test if optional parameter algorithm_params works as expected. 69 | """ 70 | self.assertRaises(exceptions.InvalidDataFormat, lambda: Correlator(self.s1, self.s2, algorithm_name='cross_correlator', algorithm_params=1)) 71 | correlator = Correlator(self.s1, self.s2, algorithm_name='cross_correlator', algorithm_params={'max_shift_seconds': 180}) 72 | self.assertEqual(self.correlator2.get_correlation_result().coefficient, correlator.get_correlation_result().coefficient) 73 | 74 | def test_maximal_shift_seconds(self): 75 | """ 76 | Test if parameter max_shift_seconds works as expected. 77 | """ 78 | correlator = Correlator(self.s1, self.s2, algorithm_name='cross_correlator', algorithm_params={'max_shift_seconds': 0}) 79 | self.assertNotEqual(self.correlator2.get_correlation_result().coefficient, correlator.get_correlation_result().coefficient) 80 | 81 | def test_sanity_check(self): 82 | """ 83 | Test if exception NotEnoughDataPoints is raised as expected. 84 | """ 85 | s4 = {0: 0} 86 | self.assertRaises(exceptions.NotEnoughDataPoints, lambda: Correlator(s4, self.s1)) 87 | 88 | def test_time_series_format(self): 89 | """ 90 | Test if exception InvalidDataFormat is raised as expected. 
91 | """ 92 | self.assertRaises(exceptions.InvalidDataFormat, lambda: Correlator(list(), 1)) 93 | 94 | 95 | class TestLuminol(unittest.TestCase): 96 | def setUp(self): 97 | self.anomaly = ['A', 'B'] 98 | self.correlation = { 99 | 'A': ['m1', 'm2', 'm3'], 100 | 'B': ['m2', 'm1', 'm3'] 101 | } 102 | self.luminol = Luminol(self.anomaly, self.correlation) 103 | 104 | def test_get_result(self): 105 | self.assertTrue(isinstance(self.luminol.get_root_causes(), dict)) 106 | self.assertEqual(self.luminol.get_root_causes()['A'], 'm1') 107 | self.assertEqual(self.luminol.get_root_causes()['B'], 'm2') 108 | 109 | if __name__ == '__main__': 110 | unittest.main() 111 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/correlator.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | © 2014 LinkedIn Corp. All rights reserved. 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | """ 12 | """ 13 | API for Correlator Module 14 | This module finds correlation between two time series. 
15 | """ 16 | 17 | from luminol import exceptions, utils 18 | from luminol.algorithms.correlator_algorithms.all import correlator_algorithms 19 | from luminol.anomaly_detector import AnomalyDetector 20 | from luminol.constants import * 21 | from luminol.modules.time_series import TimeSeries 22 | 23 | 24 | class Correlator(object): 25 | 26 | def __init__(self, time_series_a, time_series_b, time_period=None, use_anomaly_score=False, algorithm_name=None, algorithm_params=None): 27 | """ 28 | Initializer 29 | :param time_series_a: a TimeSeries, a dictionary or a path to a csv file(str). 30 | :param time_series_b: a TimeSeries, a dictionary or a path to a csv file(str). 31 | :param time_period: a tuple (start, end) representing a data period for considering correlation. 32 | :param str algorithm_name: name of the algorithm to use. 33 | :param dict algorithm_params: additional params for the specific algorithm. 34 | """ 35 | self.time_series_a = self._load(time_series_a) 36 | self.time_series_b = self._load(time_series_b) 37 | if use_anomaly_score: 38 | self.time_series_a = self._get_anomaly_scores(self.time_series_a) 39 | self.time_series_b = self._get_anomaly_scores(self.time_series_b) 40 | if time_period: 41 | start_p, end_p = time_period 42 | try: 43 | self.time_series_a = self.time_series_a.crop(start_p, end_p) 44 | self.time_series_b = self.time_series_b.crop(start_p, end_p) 45 | # No data points fall into the specific time range. 46 | except ValueError: 47 | raise exceptions.NotEnoughDataPoints 48 | self._sanity_check() 49 | self.algorithm_params = {'time_series_a': self.time_series_a, 'time_series_b': self.time_series_b} 50 | self._get_algorithm_and_params(algorithm_name, algorithm_params) 51 | self._correlate() 52 | 53 | def _get_anomaly_scores(self, time_series): 54 | """ 55 | Get anomaly scores of a time series. 56 | :param TimeSeries time_series: a time_series. 
57 | """ 58 | return AnomalyDetector(time_series, score_only=True).get_all_scores() 59 | 60 | def _load(self, time_series): 61 | """ 62 | Load time series into a TimeSeries object. 63 | :param timeseries: a TimeSeries, a dictionary or a path to a csv file(str). 64 | :return TimeSeries: a TimeSeries object. 65 | """ 66 | if isinstance(time_series, TimeSeries): 67 | return time_series 68 | if isinstance(time_series, dict): 69 | return TimeSeries(time_series) 70 | return TimeSeries(utils.read_csv(time_series)) 71 | 72 | def _get_algorithm_and_params(self, algorithm_name, algorithm_params): 73 | """ 74 | Get the specific algorithm and merge the algorithm params. 75 | :param str algorithm: name of the algorithm to use. 76 | :param dict algorithm_params: additional params for the specific algorithm. 77 | """ 78 | algorithm_name = algorithm_name or CORRELATOR_ALGORITHM 79 | try: 80 | self.algorithm = correlator_algorithms[algorithm_name] 81 | except KeyError: 82 | raise exceptions.AlgorithmNotFound('luminol.Correlator: ' + str(algorithm_name) + ' not found.') 83 | # Merge parameters. 84 | if algorithm_params: 85 | if not isinstance(algorithm_params, dict): 86 | raise exceptions.InvalidDataFormat('luminol.Correlator: algorithm_params passed is not a dictionary.') 87 | else: 88 | self.algorithm_params = dict(algorithm_params.items() + self.algorithm_params.items()) 89 | 90 | def _sanity_check(self): 91 | """ 92 | Check if the time series have more than two data points. 93 | """ 94 | if len(self.time_series_a) < 2 or len(self.time_series_b) < 2: 95 | raise exceptions.NotEnoughDataPoints('luminol.Correlator: Too few data points!') 96 | 97 | def _correlate(self): 98 | """ 99 | Run correlation algorithm. 100 | """ 101 | a = self.algorithm(**self.algorithm_params) 102 | self.correlation_result = a.run() 103 | 104 | def get_correlation_result(self): 105 | """ 106 | Get correlation result. 107 | :return CorrelationResult: a CorrelationResult object. 
108 | """ 109 | return self.correlation_result 110 | 111 | def is_correlated(self, threshold=None): 112 | """ 113 | Compare with a threshold to determine whether two timeseries correlate to each other. 114 | :return: a CorrelationResult object if two time series correlate otherwise false. 115 | """ 116 | return self.correlation_result if self.correlation_result.coefficient >= threshold else False 117 | -------------------------------------------------------------------------------- /src/naarad/metrics/linkedin_android_rum_metric.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | """ 17 | 18 | import datetime 19 | import logging 20 | import os 21 | import re 22 | import sys 23 | import threading 24 | import json 25 | import time 26 | from datetime import date 27 | from naarad.metrics.metric import Metric 28 | import naarad.utils 29 | import naarad.naarad_constants as CONSTANTS 30 | 31 | logger = logging.getLogger('naarad.metrics.linkedin_android_rum_metric') 32 | 33 | 34 | class LinkedInAndroidRumMetric(Metric): 35 | """ 36 | Class for LinkedIn Android RUM logs, deriving from class Metric 37 | Note that this is for LinkedIn only 38 | """ 39 | clock_format = '%Y-%m-%d %H:%M:%S' 40 | val_types = ('launch_time', 'nus_update_time') 41 | 42 | def __init__(self, metric_type, infile_list, hostname, aggr_metrics, outdir, resource_path, label, ts_start, ts_end, rule_strings, 43 | important_sub_metrics, anomaly_detection_metrics, **other_options): 44 | Metric.__init__(self, metric_type, infile_list, hostname, aggr_metrics, outdir, resource_path, label, ts_start, ts_end, rule_strings, 45 | important_sub_metrics, anomaly_detection_metrics) 46 | self.sub_metrics = self.val_types 47 | if not self.important_sub_metrics: 48 | self.important_sub_metrics = CONSTANTS.important_sub_metrics_import['LINKEDINANDROIDRUM'] 49 | self.sub_metric_description = { 50 | "launch_time": "the time taken to launch the client application", 51 | "nus_update_time": "the time taken to update NUS list after launch" 52 | } 53 | 54 | # get start time stamp, launch time duration, and nus update time duration 55 | def get_times(self, native): 56 | """ 57 | get start time stamp, launch time duration, and nus update time duration from JSON object native 58 | :param JSON OBJECT native 59 | :return: LONG event time stamp, LONG launch time, and LONG nus update time 60 | """ 61 | start_time = 0 62 | end_time = 0 63 | launch_time = 0 64 | nus_update_time = 0 65 | 66 | for item in native: 67 | if item[CONSTANTS.LIA_TIMING_NAME] == CONSTANTS.LIA_APP_ON_CREATE and 
item[CONSTANTS.LIA_START] is not None: 68 | start_time = item[CONSTANTS.LIA_START][CONSTANTS.LIA_LONG] 69 | if item[CONSTANTS.LIA_TIMING_NAME] == CONSTANTS.LIA_NUS_UPDATE: 70 | if item[CONSTANTS.LIA_TIMING_VALUE] is not None: 71 | nus_update_time = item[CONSTANTS.LIA_TIMING_VALUE][CONSTANTS.LIA_LONG] 72 | if item[CONSTANTS.LIA_START] is not None: 73 | end_time = item[CONSTANTS.LIA_START][CONSTANTS.LIA_LONG] 74 | 75 | if start_time == 0 or end_time == 0: 76 | time_stamp = 0 77 | launch_time = 0 78 | else: 79 | time_stamp = start_time 80 | launch_time = end_time - start_time 81 | return (time_stamp, launch_time, nus_update_time) 82 | 83 | # parse Android RUM logs 84 | def parse(self): 85 | # check if outdir exists, if not, create it 86 | if not os.path.isdir(self.outdir): 87 | os.makedirs(self.outdir) 88 | if not os.path.isdir(self.resource_directory): 89 | os.makedirs(self.resource_directory) 90 | 91 | results = {} 92 | ts = None 93 | 94 | # set output csv 95 | launch_time_file = self.get_csv('launch_time') 96 | nus_update_time_file = self.get_csv('nus_update_time') 97 | for input_file in self.infile_list: 98 | # get Android RUM input data: for each line, generate (timestamp, launch_time, nus_update_time) 99 | with open(input_file, 'r') as inf: 100 | for line in inf: 101 | try: 102 | data = json.loads(line) 103 | except ValueError: 104 | logger.warn("Invalid JSON Object at line: %s", line) 105 | if data[CONSTANTS.LIA_NATIVE_TIMINGS] is not None: 106 | native = data[CONSTANTS.LIA_NATIVE_TIMINGS][CONSTANTS.LIA_ARRAY] 107 | time_stamp, launch_time, nus_update_time = self.get_times(native) 108 | if launch_time != 0 and nus_update_time != 0: 109 | results[time_stamp] = (str(launch_time), str(nus_update_time)) 110 | # Writing launch time and nus update time stats 111 | with open(launch_time_file, 'w') as launchtimef: 112 | with open(nus_update_time_file, 'w') as nusupdatetimef: 113 | for ts in sorted(results.iterkeys()): 114 | 
launchtimef.write(naarad.utils.get_standardized_timestamp(ts, 'epoch_ms') + ',' + results[ts][0] + '\n') 115 | nusupdatetimef.write(naarad.utils.get_standardized_timestamp(ts, 'epoch_ms') + ',' + results[ts][1] + '\n') 116 | self.csv_files.append(launch_time_file) 117 | self.csv_files.append(nus_update_time_file) 118 | return True 119 | -------------------------------------------------------------------------------- /src/naarad/metrics/proczoneinfo_metric.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | """ 17 | 18 | from collections import defaultdict 19 | import datetime 20 | import gc 21 | import logging 22 | import os 23 | import re 24 | import numpy 25 | from naarad.metrics.metric import Metric 26 | import naarad.utils 27 | 28 | logger = logging.getLogger('naarad.metrics.ProcZoneinfoMetric') 29 | 30 | 31 | class ProcZoneinfoMetric(Metric): 32 | """ 33 | logs of /proc/vmstat 34 | The raw log file is assumed to have a timestamp prefix of all lines. E.g. 
class ProcZoneinfoMetric(Metric):
    """
    Metric for logs of /proc/zoneinfo.
    (The original docstring said /proc/vmstat; this class parses the
    'Node N zone XXX' layout of /proc/zoneinfo, as its own comments state.)
    The raw log file is assumed to have a timestamp prefix on all lines, e.g.
    in the format of "2013-01-02 03:55:22.13456 compact_fail 36"
    The log lines can be generated by 'cat /proc/zoneinfo | sed "s/^/$(date +%Y-%m-%d\ %H:%M:%S.%05N)\t/" '
    """

    # Layout/noise tokens that carry no chartable value and are skipped.
    skipped_sub_metrics = ('protection:', 'pagesets', 'cpu:', 'count:', 'high:', 'batch:', 'vm', 'all_unreclaimable:', 'prev_priority:', 'start_pfn:',
                           'inactive_ratio:')
    # Tokens reported without the 'pages' prefix; normalized to 'pages.<name>'.
    processed_sub_metrics = ('min', 'high', 'scanned', 'spanned', 'present')

    zones = None  # Users can specify which zones to process/plot, e.g. zones= Node.0.zone.DMA

    def __init__(self, metric_type, infile_list, hostname, aggr_metrics, output_directory, resource_path, label, ts_start, ts_end,
                 rule_strings, important_sub_metrics, anomaly_detection_metrics, **other_options):
        Metric.__init__(self, metric_type, infile_list, hostname, aggr_metrics, output_directory, resource_path, label, ts_start, ts_end,
                        rule_strings, important_sub_metrics, anomaly_detection_metrics)

        self.sub_metrics = None
        # in particular, Section can specify a subset of all metrics: sub_metrics=pages.min nr_free_pages

        # Config options are whitespace-separated lists (e.g. zones, sub_metrics).
        for (key, val) in other_options.iteritems():
            setattr(self, key, val.split())

        self.sub_metric_description = {
            'nr_free_pages': 'Number of free pages',
            'nr_inactive_anon': 'Number of inactive anonymous pages',
            'nr_active_anon': 'Number of active anonymous pages',
            'nr_inactive_file': 'Number of inactive file cache pages',
            'nr_active_file': 'Number of active file cache pages',
        }

    def parse(self):
        """
        Parse the zoneinfo log files into per-column csv files.
        :return: status of the metric parse (False if any input file is invalid)
        """
        file_status = True
        for input_file in self.infile_list:
            file_status = file_status and naarad.utils.is_valid_file(input_file)
        if not file_status:
            return False

        status = True
        cur_zone = None
        cur_submetric = None
        cur_value = None
        data = {}  # stores the data of each column
        for input_file in self.infile_list:
            logger.info('Processing : %s', input_file)
            timestamp_format = None
            with open(input_file) as fh:
                for line in fh:
                    words = line.replace(',', ' ').split()  # [0] is day; [1] is seconds; [2...] is field names
                    if len(words) < 3:
                        continue
                    ts = words[0] + " " + words[1]
                    if not timestamp_format or timestamp_format == 'unknown':
                        timestamp_format = naarad.utils.detect_timestamp_format(ts)
                    if timestamp_format == 'unknown':
                        continue
                    ts = naarad.utils.get_standardized_timestamp(ts, timestamp_format)
                    if self.ts_out_of_range(ts):
                        continue
                    if words[2] == 'Node':  # Node 0 zone DMA
                        cols = words[2:]
                        cur_zone = '.'.join(cols)
                        continue
                    elif words[2] == 'pages':  # pages free 3936
                        cur_submetric = words[2] + '.' + words[3]  # pages.free
                        cur_value = words[4]
                    elif words[2] in self.processed_sub_metrics:
                        cur_submetric = 'pages' + '.' + words[2]  # pages.min
                        cur_value = words[3]
                    elif words[2] in self.skipped_sub_metrics:
                        continue
                    else:  # other useful submetrics
                        cur_submetric = words[2]
                        cur_value = words[3]
                    # BUGFIX: a data line appearing before any 'Node N zone X'
                    # header left cur_zone as None; concatenating it below
                    # raised TypeError. Skip such lines instead.
                    if cur_zone is None:
                        continue
                    # only process zones specified in config
                    if self.zones and cur_zone not in self.zones:
                        continue
                    col = cur_zone + '.' + cur_submetric  # prefix with 'Node.0.zone.DMA.'
                    self.sub_metric_unit[col] = 'pages'  # For /proc/zoneinfo all values are in pages.
                    # only process sub_metrics specified in config.
                    if self.sub_metrics and cur_submetric and cur_submetric not in self.sub_metrics:
                        continue
                    if col in self.column_csv_map:
                        out_csv = self.column_csv_map[col]
                    else:
                        out_csv = self.get_csv(col)  # column_csv_map[] is assigned in get_csv()
                        data[out_csv] = []
                    data[out_csv].append(ts + "," + cur_value)
        # post processing, putting data in csv files
        for csv in data.keys():
            self.csv_files.append(csv)
            with open(csv, 'w') as fh:
                fh.write('\n'.join(sorted(data[csv])))
        return status
16 | """ 17 | 18 | from collections import defaultdict 19 | import datetime 20 | import gc 21 | import logging 22 | import os 23 | import re 24 | import numpy 25 | from naarad.metrics.metric import Metric 26 | import naarad.utils 27 | import sys 28 | 29 | logger = logging.getLogger('naarad.metrics.cluster_metric') 30 | 31 | 32 | class ClusterMetric(Metric): 33 | """ 34 | supporting the metric of Cluster, which aggregates the performance metrics of multiple hosts 35 | """ 36 | 37 | metrics = [] # all other non-aggregate metrics; 38 | aggr_metrics = [] # metrics to be aggregated 39 | aggr_hosts = [] # hosts to be aggregated 40 | 41 | def __init__(self, section, aggregate_hosts, aggregate_metrics, metrics, output_directory, resource_path, label, 42 | ts_start, ts_end, rule_strings, important_sub_metrics, anomaly_detection_metrics, **other_options): 43 | self.metrics = metrics 44 | self.aggr_hosts = aggregate_hosts.split() 45 | 46 | # Metric arguments take 'infile' and 'hostname', for ClusterMetric, they are invalid, so just provide empty strings. 47 | Metric.__init__(self, section, '', '', '', output_directory, resource_path, label, ts_start, ts_end, rule_strings, 48 | important_sub_metrics, anomaly_detection_metrics) 49 | self.aggr_metrics = aggregate_metrics.split() 50 | 51 | for (key, val) in other_options.iteritems(): 52 | setattr(self, key, val.split()) 53 | 54 | def collect(self): 55 | """ 56 | Take a list of metrics, filter all metrics based on hostname, and metric_type 57 | For each metric, merge the corresponding csv files into one,update corresponding properties such as csv_column_map. 
58 | Users can specify functions: raw, count (qps), sum (aggregated value), avg (averaged value) 59 | The timestamp granularity of aggregated submetrics is in seconds (sub-second is not supported) 60 | """ 61 | 62 | for aggr_metric in self.aggr_metrics: # e.g., SAR-device.sda.await:count,sum,avg 63 | functions_aggr = [] 64 | fields = aggr_metric.split(":") 65 | cur_metric_type = fields[0].split(".")[0] # e.g. SAR-device 66 | 67 | if len(fields) > 1: # The user has to specify the aggregate functions (i.e., :raw,count,sum,avg) 68 | func_user = ''.join(fields[1].split()) 69 | functions_aggr.extend(func_user.split(",")) 70 | else: # no user input of aggregate functions 71 | return True 72 | 73 | cur_column = '.'.join(fields[0].split('.')[1:]) # e.g. sda.await or all.percent-sys 74 | 75 | # Store data points of various aggregation functions 76 | aggr_data = {} 77 | aggr_data['raw'] = [] # Store all the raw values 78 | aggr_data['sum'] = defaultdict(float) # Store the sum values for each timestamp 79 | aggr_data['count'] = defaultdict(int) # Store the count of each timestamp (i.e. 
qps) 80 | 81 | for metric in self.metrics: # Loop the list to find from all metrics to merge 82 | if metric.hostname in self.aggr_hosts and \ 83 | cur_column in metric.csv_column_map.values(): 84 | file_csv = metric.get_csv(cur_column) 85 | timestamp_format = None 86 | with open(file_csv) as fh: 87 | for line in fh: 88 | aggr_data['raw'].append(line.rstrip()) 89 | words = line.split(",") 90 | ts = words[0].split('.')[0] # In case of sub-seconds; we only want the value of seconds; 91 | if not timestamp_format or timestamp_format == 'unknown': 92 | timestamp_format = naarad.utils.detect_timestamp_format(ts) 93 | if timestamp_format == 'unknown': 94 | continue 95 | ts = naarad.utils.get_standardized_timestamp(ts, timestamp_format) 96 | aggr_data['sum'][ts] += float(words[1]) 97 | aggr_data['count'][ts] += 1 98 | # "raw" csv file 99 | if 'raw' in functions_aggr: 100 | out_csv = self.get_csv(cur_column, 'raw') 101 | self.csv_files.append(out_csv) 102 | with open(out_csv, 'w') as fh: 103 | fh.write("\n".join(sorted(aggr_data['raw']))) 104 | 105 | # "sum" csv file 106 | if 'sum' in functions_aggr: 107 | out_csv = self.get_csv(cur_column, 'sum') 108 | self.csv_files.append(out_csv) 109 | with open(out_csv, 'w') as fh: 110 | for (k, v) in sorted(aggr_data['sum'].items()): 111 | fh.write(k + "," + str(v) + '\n') 112 | 113 | # "avg" csv file 114 | if 'avg' in functions_aggr: 115 | out_csv = self.get_csv(cur_column, 'avg') 116 | self.csv_files.append(out_csv) 117 | with open(out_csv, 'w') as fh: 118 | for (k, v) in sorted(aggr_data['sum'].items()): 119 | fh.write(k + "," + str(v / aggr_data['count'][k]) + '\n') 120 | 121 | # "count" csv file (qps) 122 | if 'count' in functions_aggr: 123 | out_csv = self.get_csv(cur_column, 'count') 124 | self.csv_files.append(out_csv) 125 | with open(out_csv, 'w') as fh: 126 | for (k, v) in sorted(aggr_data['count'].items()): 127 | fh.write(k + "," + str(v) + '\n') 128 | 129 | gc.collect() 130 | return True 131 | 132 | def get_csv(self, 
column, func): 133 | csv_file = Metric.get_csv(self, column + '.' + func) 134 | return csv_file 135 | 136 | def parse(self): 137 | """ 138 | Merge multiple hosts' csv into one csv file. This approach has the benefit of reusing calculate_stats(), but with the penalty of reading the single csv 139 | later for calculate_stats(). However, since file cache will cache the newly written csv files, reading the csv file will not likely be a IO bottleneck. 140 | """ 141 | 142 | return True 143 | -------------------------------------------------------------------------------- /lib/luminol/src/luminol/anomaly_detector.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | © 2014 LinkedIn Corp. All rights reserved. 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | """ 12 | 13 | """ 14 | API for Anomaly Detector Module 15 | This module detects anomalies in a single time series. 
16 | """ 17 | 18 | from luminol import exceptions, utils 19 | from luminol.algorithms.anomaly_detector_algorithms.all import anomaly_detector_algorithms 20 | from luminol.constants import * 21 | from luminol.modules.anomaly import Anomaly 22 | from luminol.modules.time_series import TimeSeries 23 | 24 | 25 | class AnomalyDetector(object): 26 | 27 | def __init__(self, time_series, baseline_time_series=None, score_only=False, score_threshold=None, 28 | score_percent_threshold=None, algorithm_name=None, algorithm_params=None, refine_algorithm_name=None, 29 | refine_algorithm_params=None, algorithm_class=None): 30 | """ 31 | Initializer 32 | :param time_series: a TimeSeries, a dictionary or a path to a csv file(str). 33 | :param baseline_time_series: a TimeSeries, a dictionary or a path to a csv file(str). 34 | :param bool score_only: if asserted, only anomaly scores are computed. 35 | :param float score_percent_threshold: percent threshold on anomaly score above which is considered an anomaly. 36 | :param str algorithm_name: name of the algorithm to use(file name). 37 | :param dict algorithm_params: additional params for the specific algorithm. 38 | :param str refine_algorithm_name: name of the refine algorithm to use(file name). 39 | :param dict refine_algorithm_params: additional params for the specific refine algorithm. 40 | :param AnomalyDetectorAlgorithm algorithm_class: A AnomalyDetectorAlgorithm class that when passed to luminol will 41 | be used to assign anomaly scores. This is useful when luminol user wants to use a custom algorithm. 42 | """ 43 | 44 | self.time_series = self._load(time_series) 45 | self.baseline_time_series = self._load(baseline_time_series) 46 | self.score_percent_threshold = score_percent_threshold or DEFAULT_SCORE_PERCENT_THRESHOLD 47 | 48 | # Prepare algorithms. 
49 | algorithm_name = algorithm_name or ANOMALY_DETECTOR_ALGORITHM 50 | self.algorithm = algorithm_class or self._get_algorithm(algorithm_name) 51 | self.threshold = score_threshold or ANOMALY_THRESHOLD.get(algorithm_name) 52 | self.refine_algorithm = self._get_algorithm(refine_algorithm_name or ANOMALY_DETECTOR_REFINE_ALGORITHM) 53 | 54 | # Prepare parameters. 55 | self.algorithm_params = {'time_series': self.time_series, 'baseline_time_series': self.baseline_time_series} 56 | algorithm_params = algorithm_params or {} 57 | self.algorithm_params.update(algorithm_params) 58 | self.refine_algorithm_params = refine_algorithm_params or {} 59 | 60 | # Detect anomalies. 61 | self._detect(score_only) 62 | 63 | def _load(self, time_series): 64 | """ 65 | Load time series. 66 | :param time_series: a TimeSeries, a dictionary or a path to a csv file(str). 67 | :return TimeSeries: a TimeSeries object. 68 | """ 69 | if not time_series: 70 | return None 71 | if isinstance(time_series, TimeSeries): 72 | return time_series 73 | if isinstance(time_series, dict): 74 | return TimeSeries(time_series) 75 | return TimeSeries(utils.read_csv(time_series)) 76 | 77 | def _get_algorithm(self, algorithm_name): 78 | """ 79 | Get the specific algorithm. 80 | :param str algorithm_name: name of the algorithm to use(file name). 81 | :return: algorithm object. 82 | """ 83 | try: 84 | algorithm = anomaly_detector_algorithms[algorithm_name] 85 | return algorithm 86 | except KeyError: 87 | raise exceptions.AlgorithmNotFound('luminol.AnomalyDetector: ' + str(algorithm_name) + ' not found.') 88 | 89 | def _detect(self, score_only): 90 | """ 91 | Detect anomaly periods. 92 | :param bool score_only: if true, only anomaly scores are computed. 
93 | """ 94 | try: 95 | algorithm = self.algorithm(**self.algorithm_params) 96 | self.anom_scores = algorithm.run() 97 | except exceptions.NotEnoughDataPoints: 98 | algorithm = anomaly_detector_algorithms['default_detector'](self.time_series) 99 | self.threshold = self.threshold or ANOMALY_THRESHOLD['default_detector'] 100 | self.anom_scores = algorithm.run() 101 | if not score_only: 102 | self._detect_anomalies() 103 | 104 | def _detect_anomalies(self): 105 | """ 106 | Detect anomalies using a threshold on anomaly scores. 107 | """ 108 | anom_scores = self.anom_scores 109 | max_anom_score = anom_scores.max() 110 | anomalies = [] 111 | 112 | if max_anom_score: 113 | threshold = self.threshold or max_anom_score * self.score_percent_threshold 114 | # Find all the anomaly intervals. 115 | intervals = [] 116 | start, end = None, None 117 | for timestamp, value in anom_scores.iteritems(): 118 | if value > threshold: 119 | end = timestamp 120 | if not start: 121 | start = timestamp 122 | elif start and end is not None: 123 | intervals.append([start, end]) 124 | start = None 125 | end = None 126 | if start is not None: 127 | intervals.append([start, end]) 128 | 129 | # Locate the exact anomaly point within each anomaly interval. 130 | for interval_start, interval_end in intervals: 131 | interval_series = anom_scores.crop(interval_start, interval_end) 132 | 133 | self.refine_algorithm_params['time_series'] = interval_series 134 | refine_algorithm = self.refine_algorithm(**self.refine_algorithm_params) 135 | scores = refine_algorithm.run() 136 | max_refine_score = scores.max() 137 | 138 | # Get the timestamp of the maximal score. 139 | max_refine_timestamp = scores.timestamps[scores.values.index(max_refine_score)] 140 | anomaly = Anomaly(interval_start, interval_end, interval_series.max(), max_refine_timestamp) 141 | anomalies.append(anomaly) 142 | 143 | self.anomalies = anomalies 144 | 145 | def get_anomalies(self): 146 | """ 147 | Get anomalies. 
148 | :return list: a list of Anomaly objects. 149 | """ 150 | return getattr(self, 'anomalies', []) 151 | 152 | def get_all_scores(self): 153 | """ 154 | Get anomaly scores. 155 | :return: a TimeSeries object represents anomaly scores. 156 | """ 157 | return getattr(self, 'anom_scores', None) 158 | -------------------------------------------------------------------------------- /src/naarad/httpdownload.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | """ 17 | 18 | import os 19 | import sys 20 | import re 21 | import urllib2 22 | import logging 23 | from HTMLParser import HTMLParser 24 | 25 | import naarad.utils 26 | 27 | logger = logging.getLogger('naarad.httpdownload') 28 | 29 | 30 | def handle_single_url(url, outdir, outfile=None): 31 | """ 32 | Base function which takes a single url, download it to outdir/outfile 33 | :param str url: a full/absolute url, e.g. http://www.cnn.com/log.zip 34 | :param str outdir: the absolute local directory. e.g. /home/user1/tmp/ 35 | :param str outfile: (optional) filename stored in local directory. 
def handle_single_url(url, outdir, outfile=None):
    """
    Base function which takes a single url, download it to outdir/outfile
    :param str url: a full/absolute url, e.g. http://www.cnn.com/log.zip
    :param str outdir: the absolute local directory. e.g. /home/user1/tmp/
    :param str outfile: (optional) filename stored in local directory. If outfile is not given, extract the filename from url
    :return: the local full path name of downloaded url, or None on error
    """
    if not url or type(url) != str \
            or not outdir or type(outdir) != str:
        logger.error('passed in parameters %s %s are incorrect.' % (url, outdir))
        return

    if not naarad.utils.is_valid_url(url):
        logger.error("passed in url %s is incorrect." % url)
        return

    if not outfile:
        segs = url.split('/')
        outfile = segs[-1]
        outfile = urllib2.quote(outfile)

    output_file = os.path.join(outdir, outfile)
    if os.path.exists(output_file):
        logger.warn("the %s already exists!" % outfile)

    # BUGFIX: fetch the url before touching the local file. Previously the
    # output file was opened for writing (created/truncated) first, so a
    # failed download left behind an empty file.
    # HTTPError is caught before its parent class URLError.
    try:
        response = urllib2.urlopen(url)
        content = response.read()
    except urllib2.HTTPError:
        logger.error("got HTTPError when retrieving %s" % url)
        return
    except urllib2.URLError:
        logger.error("got URLError when retrieving %s" % url)
        return

    with open(output_file, "w") as fh:
        fh.write(content)

    return output_file


def stream_url(url):
    """
    Read response of specified url into memory and return to caller. No persistence to disk.
    :return: response content if accessing the URL succeeds, False otherwise
    """
    try:
        response = urllib2.urlopen(url)
        response_content = response.read()
        return response_content
    except (urllib2.URLError, urllib2.HTTPError):
        logger.error('Unable to access requested URL: %s', url)
        return False


class HTMLLinkExtractor(HTMLParser):
    """
    Helper class to parse the html file returned. It extracts href links into links[]
    """
    def __init__(self):
        HTMLParser.__init__(self)
        self.flag = 0
        self.links = []
        self.title = ""
        self.img = ""
        self.content = ""

    def handle_starttag(self, tag, attrs):
        # Collect the href of every anchor tag.
        if tag == "a":
            if len(attrs) != 0:
                for (variable, value) in attrs:
                    if variable == "href":
                        self.links.append(value)


def get_urls_from_seed(url):
    """
    get a list of urls from a seeding url, return a list of urls

    :param str url: a full/absolute url, e.g. http://www.cnn.com/logs/
    :return: a list of full/absolute urls.
    """
    if not url or type(url) != str or not naarad.utils.is_valid_url(url):
        logger.error("get_urls_from_seed() does not have valid seeding url.")
        return

    # Extract the host info of "http://host:port/" in case of href urls are relative urls (e.g., /path/gc.log)
    # Then join (host info and relative urls) to form the complete urls
    base_index = url.find('/', len("https://"))  # get the first "/" after "http://" or "https://"; handling both cases.
    base_url = url[:base_index]  # base_url = "http://host:port" or "https://host:port" or "http://host" (where no port is given)

    # Extract the "href" denoted urls
    urls = []
    try:
        response = urllib2.urlopen(url)
        hp = HTMLLinkExtractor()
        hp.feed(response.read())
        urls = hp.links
        hp.close()
    except urllib2.HTTPError:
        logger.error("Got HTTPError when opening the url of %s" % url)
        return urls

    # Check whether the url is relative or complete
    for i in range(len(urls)):
        if not urls[i].startswith("http://") and not urls[i].startswith("https://"):  # a relative url ?
            urls[i] = base_url + urls[i]

    return urls


def download_url_single(inputs, outdir, outfile=None):
    """
    Downloads a http(s) url to a local file
    :param str inputs: the absolute url
    :param str outdir: Required. the local directory to put the downloaded files.
    :param str outfile: Optional. If this is given, the downloaded url will be renamed to outfile;
      If this is not given, then the local file will be the original one, as given in url.
    :return: the local full path name of downloaded url, or None on error
    """
    if not inputs or type(inputs) != str or not outdir or type(outdir) != str:
        # BUGFIX: use the module logger (was the root 'logging' module), so
        # this error is routed like every other message in this file.
        logger.error("The call parameters are invalid.")
        return
    else:
        if not os.path.exists(outdir):
            os.makedirs(outdir)

    output_file = handle_single_url(inputs, outdir, outfile)
    return output_file
If not given, then all urls will be valid 168 | :return: A list of local full path names (downloaded from inputs) 169 | """ 170 | if not inputs or type(inputs) != str \ 171 | or not outdir or type(outdir) != str: 172 | logging.error("The call parameters are invalid.") 173 | return 174 | else: 175 | if not os.path.exists(outdir): 176 | os.makedirs(outdir) 177 | 178 | output_files = [] 179 | files = get_urls_from_seed(inputs) 180 | for f in files: 181 | if re.compile(regex).match(f): 182 | output_file = handle_single_url(f, outdir) 183 | output_files.append(output_file) 184 | 185 | return output_files 186 | 187 | 188 | def download_url_list(url_list, outdir): 189 | """ 190 | Downloads list of http(s) urls to local files 191 | :param list url_list: list of URLs to download 192 | :param str outdir: Required. the local directory to put the downloadedfiles. 193 | If this is not given, then the local file will be the original one, as given in url. 194 | :return None 195 | """ 196 | for url in url_list: 197 | download_url_single(url, outdir) 198 | -------------------------------------------------------------------------------- /test/test_netstat_metric.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | """ 17 | 18 | import os 19 | import sys 20 | import uuid 21 | import shutil 22 | 23 | # add the path of ~/naarad/src; the testing py is under ~/naarad/test 24 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'src'))) 25 | import naarad.utils 26 | from naarad.metrics.netstat_metric import NetstatMetric 27 | 28 | # the temporary directory for testing, will remove it after done. 29 | tmp_dir = '' 30 | 31 | 32 | def prepare_data(): 33 | """ 34 | Hard code the raw logs and output into files so that netstat metric can pick them up. 35 | Doing so can remove the dependency on physical logs. 36 | :return: 37 | """ 38 | log = [] 39 | log.append('2014-04-14 12:09:01.67581 Active Internet connections (w/o servers)') 40 | log.append('2014-04-14 12:09:01.67581 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name') 41 | log.append('2014-04-14 12:09:01.67581 tcp 0 500 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox') 42 | log.append('2014-04-14 12:09:01.67581 tcp 120 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox') 43 | log.append('2014-04-14 12:09:03.76251 Active Internet connections (w/o servers)') 44 | log.append('2014-04-14 12:09:03.76251 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name') 45 | log.append('2014-04-14 12:09:03.76251 tcp 0 200 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox') 46 | log.append('2014-04-14 12:09:03.76251 tcp 330 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox') 47 | log.append('2014-04-14 12:09:05.84302 Active Internet connections (w/o servers)') 48 | log.append('2014-04-14 12:09:05.84302 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name') 49 | log.append('2014-04-14 12:09:05.84302 tcp 0 345 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox') 50 | 
log.append('2014-04-14 12:09:05.84302 tcp 440 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox') 51 | log.append('2014-04-14 12:09:07.91455 Active Internet connections (w/o servers)') 52 | log.append('2014-04-14 12:09:07.91455 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name') 53 | log.append('2014-04-14 12:09:07.91455 tcp 0 0 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox') 54 | log.append('2014-04-14 12:09:07.91455 tcp 1550 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox') 55 | log.append('2014-04-14 12:09:09.98031 Active Internet connections (w/o servers)') 56 | log.append('2014-04-14 12:09:09.98031 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name') 57 | log.append('2014-04-14 12:09:09.98031 tcp 0 564 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox') 58 | log.append('2014-04-14 12:09:09.98031 tcp 20 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox') 59 | log.append('2014-04-14 12:09:12.05993 Active Internet connections (w/o servers)') 60 | log.append('2014-04-14 12:09:12.05993 Proto Recv-Q Send-Q Local Address Foreign Address State PID/Program name') 61 | log.append('2014-04-14 12:09:12.05993 tcp 0 234 host1.localdomain.com:43214 web1.remotedomain.com:https ESTABLISHED 4996/firefox') 62 | log.append('2014-04-14 12:09:12.05993 tcp 3245 0 host1.localdomain.com:48860 email.localdomain.com:https ESTABLISHED 4996/firefox') 63 | 64 | with open(os.path.join(tmp_dir, 'netstat.tcp.out'), 'w') as fh: 65 | fh.write('\n'.join(log)) 66 | 67 | 68 | def setup(): 69 | create_tmp_dir() 70 | prepare_data() 71 | 72 | 73 | def teardown(): 74 | delete_tmp_dir() 75 | 76 | 77 | def create_tmp_dir(): 78 | """ 79 | create a unique tmp dir to hold the downloaded local files 80 | if the tmp_dir grenerated already exists, then simply return 81 | the user simply try again to 
generate another unique tmp dir 82 | :return: 83 | """ 84 | global tmp_dir 85 | tmp_dir = os.path.join('./', 'tmp' + '.' + str(uuid.uuid4())) # ./tmp.'randomstring' 86 | if not os.path.exists(tmp_dir): 87 | os.makedirs(tmp_dir) 88 | else: 89 | print "the path of %s already exists, please try again." % tmp_dir 90 | return 91 | 92 | 93 | def delete_tmp_dir(): 94 | """ 95 | delete the tmp directory 96 | :return: 97 | """ 98 | shutil.rmtree(tmp_dir) 99 | 100 | 101 | def test_netstatmetric(): 102 | """ 103 | First construct a NetstatMetric, then call the parse(), finally check whether the output files are there 104 | :return: 105 | """ 106 | # construct a NetstatMetric 107 | section = 'NETSTAT-host1' 108 | label = 'NETSTAT-host1' 109 | hostname = 'localhost' 110 | resource_path = 'resources' 111 | rule_strings = {} 112 | output_directory = tmp_dir 113 | infile_list = ['netstat.tcp.out'] 114 | other_options = {'connections': 'host1.localdomain.com<->web1.remotedomain.com:https host1:48860<->email', 'processes': '/firefox'} 115 | ts_start = None 116 | ts_end = None 117 | anomaly_detection_metrics = None 118 | important_sub_metrics = [] 119 | 120 | cur_metric = NetstatMetric(section, infile_list, hostname, output_directory, resource_path, label, ts_start, ts_end, rule_strings, important_sub_metrics, 121 | anomaly_detection_metrics, **other_options) 122 | cur_metric.infile_list = [os.path.join(tmp_dir, f) for f in cur_metric.infile_list] 123 | 124 | # create sub-directory of resource_path 125 | sub_dir = os.path.join(output_directory, resource_path) 126 | if not os.path.exists(sub_dir): 127 | os.makedirs(sub_dir) 128 | 129 | cur_metric.parse() 130 | 131 | # check the existance of the output files 132 | output_files = ['NETSTAT-host1.host1.localdomain.com_43214.web1.remotedomain.com_https.RecvQ.csv', 133 | 'NETSTAT-host1.host1.localdomain.com_43214.web1.remotedomain.com_https.SendQ.csv', 134 | 'NETSTAT-host1.host1.localdomain.com_48860.email.localdomain.com_https.RecvQ.csv', 
135 | 'NETSTAT-host1.host1.localdomain.com_48860.email.localdomain.com_https.SendQ.csv'] 136 | for f in output_files: 137 | file_path = os.path.join(sub_dir, f) 138 | assert os.path.exists(file_path) 139 | 140 | if __name__ == '__main__': 141 | test_netstatmetric() 142 | -------------------------------------------------------------------------------- /src/naarad/metrics/sar_metric.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | Copyright 2013 LinkedIn Corp. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | """ 17 | 18 | import datetime 19 | import logging 20 | import os 21 | 22 | from naarad.metrics.metric import Metric 23 | import naarad.utils 24 | from naarad.naarad_constants import important_sub_metrics_import 25 | 26 | logger = logging.getLogger('naarad.metrics.SARMetric') 27 | 28 | 29 | class SARMetric(Metric): 30 | """ Class for SAR cpuusage logs, deriving from class Metric """ 31 | 32 | supported_sar_types = ('SAR-cpuusage', 'SAR-cpuhz', 'SAR-device', 'SAR-memory', 'SAR-memutil', 'SAR-paging', 33 | 'SAR-etcp', 'SAR-tcp', 'SAR-dev', 'SAR-edev', 'SAR-sock', 'SAR-swapping', 'SAR-network', 'SAR-queue', 'SAR-switching') 34 | 35 | def __init__(self, metric_type, infile_list, hostname, aggr_metrics, outdir, resource_path, label, ts_start, ts_end, rule_strings, 36 | important_sub_metrics, anomaly_detection_metrics, **other_options): 37 | metric_type = self.extract_metric_name(metric_type) 38 | Metric.__init__(self, metric_type, infile_list, hostname, aggr_metrics, outdir, resource_path, label, ts_start, ts_end, 39 | rule_strings, important_sub_metrics, anomaly_detection_metrics) 40 | if not self.important_sub_metrics and self.metric_type in important_sub_metrics_import.keys(): 41 | self.important_sub_metrics = important_sub_metrics_import[self.metric_type] 42 | self.options = None 43 | self.devices = None 44 | for (key, val) in other_options.iteritems(): 45 | setattr(self, key, val.split()) 46 | 47 | def extract_metric_name(self, metric_name): 48 | """ 49 | Method to extract SAR metric names from the section given in the config. 
The SARMetric class assumes that 50 | the section name will contain the SAR types listed in self.supported_sar_types tuple 51 | 52 | :param str metric_name: Section name from the config 53 | :return: str which identifies what kind of SAR metric the section represents 54 | """ 55 | for metric_type in self.supported_sar_types: 56 | if metric_type in metric_name: 57 | return metric_type 58 | logger.error('Section [%s] does not contain a valid metric type, using type: "SAR-generic". Naarad works better ' 59 | 'if it knows the metric type. Valid SAR metric names are: %s', metric_name, self.supported_sar_types) 60 | return 'SAR-generic' 61 | 62 | def get_csv(self, col, device=None): 63 | column = naarad.utils.sanitize_string(col) 64 | if device is None: 65 | outcsv = os.path.join(self.resource_directory, "{0}.{1}.csv".format(self.label, column)) 66 | self.csv_column_map[outcsv] = col 67 | else: 68 | outcsv = os.path.join(self.resource_directory, "{0}.{1}.{2}.csv".format(self.label, device, column)) 69 | self.csv_column_map[outcsv] = device + '.' + col 70 | return outcsv 71 | 72 | def parse(self): 73 | # Multiple day span not supported. 
Assumes time is between 0:00 AM to 11:59 PM, or 0:00 to 23:59 74 | if not os.path.isdir(self.outdir): 75 | os.makedirs(self.outdir) 76 | if not os.path.isdir(self.resource_directory): 77 | os.makedirs(self.resource_directory) 78 | data = {} 79 | for input_file in self.infile_list: 80 | timestamp_format = None 81 | with open(input_file, 'r') as infile: 82 | line = infile.readline() 83 | # Pre-processing 84 | try: 85 | datesar = line.split()[3].split('/') 86 | # year is not fully qualified - this will work till year 2999 :) 87 | if int(datesar[2]) < 1000: 88 | year = int(datesar[2]) + 2000 89 | datesar[2] = str(year) 90 | except IndexError: 91 | logger.error("Header not found for file: %s", input_file) 92 | logger.error("line: %s", line) 93 | return False 94 | date = datesar[2] + '-' + datesar[0] + '-' + datesar[1] 95 | infile.readline() # skip blank line 96 | line = infile.readline() 97 | columns = line.split() 98 | if columns[1] in ('AM', 'PM'): 99 | ts_end_index = 2 100 | else: 101 | ts_end_index = 1 102 | if self.metric_type in self.device_types: 103 | columnstart = ts_end_index + 1 104 | else: 105 | columnstart = ts_end_index 106 | # Actually parsing data 107 | lines = infile.readlines() 108 | last_ts = None 109 | for i in range(len(lines)): 110 | # Skipping last line of the file since it could be malformed 111 | if i == len(lines) - 1: 112 | break 113 | line = lines[i] 114 | # Skipping header lines 115 | if 'Linux' in line or 'Average' in line or 'MHz' in line: 116 | continue 117 | words = line.split() 118 | if len(words) <= columnstart: 119 | continue 120 | ts = naarad.utils.convert_to_24hr_format(' '.join(words[0:ts_end_index])) 121 | if last_ts: 122 | if last_ts.startswith("23:") and ts.startswith("00:"): 123 | logger.info("Date rolling over") 124 | old_datetime = datetime.datetime.strptime(date, "%Y-%m-%d") 125 | new_datetime = old_datetime + datetime.timedelta(days=1) 126 | date = new_datetime.strftime("%Y-%m-%d") 127 | datetimestamp = date + ' ' + ts 128 
| if not timestamp_format or timestamp_format == 'unknown': 129 | timestamp_format = naarad.utils.detect_timestamp_format(datetimestamp) 130 | if timestamp_format == 'unknown': 131 | continue 132 | datetimestamp = naarad.utils.get_standardized_timestamp(datetimestamp, timestamp_format) 133 | last_ts = ts 134 | if self.ts_out_of_range(datetimestamp): 135 | continue 136 | if self.metric_type in self.device_types: 137 | # Skipping headers that appear in the middle of the file 138 | if not naarad.utils.is_number(words[ts_end_index + 1]): 139 | continue 140 | if self.devices and words[ts_end_index] not in self.devices: 141 | continue 142 | device = words[ts_end_index] 143 | else: 144 | # Skipping headers that appear in the middle of the file 145 | if not naarad.utils.is_number(words[ts_end_index]): 146 | continue 147 | device = None 148 | datetimestamp = naarad.utils.reconcile_timezones(datetimestamp, self.timezone, self.graph_timezone) 149 | for i in range(columnstart, len(words)): 150 | if self.options and columns[i] not in self.options: 151 | continue 152 | outcsv = self.get_csv(columns[i], device) 153 | if outcsv in data: 154 | data[outcsv].append(datetimestamp + ',' + words[i]) 155 | else: 156 | data[outcsv] = [] 157 | data[outcsv].append(datetimestamp + ',' + words[i]) 158 | # Post processing, putting data in csv files 159 | for csv in data.keys(): 160 | self.csv_files.append(csv) 161 | with open(csv, 'w') as csvf: 162 | csvf.write('\n'.join(sorted(data[csv]))) 163 | return True 164 | --------------------------------------------------------------------------------