├── .gitignore ├── LICENSE ├── README.md ├── ostrich ├── __init__.py ├── histogram.py ├── stats.py ├── stats_collection.py ├── stats_provider.py ├── test │ ├── __init__.py │ └── unit │ │ ├── __init__.py │ │ ├── test_stats.py │ │ └── test_time_Series_collector.py ├── time_series_collector.py ├── timing.py └── twisted │ └── __init__.py ├── setup.py └── test.sh /.gitignore: -------------------------------------------------------------------------------- 1 | /.coverage 2 | /.coverage-results 3 | /_trial_temp 4 | /build 5 | /dist 6 | /htmlcov 7 | /*.egg-info 8 | *.pyc 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2010 Wade Simmons 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python-ostrich 2 | 3 | This is a port of the Scala [Ostrich](http://github.com/twitter/ostrich) library. This port is currently a work in progress, so only the stuff covered in the unit tests are considered to be completed. 4 | 5 | ## Stats API ## 6 | 7 | There are three kinds of statistics that ostrich captures: 8 | 9 | - counters 10 | 11 | A counter is a value that never decreases. Examples might be "`widgets_sold`" or "`births`". You 12 | just click the counter each time a countable event happens, and graphing utilities usually graph 13 | the deltas over time. To increment a counter, use: 14 | 15 | stats.incr("births") 16 | 17 | # or 18 | 19 | stats.incr("widgets_sold", 5) 20 | 21 | - gauges 22 | 23 | A gauge is a value that has a discrete value at any given moment, like "`heap_used`" or 24 | "`current_temperature`". It's usually a measurement that you only need to take when someone asks. 25 | To define a gauge, stick this code somewhere in the server initialization: 26 | 27 | stats.make_gauge("current_temperature", lambda: my_thermometer.get_temperature_in_celcius()) 28 | 29 | # you can also create a gauge by decorating a method: 30 | 31 | @stats.gauge("current_temperature") 32 | def current_temperature(): 33 | return my_thermometer.get_temperature_in_celcius() 34 | 35 | Gauge methods should always return a number (either an integer or a float) 36 | 37 | - timings 38 | 39 | A timing is a stopwatch timer around code, like so: 40 | 41 | with stats.time("translation"): 42 | document.translate("de", "en") 43 | 44 | # you can also time something by decorating the method: 45 | 46 | @stats.time("translation") 47 | def translation(): 48 | document.translate("de", "en") 49 | 50 | Timings are collected in aggregate, and the aggregation is reported through the "`stats`" command. 51 | The aggregation includes the count (number of timings performed), sum, maximum, minimum, average, 52 | standard deviation, and sum of squares (useful for aggregating the standard deviation). 53 | 54 | ## Dump stats as JSON ## 55 | 56 | There is a `stats.json_encoder` function provided to make dumping that stats to JSON easy. 57 | 58 | json.dumps(stats.stats(reset=False), default=stats.json_encoder) 59 | 60 | ## Twisted Web Resource ## 61 | 62 | If you are using Twisted Web, there is a ready to use Resource available: 63 | 64 | from ostrich.twisted import StatsResource 65 | 66 | This resource will respond to the query string parameter `reset=(0|1)`. If not specified, the default is `reset=0`. 67 | -------------------------------------------------------------------------------- /ostrich/__init__.py: -------------------------------------------------------------------------------- 1 | from ostrich import stats 2 | 3 | __all__ = ["stats"] -------------------------------------------------------------------------------- /ostrich/histogram.py: -------------------------------------------------------------------------------- 1 | from bisect import bisect 2 | import sys 3 | 4 | class Histogram(object): 5 | BUCKET_OFFSETS = [1, 2, 3, 4, 5, 7, 9, 11, 14, 18, 24, 31, 40, 52, 67, 87, 113, 147, 191, 248, 6 | 322, 418, 543, 706, 918, 1193, 1551, 2016, 2620, 3406, 4428, 5757, 7483, 7 | 9728, 12647, 16441, 21373, 27784, 36119, 46955, 61041, 79354, 103160, 134107, 8 | 174339, 226641, 294633, 383023, 497930, 647308, 841501, 1093951] 9 | 10 | def __init__(self, *values): 11 | self.num_buckets = len(self.BUCKET_OFFSETS) + 1 12 | self.buckets = [0] * self.num_buckets 13 | self.total = 0 14 | if values: 15 | for val in values: 16 | self.add(val) 17 | 18 | @classmethod 19 | def from_list(cls, buckets, bucket_offsets=None): 20 | """This method will be lossy if the bucket_offsets are different""" 21 | if not bucket_offsets: 22 | bucket_offsets = cls.BUCKET_OFFSETS 23 | h = cls() 24 | if bucket_offsets == cls.BUCKET_OFFSETS: 25 | for i, v in enumerate(buckets): 26 | h.buckets[i] = v 27 | h.total = sum(buckets) 28 | else: 29 | for i, v in enumerate(buckets): 30 | # hack because binary_search does an exclusive max 31 | h.add(bucket_offsets[i]-1, v) 32 | return h 33 | 34 | def add(self, n, count=1): 35 | index = bisect(self.BUCKET_OFFSETS, n) 36 | self.buckets[index] += count 37 | self.total += count 38 | 39 | def clear(self): 40 | for i in range(self.num_buckets): 41 | self.buckets[i] = 0 42 | self.total = 0 43 | 44 | def get_percentile(self, percentile): 45 | sum = 0 46 | index = 0 47 | while sum < percentile * self.total: 48 | sum += self.buckets[index] 49 | index += 1 50 | 51 | if index == 0: 52 | return 0 53 | elif index - 1 >= len(self.BUCKET_OFFSETS): 54 | return sys.maxint 55 | else: 56 | return self.BUCKET_OFFSETS[index - 1] - 1 57 | 58 | def merge(self, other): 59 | for i in range(self.num_buckets): 60 | self.buckets[i] += other.buckets[i] 61 | self.total += other.total 62 | 63 | def clone(self): 64 | new = Histogram() 65 | new.merge(self) 66 | return new 67 | -------------------------------------------------------------------------------- /ostrich/stats.py: -------------------------------------------------------------------------------- 1 | from ostrich.stats_provider import StatsProvider 2 | from ostrich.stats_collection import StatsCollection 3 | from ostrich.timing import TimingStat 4 | 5 | class Stats(StatsProvider): 6 | def __init__(self): 7 | self.gauges = {} 8 | self.collection = StatsCollection() 9 | self.forked_collections = [] 10 | 11 | def add_timing(self, name, timing): 12 | map(lambda c: c.add_timing(name, timing), self.forked_collections) 13 | return self.collection.add_timing(name, timing) 14 | 15 | def incr(self, name, count=1): 16 | map(lambda c: c.incr(name, count), self.forked_collections) 17 | return self.collection.incr(name, count) 18 | 19 | def get_counter_stats(self, reset=False): 20 | return self.collection.get_counter_stats(reset) 21 | 22 | def get_timing_stats(self, reset=False): 23 | return self.collection.get_timing_stats(reset) 24 | 25 | def get_gauge_stats(self): 26 | return dict(map(lambda (k, gauge): (k, gauge()), self.gauges.items())) 27 | 28 | def get_timing(self, name): 29 | # make sure any new timings get added to forked collections 30 | map(lambda c: c.get_timing(name), self.forked_collections) 31 | return self.collection.get_timing(name) 32 | 33 | def get_counter(self, name): 34 | # make sure any new counters get added to forked collections 35 | map(lambda c: c.get_counter(name), self.forked_collections) 36 | return self.collection.get_counter(name) 37 | 38 | def stats(self, reset=False): 39 | d = self.collection.stats(reset) 40 | d.update(gauges=self.get_gauge_stats()) 41 | return d 42 | 43 | def clear_all(self): 44 | map(lambda c: c.clear_all(), self.forked_collections) 45 | del self.forked_collections[:] 46 | self.collection.clear_all() 47 | self.gauges.clear() 48 | 49 | def make_gauge(self, name, func): 50 | self.gauges[name] = func 51 | 52 | def fork(self): 53 | collection = StatsCollection() 54 | self.forked_collections.append(collection) 55 | return collection 56 | 57 | _stats = Stats() 58 | add_timing = _stats.add_timing 59 | incr = _stats.incr 60 | get_counter_stats = _stats.get_counter_stats 61 | get_timing_stats = _stats.get_timing_stats 62 | get_gauge_stats = _stats.get_gauge_stats 63 | get_timing = _stats.get_timing 64 | get_counter = _stats.get_counter 65 | clear_all = _stats.clear_all 66 | make_gauge = _stats.make_gauge 67 | stats = _stats.stats 68 | time = _stats.time 69 | time_ns = _stats.time_ns 70 | fork = _stats.fork 71 | 72 | def raw_json_encoder(o): 73 | """Use this encoder if you will be merging the data back in to another 74 | ostrich.stats collector with TimingStat.from_raw_dict 75 | """ 76 | if isinstance(o, TimingStat): 77 | return o.to_raw_dict(histogram=True) 78 | else: 79 | raise TypeError(repr(o) + " is not JSON serializable") 80 | 81 | def json_encoder(o): 82 | if isinstance(o, TimingStat): 83 | return o.to_dict(raw_histogram=True) 84 | else: 85 | raise TypeError(repr(o) + " is not JSON serializable") 86 | 87 | def gauge(name): 88 | def _decorator(func): 89 | make_gauge(name, func) 90 | return func 91 | return _decorator 92 | -------------------------------------------------------------------------------- /ostrich/stats_collection.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | from ostrich.stats_provider import StatsProvider 4 | from ostrich.timing import Timing 5 | 6 | class StatsCollection(StatsProvider): 7 | def __init__(self): 8 | self.counters = {} 9 | self.timings = {} 10 | self.counters_lock = threading.Lock() 11 | self.timings_lock = threading.Lock() 12 | 13 | def add_timing(self, name, duration): 14 | self.get_timing(name).add(duration) 15 | 16 | def incr(self, name, count=1): 17 | self.get_counter(name).incr(count) 18 | 19 | def get_counter_stats(self, reset=False): 20 | with self.counters_lock: 21 | return dict(map(lambda (k, counter): (k, counter.get(reset)), self.counters.items())) 22 | 23 | def get_timing_stats(self, reset=False): 24 | with self.timings_lock: 25 | return dict(map(lambda (k, timing): (k, timing.get(reset)), self.timings.items())) 26 | 27 | def get_counter(self, name): 28 | counter = self.counters.get(name, None) 29 | if counter is None: 30 | with self.counters_lock: 31 | counter = self.counters.get(name, None) 32 | if counter is None: 33 | counter = Counter() 34 | self.counters[name] = counter 35 | return counter 36 | 37 | 38 | def get_timing(self, name): 39 | timing = self.timings.get(name, None) 40 | if timing is None: 41 | with self.timings_lock: 42 | timing = self.timings.get(name, None) 43 | if timing is None: 44 | timing = Timing() 45 | self.timings[name] = timing 46 | return timing 47 | 48 | def clear_all(self): 49 | with self.counters_lock: 50 | self.counters.clear() 51 | with self.timings_lock: 52 | self.timings.clear() 53 | 54 | class Counter(object): 55 | def __init__(self): 56 | self.value = 0; 57 | self.lock = threading.Lock() 58 | 59 | def incr(self, n=1): 60 | with self.lock: 61 | self.value += n 62 | return self.value 63 | 64 | def __call__(self): 65 | return self.value 66 | 67 | def reset(self): 68 | with self.lock: 69 | self.value = 0 70 | return self.value 71 | 72 | def get(self, reset=False): 73 | if reset: 74 | with self.lock: 75 | try: 76 | return self.value 77 | finally: 78 | self.value = 0 79 | else: 80 | return self.value 81 | 82 | def __eq__(self, other): 83 | return self.value == other.value 84 | -------------------------------------------------------------------------------- /ostrich/stats_provider.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from decorator import decorator 4 | 5 | class StatsProvider(object): 6 | def time(self, name): 7 | """Returns a Timer that logs the duration, in milliseconds, with the given name.""" 8 | return Timer(self, name) 9 | 10 | def time_ns(self, name): 11 | """Returns a Timer that logs the duration, in nanoseconds, with the given name. 12 | 13 | When using nanoseconds, be sure to encode your field with that fact. Consider 14 | using the suffix `_ns` in your field. 15 | """ 16 | return Timer(self, name, nano=True) 17 | 18 | def stats(self, reset=False): 19 | return dict(counters=self.get_counter_stats(reset), timings=self.get_timing_stats(reset)) 20 | 21 | ## 22 | ## 23 | ## 24 | 25 | def add_timing(self, name, timing): 26 | return 0 27 | 28 | def incr(self, name, count=1): 29 | return count 30 | 31 | def get_counter_stats(self, reset=False): 32 | return {} 33 | 34 | def get_timing_stats(self, reset=False): 35 | return {} 36 | 37 | def clear_all(self): 38 | pass 39 | 40 | class Timer(object): 41 | def __init__(self, provider, key, nano=False): 42 | self.provider = provider 43 | self.key = key 44 | self.nano = nano 45 | 46 | def __enter__(self): 47 | self.start = time.time() 48 | 49 | def __exit__(self, type, value, tb): 50 | self.end = time.time() 51 | if self.nano: 52 | self.provider.add_timing(self.key, int(self.duration() * 1000000000)) 53 | else: 54 | self.provider.add_timing(self.key, int(self.duration() * 1000)) 55 | 56 | def duration(self): 57 | return self.end - self.start 58 | 59 | def __call__(self, f): 60 | def _decorator(func, *args, **kwargs): 61 | with self: 62 | return func(*args, **kwargs) 63 | return decorator(_decorator, f) -------------------------------------------------------------------------------- /ostrich/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wadey/python-ostrich/fb886903b0abcba8abc7d68448aa5ccab995805c/ostrich/test/__init__.py -------------------------------------------------------------------------------- /ostrich/test/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wadey/python-ostrich/fb886903b0abcba8abc7d68448aa5ccab995805c/ostrich/test/unit/__init__.py -------------------------------------------------------------------------------- /ostrich/test/unit/test_stats.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import math 4 | import unittest 5 | 6 | from ostrich import stats, timing, histogram 7 | 8 | class StatsTest(unittest.TestCase): 9 | def setUp(self): 10 | stats.clear_all() 11 | 12 | def test_counters(self): 13 | stats.incr("widgets", 1) 14 | stats.incr("wodgets", 12) 15 | stats.incr("wodgets") 16 | self.assertEquals({"widgets": 1, "wodgets": 13}, stats.get_counter_stats()) 17 | 18 | def test_timings_empty(self): 19 | stats.add_timing("test", 0) 20 | test = stats.get_timing("test") 21 | self.assertEqual(timing.TimingStat(1, 0, 0), test.get(reset=True)) 22 | # the timings list will be empty here: 23 | self.assertEqual(timing.TimingStat(0, 0, 0), test.get()) 24 | 25 | def test_timings_basic(self): 26 | stats.add_timing("test", 1) 27 | stats.add_timing("test", 2) 28 | stats.add_timing("test", 3) 29 | test = stats.get_timing("test") 30 | self.assertEqual(timing.TimingStat(3, 3, 1, 2.0, 2.0, histogram.Histogram(1, 2, 3)), test.get()) 31 | 32 | def test_timings_report(self): 33 | x = 0 34 | with stats.time("hundred"): 35 | for i in xrange(100): 36 | x += i 37 | timings = stats.get_timing_stats() 38 | self.assertEquals(["hundred"], timings.keys()) 39 | self.assertEquals(1, timings["hundred"].count) 40 | self.assertEquals(timings["hundred"].average, timings["hundred"].min) 41 | self.assertEquals(timings["hundred"].average, timings["hundred"].max) 42 | 43 | def test_timings_average(self): 44 | stats.add_timing("test", 0) 45 | test = stats.get_timing("test") 46 | self.assertEquals(timing.TimingStat(1, 0, 0), test.get()) 47 | 48 | def test_timings_negative(self): 49 | stats.add_timing("test", 1) 50 | stats.add_timing("test", -1) 51 | test = stats.get_timing("test") 52 | self.assertEquals(timing.TimingStat(1, 1, 1, 1.0, 0.0, histogram.Histogram(1)), test.get()) 53 | 54 | def test_timing_boundarys(self): 55 | stats.add_timing("test", sys.maxint) 56 | stats.add_timing("test", 5) 57 | sum = 5.0 + sys.maxint 58 | avg = sum / 2.0 59 | sumsq = 5.0 * 5.0 + float(sys.maxint) * sys.maxint 60 | partial = sumsq - sum * avg 61 | test = stats.get_timing("test") 62 | self.assertEquals(timing.TimingStat(2, sys.maxint, 5, avg, partial, histogram.Histogram(5, sys.maxint)), test.get()) 63 | 64 | def test_timing_with(self): 65 | with stats.time("test"): 66 | time.sleep(0.01) 67 | test = stats.get_timing("test") 68 | self.assertTrue(test.get().average >= 10) 69 | 70 | def test_timing_decorator(self): 71 | @stats.time("test") 72 | def _test_timing_decorator(): 73 | time.sleep(0.01) 74 | _test_timing_decorator() 75 | test = stats.get_timing("test") 76 | self.assertTrue(test.get().average >= 10) 77 | 78 | def test_timing_reset(self): 79 | x = 0 80 | 81 | with stats.time("hundred"): 82 | for i in xrange(100): x += i 83 | self.assertEqual(1, stats.get_timing_stats(reset=False)["hundred"].count) 84 | with stats.time("hundred"): 85 | for i in xrange(100): x += i 86 | self.assertEqual(2, stats.get_timing_stats(reset=False)["hundred"].count) 87 | self.assertEqual(2, stats.get_timing_stats(reset=True)["hundred"].count) 88 | with stats.time("hundred"): 89 | for i in xrange(100): x += i 90 | self.assertEqual(1, stats.get_timing_stats(reset=False)["hundred"].count) 91 | 92 | def test_timing_bundle(self): 93 | timing_stat = timing.TimingStat(3, 20, 10, 15.0, 50.0, histogram.Histogram(10, 15, 20)) 94 | stats.add_timing("test", timing_stat) 95 | stats.add_timing("test", 25) 96 | test = stats.get_timing_stats(reset=False)["test"] 97 | self.assertEqual(4, test.count) 98 | self.assertEqual(17, test.average) 99 | self.assertEqual(6, int(test.std_dev)) 100 | 101 | stats.clear_all() 102 | 103 | timing_stat1 = timing.TimingStat(2, 25, 15, 20.0, 50.0, histogram.Histogram(15, 25)) 104 | timing_stat2 = timing.TimingStat(2, 20, 10, 15.0, 50.0, histogram.Histogram(10, 20)) 105 | stats.add_timing("test", timing_stat1) 106 | stats.add_timing("test", timing_stat2) 107 | test = stats.get_timing_stats(reset=False)["test"] 108 | self.assertEqual(4, test.count) 109 | self.assertEqual(17, test.average) 110 | self.assertEqual(6, int(test.std_dev)) 111 | 112 | def test_timing_add(self): 113 | x = 0 114 | with stats.time("hundred"): 115 | for i in xrange(100): x += 1 116 | self.assertEquals(1, len(stats.get_timing_stats(reset=False))) 117 | 118 | stats.add_timing("foobar", timing.TimingStat(1, 0, 0)) 119 | self.assertEquals(2, len(stats.get_timing_stats(reset=False))) 120 | self.assertEquals(1, stats.get_timing_stats(reset=True)["foobar"].count) 121 | stats.add_timing("foobar", timing.TimingStat(3, 0, 0)) 122 | self.assertEquals(3, stats.get_timing_stats(reset=False)["foobar"].count) 123 | 124 | # TODO: not implemented 125 | # def test_timing_external(self): 126 | # pass 127 | 128 | def test_timing_report_sorted(self): 129 | stats.add_timing("alpha", timing.TimingStat(1, 0, 0)) 130 | string = str(stats.get_timing_stats(reset=False)["alpha"]) 131 | self.assertEquals("(average=0, count=1, maximum=0, minimum=0, p25=0, p50=0, p75=0, p90=0, p99=0, p999=0, p9999=0, standard_deviation=0)", string) 132 | 133 | # TODO 134 | # def test_timing_json_contains_histogram_buckets(self): 135 | # pass 136 | 137 | def test_gauge_report(self): 138 | @stats.gauge("pi") 139 | def _pi(): 140 | return math.pi 141 | 142 | stats.make_gauge("e", lambda: math.e) 143 | self.assertEquals({"e": math.e, "pi": math.pi}, stats.get_gauge_stats()) 144 | 145 | def test_gauge_update(self): 146 | potatoes = [100.0] 147 | @stats.gauge("stew") 148 | def _stew(): 149 | potatoes[0] += 1.0 150 | return potatoes[0] 151 | self.assertEquals({"stew": 101.0}, stats.get_gauge_stats()) 152 | self.assertEquals({"stew": 102.0}, stats.get_gauge_stats()) 153 | self.assertEquals({"stew": 103.0}, stats.get_gauge_stats()) 154 | 155 | def test_fork(self): 156 | collection = stats.fork() 157 | stats.incr("widgets", 5) 158 | self.assertEquals({"widgets": 5}, collection.get_counter_stats()) 159 | self.assertEquals({"widgets": 5}, stats.get_counter_stats(reset=True)) 160 | stats.incr("widgets", 5) 161 | self.assertEquals({"widgets": 10}, collection.get_counter_stats()) 162 | self.assertEquals({"widgets": 5}, stats.get_counter_stats(reset=True)) 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /ostrich/test/unit/test_time_Series_collector.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import time 3 | 4 | import mock 5 | 6 | from ostrich import stats 7 | from ostrich.time_series_collector import TimeSeriesCollector 8 | 9 | real_time = time.time 10 | 11 | class TimeSeriesCollectorTest(unittest.TestCase): 12 | def setUp(self): 13 | stats.clear_all() 14 | self.collector = TimeSeriesCollector() 15 | 16 | @mock.patch("time.time") 17 | def test_report_basic_stats(self, mock_time): 18 | my_time = real_time() 19 | mock_time.return_value = my_time 20 | 21 | stats.incr("cats") 22 | stats.incr("dogs", 3) 23 | self.collector.collect() 24 | my_time += 60 25 | mock_time.return_value = my_time 26 | stats.incr("dogs") 27 | self.collector.collect() 28 | 29 | data = self.collector.get("counter:dogs") 30 | self.assertEquals((int(my_time - (2 * 60)), 0), data[57]) 31 | self.assertEquals((int(my_time - 60), 3), data[58]) 32 | self.assertEquals((int(my_time), 1), data[59]) 33 | 34 | -------------------------------------------------------------------------------- /ostrich/time_series_collector.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import time 4 | 5 | from ostrich import stats 6 | from ostrich.timing import Timing, TimingStat 7 | from ostrich.histogram import Histogram 8 | 9 | class TimeSeries(object): 10 | def __init__(self, size, empty=0): 11 | self.data = [empty] * size 12 | self.size = size 13 | self.index = 0 14 | 15 | def add(self, n): 16 | self.data[self.index] = n 17 | self.index = (self.index + 1) % self.size 18 | 19 | def to_list(self): 20 | return self.data[self.index:] + self.data[:self.index] 21 | 22 | class TimeSeriesCollector(object): 23 | def __init__(self): 24 | self.hourly = {} 25 | self.hourly_timings = {} 26 | self.last_collection = time.time() 27 | self.stats = stats.fork() 28 | 29 | def collect(self): 30 | def get_or_add(key, new=0, d=self.hourly): 31 | value = d.get(key) 32 | if value is None: 33 | value = TimeSeries(60, new) 34 | d[key] = value 35 | return value 36 | 37 | for k, v in stats.get_gauge_stats().items(): 38 | get_or_add("gauge:%s" % k).add(v) 39 | 40 | for k, v in self.stats.get_counter_stats(reset=True).items(): 41 | get_or_add("counter:%s" % k).add(v) 42 | 43 | for k, v in self.stats.get_timing_stats(reset=True).items(): 44 | get_or_add("timing:%s" % k, None, d=self.hourly_timings).add(v) 45 | 46 | self.last_collection = time.time() 47 | 48 | def start_twisted(self, collect_every=60): 49 | from twisted.internet import task 50 | self.task = task.LoopingCall(self.collect) 51 | self.task.start(collect_every) 52 | return self.task 53 | 54 | def stop_twisted(self): 55 | self.task.stop() 56 | 57 | def get_combined(self, name, series=False): 58 | if name.startswith("counter:"): 59 | if series: 60 | return self.get(name) 61 | else: 62 | counter = self.stats.get_counter(name[8:]).get() 63 | if name in self.hourly: 64 | counter += sum(self.hourly[name].to_list()) 65 | return counter 66 | elif name.startswith("timing:"): 67 | timing = Timing() 68 | timing.add(self.stats.get_timing(name[7:]).get()) 69 | if name in self.hourly_timings: 70 | for v in self.hourly_timings[name].to_list(): 71 | if v: 72 | timing.add(v) 73 | return timing.get() 74 | else: 75 | raise NotImplemented("Only counters and timings supported") 76 | 77 | def get(self, name): 78 | times = [int(self.last_collection + ((i - 59) * 60)) for i in xrange(60)] 79 | if name.startswith("counter:"): 80 | return zip(times, self.hourly.get(name, TimeSeries(60, 0)).to_list()) \ 81 | + [(time.time(), self.stats.get_counter(name[8:]).get())] 82 | elif name.startswith("timing:"): 83 | return zip(times, [v or TimingStat(histogram=Histogram()) for v in self.hourly_timings.get(name, TimeSeries(60, None)).to_list()]) \ 84 | + [(time.time(), self.stats.get_timing(name[7:]).get())] 85 | else: 86 | raise NotImplemented("Only counters and timings supported") 87 | 88 | def keys(self): 89 | return self.hourly.keys() + self.hourly_timings.keys() 90 | -------------------------------------------------------------------------------- /ostrich/timing.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import math 3 | import threading 4 | 5 | from ostrich.histogram import Histogram 6 | 7 | class Timing(object): 8 | def __init__(self): 9 | self.max = 0 10 | self.min = sys.maxint 11 | self.count = 0 12 | self.histogram = Histogram() 13 | self.mean = 0.0 14 | self.partial_variance = 0.0 15 | self.lock = threading.Lock() 16 | 17 | def clear(self): 18 | """Resets the state of this Timing. Clears the durations and counts 19 | collected so far. 20 | """ 21 | with self.lock: 22 | self.max = 0 23 | self.min = sys.maxint 24 | self.count = 0 25 | self.histogram.clear() 26 | 27 | def add(self, n): 28 | if isinstance(n, TimingStat): 29 | return self.add_timing_stat(n) 30 | else: 31 | return self.add_duration(n) 32 | 33 | def add_duration(self, n): 34 | """Adds a duration to our current Timing.""" 35 | with self.lock: 36 | if n > -1: 37 | self.max = max(self.max, n) 38 | self.min = min(self.min, n) 39 | self.count += 1 40 | self.histogram.add(n) 41 | if self.count == 1: 42 | self.mean = float(n) 43 | self.partial_variance = 0.0 44 | else: 45 | new_mean = self.mean + (n - self.mean) / self.count 46 | self.partial_variance += (n - self.mean) * (n - new_mean) 47 | self.mean = new_mean 48 | else: 49 | # TODO: warning? 50 | pass 51 | return self.count 52 | 53 | def add_timing_stat(self, timing_stat): 54 | """Add a summarized set of timings.""" 55 | with self.lock: 56 | if timing_stat.count > 0: 57 | # (comment from Scala ostrich) these equations end up using the sum again, and may be lossy. i couldn't find or think of 58 | # a better way. 59 | new_mean = (self.mean * self.count + timing_stat.mean * timing_stat.count) / (self.count + timing_stat.count) 60 | self.partial_variance = self.partial_variance + timing_stat.partial_variance + \ 61 | (self.mean - new_mean) * self.mean * self.count + \ 62 | (timing_stat.mean - new_mean) * timing_stat.mean * timing_stat.count 63 | self.mean = new_mean 64 | self.count += timing_stat.count 65 | self.max = max(self.max, timing_stat.max) 66 | self.min = min(self.min, timing_stat.min) 67 | if timing_stat.histogram is not None: 68 | self.histogram.merge(timing_stat.histogram) 69 | 70 | def get(self, reset=False): 71 | """Returns a TimingStat for the measured event.""" 72 | with self.lock: 73 | try: 74 | return TimingStat(self.count, self.max, self.min, self.mean, self.partial_variance, self.histogram.clone()) 75 | finally: 76 | if reset: 77 | self.max = 0 78 | self.min = sys.maxint 79 | self.count = 0 80 | self.histogram.clear() 81 | 82 | class TimingStat(object): 83 | """A pre-calculated timing. If you have timing stats from an external source but 84 | still want to report them via the Stats interface, use this. 85 | 86 | Partial variance is `(count - 1)(s^2)`, or `sum(x^2) - sum(x) * mean`. 87 | """ 88 | def __init__(self, count=0, max=0, min=0, mean=0.0, partial_variance=0.0, histogram=None): 89 | self.count = count 90 | self.min = min if count > 0 else 0 91 | self.max = max if count > 0 else 0 92 | self.average = int(mean) if count > 0 else 0 93 | self.mean = mean if count > 0 else 0.0 94 | self.partial_variance = partial_variance if count > 1 else 0.0 95 | self.variance = (partial_variance / (count - 1)) if count > 1 else 0.0 96 | self.std_dev = round(math.sqrt(self.variance)) 97 | self.histogram = histogram 98 | 99 | def __eq__(self, other): 100 | return self.count == other.count and self.max == other.max and \ 101 | self.min == other.min and self.average == other.average and self.variance == other.variance 102 | 103 | def to_raw_dict(self, histogram=False): 104 | d = dict(count=self.count, max=self.max, min=self.min, 105 | mean=self.mean, partial_variance=self.partial_variance) 106 | if histogram and self.histogram: 107 | # strip off all the zeros at the end of the histogram 108 | histogram = list(self.histogram.buckets) 109 | while histogram and histogram[-1] == 0: 110 | histogram = histogram[:-1] 111 | d['histogram'] = histogram 112 | return d 113 | 114 | def to_dict_no_histogram(self): 115 | return dict(count=self.count, maximum=self.max, minimum=self.min, 116 | average=self.average, standard_deviation=long(self.std_dev)) 117 | 118 | def to_dict(self, percentiles=True, raw_histogram=False): 119 | d = self.to_dict_no_histogram() 120 | if self.histogram: 121 | h = self.histogram 122 | d.update(p25=h.get_percentile(0.25), 123 | p50=h.get_percentile(0.50), 124 | p75=h.get_percentile(0.75), 125 | p90=h.get_percentile(0.90), 126 | p99=h.get_percentile(0.99), 127 | p999=h.get_percentile(0.999), 128 | p9999=h.get_percentile(0.9999)) 129 | if raw_histogram: 130 | # strip off all the zeros at the end of the histogram 131 | histogram = list(h.buckets) 132 | while histogram and histogram[-1] == 0: 133 | histogram = histogram[:-1] 134 | d.update(histogram=histogram) 135 | return d 136 | 137 | @classmethod 138 | def from_raw_dict(cls, d, bucket_offsets=None): 139 | histogram = None 140 | if d.has_key('histogram'): 141 | histogram = Histogram.from_list(d['histogram'], bucket_offsets) 142 | return TimingStat(d['count'], d['max'], d['min'], d['mean'], d['partial_variance'], histogram) 143 | 144 | def __repr__(self): 145 | return self.__str__() 146 | 147 | def __str__(self): 148 | return "(" + ", ".join(["%s=%d" % (k, v) for k, v in sorted(self.to_dict().items())]) + ")" 149 | -------------------------------------------------------------------------------- /ostrich/twisted/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from twisted.web import static 4 | from twisted.web.resource import Resource 5 | 6 | from ostrich import stats 7 | from ostrich.time_series_collector import TimeSeriesCollector 8 | from ostrich.timing import TimingStat 9 | 10 | def respond(request, data, code=200): 11 | data = json.dumps(data, default=stats.json_encoder) 12 | if 'callback' in request.args: 13 | request.setResponseCode(200) 14 | request.setHeader('Content-Type', 'application/javascript') 15 | return request.args['callback'][0] + "(" + data + ")\n" 16 | else: 17 | request.setResponseCode(code) 18 | request.setHeader('Content-Type', 'application/json') 19 | return data + "\n" 20 | 21 | class StatsResource(Resource): 22 | def render_GET(self, request): 23 | reset = int(request.args.get('reset', [0])[0]) 24 | data = stats.stats(reset=reset) 25 | return respond(request, data) 26 | 27 | class StatsTimeSeriesResource(StatsResource): 28 | def __init__(self, collect_every=60): 29 | Resource.__init__(self) 30 | self.collector = TimeSeriesCollector() 31 | self.collector.start_twisted(collect_every=collect_every) 32 | 33 | self.putChild('graph', static.Data(GRAPH_HTML.strip(), "text/html")) 34 | self.putChild('graph_data', TimeSeriesDataResource(self.collector)) 35 | self.putChild('combined', TimeSeriesCombinedResource(self.collector)) 36 | 37 | # def render_GET(self, request): 38 | # reset = int(request.args.get('reset', [0])[0]) 39 | # return json.dumps(stats.stats(reset=reset), default=stats.json_encoder) 40 | 41 | class TimeSeriesDataResource(Resource): 42 | isLeaf = True 43 | 44 | def __init__(self, collector): 45 | self.collector = collector 46 | 47 | def render_GET(self, request): 48 | if len(request.postpath) == 0: 49 | return respond(request, {'keys': self.collector.keys(), 'stats': self.collector.stats.stats()}) 50 | else: 51 | def convert(v): 52 | if isinstance(v, TimingStat): 53 | return v.to_dict() 54 | return v 55 | 56 | name = '/'.join(request.postpath) 57 | 58 | output = {} 59 | try: 60 | for n in name.split(','): 61 | output[n] = [(date, convert(value)) for date, value in self.collector.get(n)] 62 | except KeyError: 63 | return respond(request, dict(code=404, error="Not Found"), code=404); 64 | 65 | if ',' in name: 66 | return respond(request, output) 67 | else: 68 | return respond(request, output[name]) 69 | 70 | class TimeSeriesCombinedResource(Resource): 71 | isLeaf = True 72 | 73 | def __init__(self, collector): 74 | self.collector = collector 75 | 76 | def render_GET(self, request): 77 | series = request.args.get('series', ['false'])[0] == 'true' 78 | if len(request.postpath) == 0: 79 | data = {} 80 | data['timings'] = dict((name, self.collector.get_combined("timing:%s" % name).to_dict()) for name in self.collector.stats.timings.keys()) 81 | data['counters'] = dict((name, self.collector.get_combined("counter:%s" % name, series=series)) for name in self.collector.stats.counters.keys()) 82 | return respond(request, data) 83 | else: 84 | name = '/'.join(request.postpath) 85 | 86 | data = {} 87 | try: 88 | for n in name.split(','): 89 | data[n] = self.collector.get_combined(name, series=series).to_dict() 90 | except KeyError: 91 | return respond(request, dict(code=404, error="Not Found"), code=404); 92 | 93 | if ',' in name: 94 | return respond(request, data) 95 | else: 96 | return respond(request, data[name]) 97 | 98 | GRAPH_HTML = """ 99 | 100 | 101 |
102 | 103 | 104 | 105 | 132 | 133 | 134 | 135 |