├── .gitignore
├── .python-version
├── .travis.yml
├── LICENSE
├── Makefile
├── README.md
├── bin
│   ├── kenshin-cache-query.py
│   ├── kenshin-change-schema.py
│   ├── kenshin-debug.py
│   ├── kenshin-delete.py
│   ├── kenshin-fetch.py
│   ├── kenshin-find-file.py
│   ├── kenshin-get-metrics.py
│   ├── kenshin-info.py
│   ├── kenshin-rebuild-index.py
│   ├── kenshin-rebuild-link.py
│   ├── kenshin-rebuild.py
│   ├── kenshin-rehash.py
│   ├── kenshin-restart.py
│   ├── kenshin-send-zero-metric.py
│   └── rurouni-cache.py
├── conf
│   ├── rurouni.conf.example
│   └── storage-schemas.conf.example
├── examples
│   ├── metric_stresser.py
│   ├── rurouni-client.py
│   └── rurouni-pickle-client.py
├── img
│   ├── docs
│   │   ├── cluster-before.png
│   │   ├── cluster-now.png
│   │   ├── kenshin-structure.png
│   │   └── query_perf.png
│   ├── kenshin-perf.png
│   └── kenshin.gif
├── kenshin
│   ├── __init__.py
│   ├── agg.py
│   ├── consts.py
│   ├── storage.py
│   ├── tools
│   │   ├── __init__.py
│   │   ├── hash.py
│   │   └── whisper_tool.py
│   └── utils.py
├── misc
│   ├── init_setup_demo.sh
│   ├── update_version.sh
│   └── versioning.py
├── requirements-dev.txt
├── requirements.txt
├── rurouni
│   ├── __init__.py
│   ├── cache.py
│   ├── conf.py
│   ├── exceptions.py
│   ├── fnv1a.pyx
│   ├── log.py
│   ├── protocols.py
│   ├── service.py
│   ├── state
│   │   ├── __init__.py
│   │   ├── events.py
│   │   └── instrumentation.py
│   ├── storage.py
│   ├── utils.py
│   └── writer.py
├── setup.py
├── tests
│   ├── __init__.py
│   ├── test_agg.py
│   ├── test_fnv1a.py
│   ├── test_io_performance.py
│   └── test_storage.py
└── twisted
    └── plugins
        └── rurouni_cache_plugin.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *.confc
5 | *~
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 |
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 |
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 |
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .cache
41 | nosetests.xml
42 | coverage.xml
43 |
44 | # Translations
45 | *.mo
46 | *.pot
47 |
48 | # Django stuff:
49 | *.log
50 |
51 | # Sphinx documentation
52 | docs/_build/
53 |
54 | # PyBuilder
55 | target/
56 |
57 | # twisted plugin
58 | *.cache
59 |
60 | # test data
61 | data/
62 |
63 | # ctags
64 | .tags_sorted_by_file
65 | .tags
66 |
67 | # venv
68 | venv/
69 |
70 | rurouni/fnv1a.c
71 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 2.7.12
2 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | python:
4 | - "2.7"
5 |
6 | install:
7 | - pip install --upgrade pip
8 | - pip install --upgrade setuptools
9 | - pip install -r requirements-dev.txt
10 | - python setup.py build_ext --inplace
11 | - python setup.py install
12 |
13 | script: nosetests
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright 2008-2012 Chris Davis; 2011-2016 The Graphite Project
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | init:
2 | test -d ${KenshinVenv} || virtualenv ${KenshinVenv}
3 |
4 | install: init
5 | @source ${KenshinVenv}/bin/activate; pip install -r requirements.txt
6 | @source ${KenshinVenv}/bin/activate; python setup.py install
7 |
8 | restart_rurouni:
9 | source ${KenshinVenv}/bin/activate; python bin/kenshin-restart.py
10 |
11 | restart_relay:
12 | find /service -name 'carbon-relay-*' | xargs -rL 1 svc -t
13 |
14 | restart_web:
15 | find /service -name 'graphite-*' | xargs -rL 1 svc -t
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Kenshin
2 | =============
3 |
4 | [](https://travis-ci.org/douban/Kenshin)
5 |
6 | > Kenshin ([るろうに剣心](http://zh.wikipedia.org/wiki/%E6%B5%AA%E5%AE%A2%E5%89%91%E5%BF%83))
7 |
8 | Kenshin project consists of two major components:
9 |
10 | - `Kenshin` is a fixed-size time-series database format, similar in design to [Whisper](https://github.com/graphite-project/whisper), and serves as an alternative to Whisper for the [Graphite](https://github.com/graphite-project) storage component. Whisper performs lots of tiny I/O operations on lots of different files; Kenshin aims to improve that I/O performance. For more design details, please refer to the [design docs](https://github.com/douban/Kenshin/wiki/design) (Chinese) and the QCon 2016 presentation [slides](https://github.com/zzl0/zzl0.github.com/raw/master/files/QCon-Kenshin.pdf).
11 |
12 | - `Rurouni-cache` is a storage agent that sits in front of Kenshin and batches up writes to make them more sequential; rurouni-cache is to Kenshin as carbon-cache is to Whisper.
13 |
14 | Kenshin is developed and maintained by Douban Inc. It currently runs in our production environment, powering all metrics (host, service, DAE app, and user-defined) at douban.com.
15 |
16 |
17 | ### What's the performance of Kenshin?
18 |
19 |
20 | In our environment, after switching to Kenshin, IOPS decreased by 97.5%, and query latency is not significantly higher than with Whisper.
21 |
22 |
23 |
24 |
25 | Quick Start
26 | ------------------
27 |
28 | We recommend using virtualenv when installing dependencies:
29 |
30 | $ git clone https://github.com/douban/Kenshin.git
31 | $ cd Kenshin
32 | $ virtualenv venv
33 | $ source venv/bin/activate
34 | $ pip install -r requirements.txt
35 |
36 | Tests can be run using nosetests:
37 |
38 | $ nosetests -v
39 |
40 | Setup configuration
41 |
42 | $ misc/init_setup_demo.sh
43 |
44 | Setup Kenshin
45 |
46 | $ python setup.py build_ext --inplace && python setup.py install
47 |
48 | Start two rurouni-cache instances
49 |
50 | $ python bin/rurouni-cache.py --debug --config=conf/rurouni.conf --instance=0 start
51 | $ python bin/rurouni-cache.py --debug --config=conf/rurouni.conf --instance=1 start
52 |
53 | Then go to [Graphite-Kenshin](https://github.com/douban/graphite-kenshin) to set up the web instances.
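
To check that a rurouni-cache instance is accepting data, you can push a test datapoint over the plaintext line protocol (`metric value timestamp`), the same format used by `bin/kenshin-send-zero-metric.py`. This is only a sketch; `127.0.0.1:2003` is an assumed address, so substitute the line receiver host/port configured in `conf/rurouni.conf`:

    # Sketch: send one datapoint in the plaintext "metric value timestamp" format,
    # as bin/kenshin-send-zero-metric.py does. The host/port below are assumptions;
    # use the line receiver address from your conf/rurouni.conf.
    import socket
    import time

    sock = socket.socket()
    sock.connect(('127.0.0.1', 2003))
    sock.sendall('test.metric.foo %s %d\n' % (42, int(time.time())))
    sock.close()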
54 |
55 | FAQ
56 | ----------
57 |
58 |
59 | ### Why don't you just use whisper?
60 |
61 | Whisper is great, and initially we did use it. Over time though, we ran into several issues:
62 |
63 | 1. Whisper uses a lot of I/O. There are several reasons:
64 |    - It uses one file per metric.
65 |    - The realtime downsampling feature (different data resolutions based on age) causes a lot of extra I/O.
66 | 2. Carbon-cache and carbon-relay are inefficient and can even become CPU-bound. We didn't write our own relay; instead, we replaced carbon-relay with [carbon-c-relay](https://github.com/grobian/carbon-c-relay).
67 |
68 |
69 | ### Why did you totally rewrite whisper? Couldn't you just submit a patch?
70 |
71 | The reason we didn't simply submit a patch for Whisper is that Kenshin's design is incompatible with Whisper's: Whisper uses one file per metric, while Kenshin merges N metrics into one file (see the sketch below).
72 |
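The following is a minimal sketch of what that looks like through the `kenshin` module, based on the calls used by `bin/kenshin-change-schema.py` and `bin/kenshin-fetch.py` in this repository; the metric names, retentions, and aggregation method are illustrative assumptions:

    # Sketch only: API usage inferred from bin/kenshin-change-schema.py and
    # bin/kenshin-fetch.py; the metric names, retentions, and aggregation
    # method below are made-up example values.
    import time
    import kenshin

    metrics = ['host1.cpu.idle', 'host1.cpu.user']   # N metrics share one .hs file
    retentions = [(10, 8640), (60, 10080)]           # (sec_per_point, count) pairs
    kenshin.create('demo.hs', metrics, retentions, 0.5, 'avg')

    now = int(time.time())
    kenshin.update('demo.hs', [[now, [99.0, 1.0]]])  # one datapoint carries all N values

    header, timeinfo, points = kenshin.fetch('demo.hs', now - 60, now)
    idx = header['tag_list'].index('host1.cpu.user')
    print [p[idx] if p else None for p in points]
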
73 | ### How to integrate Kenshin with Graphite-Web?
74 |
75 | We wrote a plugin for Graphite-API named [Graphite-Kenshin](https://github.com/douban/graphite-kenshin).
76 |
77 | Acknowledgments
78 | ------------------
79 |
80 | - Thanks to the [Graphite](https://github.com/graphite-project) project.
81 | - Thanks to [@grobian](https://github.com/grobian) for the [carbon-c-relay](https://github.com/grobian/carbon-c-relay) project.
82 |
83 |
84 | Contributors
85 | ---------------
86 |
87 | - [@zzl0](https://github.com/zzl0)
88 | - [@mckelvin](https://github.com/mckelvin)
89 | - [@windreamer](https://github.com/windreamer)
90 | - [@youngsofun](https://github.com/youngsofun)
91 | - [@camper42](https://github.com/camper42)
92 |
93 | License
94 | -------
95 |
96 | Kenshin is licensed under version 2.0 of the Apache License. See the [LICENSE](/LICENSE) file for details.
97 |
--------------------------------------------------------------------------------
/bin/kenshin-cache-query.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import argparse
5 | import struct
6 | import socket
7 | import cPickle as pickle
8 | from ConfigParser import ConfigParser
9 |
10 | from rurouni.utils import get_instance_of_metric
11 |
12 |
13 | def gen_rurouni_query_port(conf_file):
14 | parser = ConfigParser()
15 | parser.read(conf_file)
16 | prefix = 'cache:'
17 | rs = {}
18 | for s in parser.sections():
19 | if s.startswith(prefix):
20 | instance = int(s[len(prefix):])
21 | for k, v in parser.items(s):
22 | k = k.upper()
23 | if k == 'CACHE_QUERY_PORT':
24 | rs[instance] = int(v)
25 | return rs
26 |
27 |
28 | def main():
29 | parser = argparse.ArgumentParser()
30 | parser.add_argument('--server', default='127.0.0.1',
31 | help="server's host(or ip).")
32 | parser.add_argument('--num', type=int, default=3,
33 | help='number of rurouni caches.')
34 | parser.add_argument('--conf',
35 | help='rurouni-cache conf file path.')
36 | parser.add_argument('metric', help="metric name.")
37 | args = parser.parse_args()
38 |
39 | rurouni_query_ports = gen_rurouni_query_port(args.conf)
40 | server = args.server
41 | metric = args.metric
42 | num = args.num
43 | port_idx = get_instance_of_metric(metric, num)
44 | port = rurouni_query_ports[port_idx]
45 |
46 | conn = socket.socket()
47 | try:
48 | conn.connect((server, port))
49 | except socket.error:
50 | raise SystemError("Couldn't connect to %s on port %s" %
51 | (server, port))
52 |
53 | request = {
54 | 'type': 'cache-query',
55 | 'metric': metric,
56 | }
57 |
58 | serialized_request = pickle.dumps(request, protocol=-1)
59 | length = struct.pack('!L', len(serialized_request))
60 | request_packet = length + serialized_request
61 |
62 | try:
63 | conn.sendall(request_packet)
64 | rs = recv_response(conn)
65 | print rs
66 | except Exception as e:
67 | raise e
68 |
69 |
70 | def recv_response(conn):
71 | length = recv_exactly(conn, 4)
72 | body_size = struct.unpack('!L', length)[0]
73 | body = recv_exactly(conn, body_size)
74 | return pickle.loads(body)
75 |
76 |
77 | def recv_exactly(conn, num_bytes):
78 | buf = ''
79 | while len(buf) < num_bytes:
80 | data = conn.recv(num_bytes - len(buf))
81 | if not data:
82 | raise Exception("Connection lost.")
83 | buf += data
84 | return buf
85 |
86 |
87 | if __name__ == '__main__':
88 | main()
89 |
--------------------------------------------------------------------------------
/bin/kenshin-change-schema.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | import sys
4 | import os
5 | import time
6 | import glob
7 | import struct
8 |
9 | import kenshin
10 | from kenshin.consts import NULL_VALUE
11 | from kenshin.agg import Agg
12 | from rurouni.storage import loadStorageSchemas
13 |
14 |
15 | # Three action types.
16 | NO_OPERATION, CHANGE_META, REBUILD = range(3)
17 |
18 |
19 | def get_schema(storage_schemas, schema_name):
20 | for schema in storage_schemas:
21 | if schema.name == schema_name:
22 | return schema
23 |
24 |
25 | def resize_data_file(schema, data_file):
26 | print data_file
27 | with open(data_file) as f:
28 | header = kenshin.header(f)
29 | retentions = schema.archives
30 | old_retentions = [(x['sec_per_point'], x['count'])
31 | for x in header['archive_list']]
32 | msg = []
33 | action = NO_OPERATION
34 |
35 | # x files factor
36 | if schema.xFilesFactor != header['x_files_factor']:
37 | action = CHANGE_META
38 | msg.append("x_files_factor: %f -> %f" %
39 | (header['x_files_factor'], schema.xFilesFactor))
40 |
41 | # agg method
42 | old_agg_name = Agg.get_agg_name(header['agg_id'])
43 | if schema.aggregationMethod != old_agg_name:
44 | action = CHANGE_META
45 | msg.append("agg_name: %s -> %s" %
46 | (old_agg_name, schema.aggregationMethod))
47 |
48 | # retentions
49 | if retentions != old_retentions:
50 | action = REBUILD
51 | msg.append("retentions: %s -> %s" % (old_retentions, retentions))
52 |
53 | if action == NO_OPERATION:
54 | print "No operation needed."
55 | return
56 |
57 | elif action == CHANGE_META:
58 | print 'Change Meta.'
59 | print '\n'.join(msg)
60 | change_meta(data_file, schema, header['max_retention'])
61 | return
62 |
63 | elif action == REBUILD:
64 | print 'Rebuild File.'
65 | print '\n'.join(msg)
66 | rebuild(data_file, schema, header, retentions)
67 |
68 | else:
69 | raise ValueError(action)
70 |
71 |
72 | def change_meta(data_file, schema, max_retention):
73 | with open(data_file, 'r+b') as f:
74 | format = '!2Lf'
75 | agg_id = Agg.get_agg_id(schema.aggregationMethod)
76 | xff = schema.xFilesFactor
77 | packed_data = struct.pack(format, agg_id, max_retention, xff)
78 | f.write(packed_data)
79 |
80 |
81 | def rebuild(data_file, schema, header, retentions):
82 | now = int(time.time())
83 | tmpfile = data_file + '.tmp'
84 | if os.path.exists(tmpfile):
85 | print "Removing previous temporary database file: %s" % tmpfile
86 | os.unlink(tmpfile)
87 |
88 | print "Creating new kenshin database: %s" % tmpfile
89 | kenshin.create(tmpfile,
90 | header['tag_list'],
91 | retentions,
92 | schema.xFilesFactor,
93 | schema.aggregationMethod)
94 |
95 | size = os.stat(tmpfile).st_size
96 | old_size = os.stat(data_file).st_size
97 |
98 | print "Created: %s (%d bytes, was %d bytes)" % (
99 | tmpfile, size, old_size)
100 |
101 | print "Migrating data to new kenshin database ..."
102 | for archive in header['archive_list']:
103 | from_time = now - archive['retention'] + archive['sec_per_point']
104 | until_time = now
105 | _, timeinfo, values = kenshin.fetch(data_file, from_time, until_time)
106 | datapoints = zip(range(*timeinfo), values)
107 | datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]]
108 | for _, values in datapoints:
109 | for i, v in enumerate(values):
110 | if v is None:
111 | values[i] = NULL_VALUE
112 | kenshin.update(tmpfile, datapoints)
113 | backup = data_file + ".bak"
114 |
115 | print 'Renaming old database to: %s' % backup
116 | os.rename(data_file, backup)
117 |
118 | print "Renaming new database to: %s" % data_file
119 | try:
120 | os.rename(tmpfile, data_file)
121 | except Exception as e:
122 | print "Operation failed, restoring backup"
123 | os.rename(backup, data_file)
124 | raise e
125 | # Notice: by default, '.bak' files are not deleted.
126 |
127 |
128 | def main():
129 | usage = ("e.g: kenshin-change-schema.py -d ../graphite-root/conf/ -n default -f '../graphite-root/storage/data/*/default/*.hs'\n"
130 | "Note: kenshin combined many metrics to one file, "
131 | " please check file's meta data before you change it. "
132 | " (use keshin-info.py to view file's meta data)")
133 |
134 | import argparse
135 | parser = argparse.ArgumentParser(description=usage,
136 | formatter_class=argparse.RawTextHelpFormatter)
137 | parser.add_argument(
138 | "-d", "--conf-dir", required=True, help="kenshin conf directory.")
139 | parser.add_argument(
140 | "-n", "--schema-name", required=True, help="schema name.")
141 | parser.add_argument(
142 | "-f", "--files", required=True,
143 | help="metric data file paterns. (e.g. /data/kenshin/storage/data/*/mfs/*.hs)")
144 | args = parser.parse_args()
145 |
146 | storage_conf_path = os.path.join(args.conf_dir, 'storage-schemas.conf')
147 | storage_schemas = loadStorageSchemas(storage_conf_path)
148 | schema = get_schema(storage_schemas, args.schema_name)
149 | if not schema:
150 |         print 'no matching schema name: %s' % args.schema_name
151 | sys.exit(1)
152 | for f in sorted(glob.glob(args.files)):
153 | resize_data_file(schema, os.path.abspath(f))
154 |
155 |
156 | if __name__ == '__main__':
157 | main()
158 |
--------------------------------------------------------------------------------
/bin/kenshin-debug.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import argparse
5 | import struct
6 | import kenshin
7 | from datetime import datetime
8 | from kenshin.utils import get_metric
9 |
10 |
11 | def timestamp_to_datestr(ts):
12 | try:
13 | d = datetime.fromtimestamp(ts)
14 | return d.strftime('%Y-%m-%d %H:%M:%S')
15 | except:
16 | return 'invalid timestamp'
17 |
18 |
19 | def get_point(fh, offset, size, format):
20 | fh.seek(offset)
21 | data = fh.read(size)
22 | return struct.unpack(format, data)
23 |
24 |
25 | def run(filepath, archive_idx, point_idx, error):
26 | with open(filepath) as f:
27 | header = kenshin.header(f)
28 | archive = header['archive_list'][archive_idx]
29 | point_size = header['point_size']
30 | point_format = header['point_format']
31 |
32 | start_offset = archive['offset'] + point_idx * point_size
33 | if point_idx < 0:
34 | start_offset += archive['size']
35 |
36 | point = get_point(f, start_offset, point_size, point_format)
37 | print 'count: %s' % archive['count']
38 |
39 | if not error:
40 | metric = get_metric(filepath)
41 | date_str = timestamp_to_datestr(point[0])
42 | if metric:
43 | idx = header['tag_list'].index(metric)
44 | return (point[0], point[idx + 1]), date_str
45 |
46 | else:
47 | return point, date_str
48 | else:
49 | sec_per_point = archive['sec_per_point']
50 | ts = point[0]
51 | start_offset += point_size
52 | point_idx += 1
53 | while start_offset < archive['size'] + archive['offset']:
54 | point = get_point(f, start_offset, point_size, point_format)
55 | if point[0] != ts + sec_per_point:
56 | return point_idx
57 | start_offset += point_size
58 | point_idx += 1
59 | ts = point[0]
60 | return 'No error!'
61 |
62 |
63 | def main():
64 | parser = argparse.ArgumentParser(description="debug kenshin file")
65 | parser.add_argument('filepath', help="metric file path")
66 | parser.add_argument('archive_idx', type=int, help="the archive index")
67 | parser.add_argument('point_idx', type=int, help="the point index")
68 |     parser.add_argument('-e', '--error', action="store_true", help="run until an unexpected point (empty or error) is found")
69 |
70 | args = parser.parse_args()
71 | print run(args.filepath, args.archive_idx, args.point_idx, args.error)
72 |
73 |
74 | if __name__ == '__main__':
75 | main()
76 |
--------------------------------------------------------------------------------
/bin/kenshin-delete.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | import os
4 | import sys
5 | import glob
6 | import shutil
7 | from subprocess import check_output
8 |
9 | from kenshin import header, pack_header
10 | from kenshin.agg import Agg
11 |
12 | from rurouni.storage import getFilePathByInstanceDir, getMetricPathByInstanceDir
13 |
14 |
15 | METRIC_NAME, SCHEMA_NAME, FILE_ID, POS_IDX = range(4)
16 |
17 |
18 | def try_to_delete_empty_directory(path):
19 | dirname = os.path.dirname(path)
20 | try:
21 | os.rmdir(dirname)
22 | try_to_delete_empty_directory(dirname)
23 | except OSError:
24 | pass
25 |
26 |
27 | def delete_links(storage_dir, metric_file):
28 | with open(metric_file) as f:
29 | for line in f:
30 | line = line.strip()
31 | bucket, schema_name, fid, pos, metric = line.split(" ")
32 | bucket_link_dir = os.path.join(storage_dir, 'link', bucket)
33 | path = getMetricPathByInstanceDir(bucket_link_dir, metric)
34 | if os.path.exists(path):
35 | os.remove(path)
36 | try_to_delete_empty_directory(path)
37 |
38 |
39 | def delete_file(storage_dir, index, pos_metrics):
40 | """
41 |     Note: We do not delete the data file; we just delete the tags in the data file,
42 |     so the space can be reused by new metrics.
43 | """
44 | bucket, schema_name, fid = index
45 | bucket_data_dir = os.path.join(storage_dir, 'data', bucket)
46 | filepath = getFilePathByInstanceDir(bucket_data_dir, schema_name, fid)
47 |
48 | with open(filepath, "r+b") as fh:
49 | header_info = header(fh)
50 | tag_list = header_info["tag_list"]
51 | reserved_size = header_info["reserved_size"]
52 | archive_list = [(a["sec_per_point"], a["count"])
53 | for a in header_info["archive_list"]]
54 | agg_name = Agg.get_agg_name(header_info["agg_id"])
55 |
56 | released_size = 0
57 | for pos_idx, tag in pos_metrics:
58 | if tag == tag_list[pos_idx]:
59 | tag_list[pos_idx] = ""
60 | released_size += len(tag)
61 | elif tag_list[pos_idx] != "":
62 | print >>sys.stderr, "tag not match: (%s, %d)" % (tag, pos_idx)
63 |
64 | if released_size != 0:
65 | inter_tag_list = tag_list + ["N" * (reserved_size + released_size)]
66 | packed_header, _ = pack_header(inter_tag_list,
67 | archive_list,
68 | header_info["x_files_factor"],
69 | agg_name)
70 | fh.write(packed_header)
71 |
72 |
73 | def delete(storage_dir, metric_file):
74 | with open(metric_file) as f:
75 | group = []
76 | last_index = None
77 | for line in f:
78 | line = line.strip()
79 | bucket, schema_name, fid, pos, metric = line.split(" ")
80 | fid = int(fid)
81 | pos = int(pos)
82 | index = (bucket, schema_name, fid)
83 | if index == last_index:
84 | group.append((pos, metric))
85 | else:
86 | if last_index is not None:
87 | delete_file(storage_dir, last_index, group)
88 | group = [(pos, metric)]
89 | last_index = index
90 | if last_index is not None:
91 | delete_file(storage_dir, last_index, group)
92 |
93 | # delete metric-test directory
94 | metric_test_dirs = glob.glob(os.path.join(storage_dir, '*', 'metric-test'))
95 | for d in metric_test_dirs:
96 | shutil.rmtree(d)
97 |
98 |
99 | def sort_metric_file(metric_file):
100 | sorted_metric_file = "%s.sorted" % metric_file
101 | check_output("sort %s > %s" % (metric_file, sorted_metric_file), shell=True)
102 | return sorted_metric_file
103 |
104 |
105 | def main():
106 | import argparse
107 | parser = argparse.ArgumentParser()
108 | parser.add_argument(
109 | "-s", "--storage-dir",
110 | help="Kenshin storage directory."
111 | )
112 | parser.add_argument(
113 | "-m", "--metric-file",
114 | help=("Metrics that need to be deleted, "
115 | "use kenshin-get-metrics.py to generate this file")
116 | )
117 | parser.add_argument(
118 | "--only-link",
119 | action="store_true",
120 | help="Only delete link files."
121 | )
122 | args = parser.parse_args()
123 |
124 | sorted_metric_file = sort_metric_file(args.metric_file)
125 | delete_links(args.storage_dir, sorted_metric_file)
126 | if not args.only_link:
127 | delete(args.storage_dir, sorted_metric_file)
128 |
129 |
130 | if __name__ == '__main__':
131 | main()
132 |
--------------------------------------------------------------------------------
/bin/kenshin-fetch.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | import sys
4 | import time
5 | import optparse
6 | import signal
7 | import kenshin
8 | from kenshin.utils import get_metric
9 |
10 | signal.signal(signal.SIGPIPE, signal.SIG_DFL)
11 |
12 |
13 | def main():
14 | NOW = int(time.time())
15 | YESTERDAY = NOW - 24 * 60 * 60
16 |
17 | usage = "%prog [options] path"
18 | option_parser = optparse.OptionParser(usage=usage)
19 | option_parser.add_option('--from',
20 | default=YESTERDAY,
21 | type=int,
22 | dest='_from',
23 | help="begin timestamp(default: 24 hours ago)")
24 | option_parser.add_option('--until',
25 | default=NOW,
26 | type=int,
27 | help="end timestamp")
28 |
29 | (options, args) = option_parser.parse_args()
30 | if len(args) != 1:
31 | option_parser.print_help()
32 | sys.exit(1)
33 |
34 | path = args[0]
35 | metric = get_metric(path)
36 | from_time = int(options._from)
37 | until_time = int(options.until)
38 |
39 | header, timeinfo, points = kenshin.fetch(path, from_time, until_time, NOW)
40 | start, end, step = timeinfo
41 |
42 | if metric:
43 | idx = header['tag_list'].index(metric)
44 | points = (p[idx] if p else None for p in points)
45 |
46 | t = start
47 | for p in points:
48 | print "%s\t%s" % (t, p)
49 | t += step
50 |
51 |
52 | if __name__ == '__main__':
53 | main()
54 |
--------------------------------------------------------------------------------
/bin/kenshin-find-file.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | import os
4 | import re
5 | import glob
6 | from collections import defaultdict
7 |
8 | default_black_list = [
9 | '.*metric_test.*',
10 | '^rurouni\.',
11 | '^carbon\.',
12 | '^stats\.counters\..*\.count$',
13 | ]
14 |
15 |
16 | def main():
17 | import argparse
18 | parser = argparse.ArgumentParser()
19 | parser.add_argument(
20 | '-d', '--data-dir', required=True,
21 | help="data directory.")
22 | parser.add_argument(
23 | '-f', '--black-list-file',
24 | help="black list (regular expression for metric) file."
25 | )
26 | args = parser.parse_args()
27 |
28 | idx_files = glob.glob(os.path.join(args.data_dir, '*.idx'))
29 | black_list_pattern = gen_black_list_pattern(args.black_list_file)
30 | for idx_file in idx_files:
31 | dir_, filename = os.path.split(idx_file)
32 | instance = os.path.splitext(filename)[0]
33 | for p in yield_kenshin_files(dir_, instance, idx_file, black_list_pattern):
34 | print p
35 |
36 |
37 | def gen_black_list_pattern(black_list_file):
38 | rs = []
39 | if not black_list_file:
40 | for x in default_black_list:
41 | rs.append(re.compile(x))
42 | else:
43 | with open(black_list_file) as f:
44 | for line in f:
45 | line = line.strip()
46 | if line:
47 | rs.append(re.compile(line))
48 | return rs
49 |
50 |
51 | def yield_kenshin_files(dir_, instance, idx_file, black_list_pattern):
52 | all_fids = defaultdict(set)
53 | del_fids = defaultdict(set)
54 | with open(idx_file) as f:
55 | for line in f:
56 | line = line.strip()
57 | if not line:
58 | continue
59 | try:
60 | metric, schema, fid, _ = line.split()
61 | fid = int(fid)
62 | except Exception:
63 | continue
64 | all_fids[schema].add(fid)
65 | for p in black_list_pattern:
66 | if p.match(metric):
67 | del_fids[schema].add(fid)
68 | break
69 | for schema in all_fids:
70 | valid_fids = all_fids[schema] - del_fids[schema]
71 | for i in sorted(valid_fids)[:-1]:
72 | path = os.path.join(dir_, instance, schema, '%s.hs' % i)
73 | yield path
74 |
75 |
76 | if __name__ == '__main__':
77 | main()
78 |
--------------------------------------------------------------------------------
/bin/kenshin-get-metrics.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | import re
4 | import os
5 | import glob
6 |
7 |
8 | def match_metrics(index_dir, regexps):
9 | index_files = glob.glob(os.path.join(index_dir, '*.idx'))
10 | for index in index_files:
11 | bucket = os.path.splitext(os.path.basename(index))[0]
12 | with open(index) as f:
13 | for line in f:
14 | line = line.strip()
15 | try:
16 | metric, schema_name, fid, pos = line.split(' ')
17 | except ValueError:
18 |                     continue
19 | for p in regexps:
20 | if p.match(metric):
21 | yield ' '.join([bucket, schema_name, fid, pos, metric])
22 | break
23 |
24 |
25 | def compile_regexp(regexp_file):
26 | with open(regexp_file) as f:
27 | for line in f:
28 | line = line.strip()
29 | if line and not line.startswith("#"):
30 | yield re.compile(line)
31 |
32 |
33 | def main():
34 | import argparse
35 | parser = argparse.ArgumentParser()
36 | parser.add_argument(
37 | '-d', '--dirs',
38 | required=True,
39 |         help='directories that contain kenshin index files, separated by commas.'
40 | )
41 | parser.add_argument(
42 | '-f', '--regexp-file',
43 | required=True,
44 |         help='file that contains regular expressions.'
45 | )
46 | args = parser.parse_args()
47 |
48 | regexps = list(compile_regexp(args.regexp_file))
49 |
50 | for dir_ in args.dirs.split(","):
51 | for m in match_metrics(dir_, regexps):
52 | print m
53 |
54 |
55 | if __name__ == '__main__':
56 | main()
57 |
--------------------------------------------------------------------------------
/bin/kenshin-info.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 |
5 | from pprint import pprint
6 | import kenshin
7 |
8 |
9 | if __name__ == '__main__':
10 | import sys
11 | if len(sys.argv) < 2:
12 |         print 'Usage: kenshin-info.py <file_path>'
13 | sys.exit(1)
14 | path = sys.argv[1]
15 | with open(path) as f:
16 | pprint(kenshin.header(f))
17 |
--------------------------------------------------------------------------------
/bin/kenshin-rebuild-index.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import sys
5 | from rurouni.storage import rebuildIndex
6 |
7 |
8 | def main():
9 | if len(sys.argv) < 3:
10 | print 'need bucket_data_dir and bucket_index_file'
11 | sys.exit(1)
12 |
13 | data_dir, index_file = sys.argv[1:]
14 | rebuildIndex(data_dir, index_file)
15 |
16 |
17 | if __name__ == '__main__':
18 | main()
19 |
--------------------------------------------------------------------------------
/bin/kenshin-rebuild-link.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import os
5 | import sys
6 | import glob
7 | import errno
8 |
9 | import kenshin
10 | from kenshin.utils import mkdir_p
11 |
12 |
13 | def main():
14 | if len(sys.argv) < 3:
15 | print('Need data_dir and link_dir.\n'
16 | 'e.g.: kenshin-rebuild-link.py /kenshin/data/a /kenshin/link/a')
17 | sys.exit(1)
18 |
19 | data_dir, link_dir = sys.argv[1:]
20 | data_dir = os.path.abspath(data_dir)
21 | link_dir = os.path.abspath(link_dir)
22 |
23 | for schema_name in os.listdir(data_dir):
24 | hs_file_pat = os.path.join(data_dir, schema_name, '*.hs')
25 | for fp in glob.glob(hs_file_pat):
26 | with open(fp) as f:
27 | header = kenshin.header(f)
28 | metric_list = header['tag_list']
29 | for metric in metric_list:
30 | if metric != '':
31 | try:
32 | create_link(metric, link_dir, fp)
33 | except OSError as exc:
34 | if exc.errno == errno.ENAMETOOLONG:
35 | pass
36 | else:
37 | raise
38 |
39 |
40 | def create_link(metric, link_dir, file_path):
41 | link_path = metric.replace('.', os.path.sep)
42 | link_path = os.path.join(link_dir, link_path + '.hs')
43 | dirname = os.path.dirname(link_path)
44 | mkdir_p(dirname)
45 | if os.path.exists(link_path):
46 | os.remove(link_path)
47 | os.symlink(file_path, link_path)
48 |
49 |
50 | if __name__ == '__main__':
51 | main()
52 |
--------------------------------------------------------------------------------
/bin/kenshin-rebuild.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | import re
4 | import time
5 | from os.path import join
6 | from subprocess import check_output
7 |
8 |
9 | class Status(object):
10 | def __init__(self, status, pid, time):
11 | self.status = status
12 | self.pid = pid
13 | self.time = int(time)
14 |
15 | def __str__(self):
16 |         return '<status: %s, pid: %s, time: %s>' % (self.status, self.pid, self.time)
17 |
18 |
19 | def get_service_status(service_name):
20 | """Return Status(status, pid, time).
21 | e.g. Status('up', 1024, 12342), Status('down', None, 2)
22 | """
23 | cmd = ['svstat', service_name]
24 | out = check_output(cmd)
25 |
26 | down_pattern = r'down (\d+) seconds, normally up'
27 | up_pattern = r'up \(pid (\d+)\) (\d+) seconds'
28 |
29 | if re.search(up_pattern, out):
30 | pid, t = re.search(up_pattern, out).groups()
31 | return Status('up', pid, t)
32 | elif re.search(down_pattern, out):
33 | (t,) = re.search(down_pattern, out).groups()
34 | return Status('down', None, t)
35 | else:
36 |         raise Exception('Unknown service status, service=%s, status=%s' % (service_name, out))
37 |
38 |
39 | def run_cmd(cmd, user=None):
40 | if user:
41 | cmd = 'sudo -u %s %s' % (user, cmd)
42 | print cmd
43 | return check_output(cmd, shell=True)
44 |
45 |
46 | def main():
47 | import argparse
48 | parser = argparse.ArgumentParser()
49 | parser.add_argument(
50 | '-s', '--storage-dir',
51 |         help='Kenshin storage directory.'
52 | )
53 | parser.add_argument(
54 | '-b', '--begin-bucket',
55 | type=int,
56 | help='Begin bucket number.'
57 | )
58 | parser.add_argument(
59 | '-e', '--end-bucket',
60 | type=int,
61 | help='End bucket number.'
62 | )
63 | parser.add_argument(
64 | '--skip-rebuild-link',
65 | action='store_true',
66 | help='Skip rebuild link.'
67 | )
68 | args = parser.parse_args()
69 |
70 | storage_dir = args.storage_dir
71 | begin = args.begin_bucket
72 | end = args.end_bucket
73 |
74 | for i in range(begin, end + 1):
75 | if i != begin:
76 | time.sleep(10)
77 |
78 | bucket = str(i)
79 | data_dir = join(storage_dir, 'data', bucket)
80 | data_idx = join(storage_dir, 'data', bucket + '.idx')
81 | link_dir = join(storage_dir, 'link', bucket)
82 | service = '/service/rurouni-cache-%s' % bucket
83 |
84 | run_cmd('svc -d %s' % service)
85 | while get_service_status(service).status != 'down':
86 |             print 'waiting for service down'
87 | time.sleep(5)
88 |
89 | run_cmd('rm %s' % data_idx)
90 | run_cmd('kenshin-rebuild-index.py %s %s' % (data_dir, data_idx),
91 | 'graphite')
92 |
93 | if not args.skip_rebuild_link:
94 | run_cmd('rm -r %s/*' % link_dir)
95 | run_cmd('kenshin-rebuild-link.py %s %s' % (data_dir, link_dir),
96 | 'graphite')
97 |
98 | run_cmd('svc -u %s' % service)
99 | while get_service_status(service).status != 'up':
100 |             print 'waiting for service up'
101 | time.sleep(5)
102 |
103 |
104 | if __name__ == '__main__':
105 | main()
106 |
--------------------------------------------------------------------------------
/bin/kenshin-rehash.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | import os
4 | import sys
5 | import time
6 | import urllib
7 | import struct
8 | import StringIO
9 | from multiprocessing import Process, Queue
10 |
11 | from kenshin import storage
12 | from kenshin.agg import Agg
13 | from kenshin.storage import Storage
14 | from kenshin.consts import NULL_VALUE
15 | from rurouni.utils import get_instance_of_metric
16 | from kenshin.tools.whisper_tool import (
17 | read_header as whisper_read_header,
18 | pointFormat as whisperPointFormat,
19 | )
20 |
21 |
22 | def main():
23 | import argparse
24 | parser = argparse.ArgumentParser()
25 | parser.add_argument(
26 | '-t', '--src-type', required=True,
27 | choices=['whisper', 'kenshin'],
28 | help="src storage type"
29 | )
30 | parser.add_argument(
31 | '-d', '--src-data-dir', required=True,
32 | help="src data directory (http address)."
33 | )
34 | parser.add_argument(
35 | '-n', '--src-instance-num', type=int,
36 | help=('src rurouni cache instance number (required when src_type '
37 | 'is kenshin)')
38 | )
39 | parser.add_argument(
40 | '-m', '--kenshin-file', required=True,
41 |         help=('kenshin data files to which we want to add history; '
42 | 'use kenshin-find-file.py to generate this file.')
43 | )
44 | parser.add_argument(
45 | '-p', '--processes', default=10, type=int,
46 | help="number of processes."
47 | )
48 | args = parser.parse_args()
49 |
50 | if args.src_type == 'kenshin' and args.src_instance_num is None:
51 | parser.error('src-instance-num is required')
52 |
53 | # start processes
54 | processes = []
55 | queue = Queue()
56 | for w in xrange(args.processes):
57 | p = Process(target=worker, args=(queue,))
58 | p.start()
59 | processes.append(p)
60 |
61 | # generate data
62 | with open(args.kenshin_file) as f:
63 | for line in f:
64 | kenshin_filepath = line.strip()
65 | if not kenshin_filepath:
66 | continue
67 | with open(kenshin_filepath) as f:
68 | header = Storage.header(f)
69 | metrics = header['tag_list']
70 | if args.src_type == 'kenshin':
71 | metric_paths = [
72 | metric_to_filepath(args.src_data_dir, m, args.src_instance_num)
73 | for m in metrics
74 | ]
75 | else: # whisper
76 | metric_paths = [
77 | metric_to_whisper_filepath(args.src_data_dir, m)
78 | for m in metrics
79 | ]
80 | item = (args.src_type, header, metric_paths, metrics, kenshin_filepath)
81 | queue.put(item)
82 |
83 | # stop processes
84 | for _ in xrange(args.processes):
85 | queue.put("STOP")
86 | for p in processes:
87 | p.join()
88 |
89 |
90 | def worker(queue):
91 | for (src_type, meta, metric_paths, metrics, dst_file) in iter(queue.get, 'STOP'):
92 | try:
93 | tmp_file = dst_file + '.tmp'
94 | merge_metrics(src_type, meta, metric_paths, metrics, tmp_file)
95 | os.rename(tmp_file, dst_file)
96 | except Exception as e:
97 | print >>sys.stderr, '[merge error] %s: %s' % (dst_file, e)
98 | if os.path.exists(tmp_file):
99 | os.remove(tmp_file)
100 | return True
101 |
102 |
103 | def merge_metrics(src_type, meta, metric_paths, metric_names, output_file):
104 | ''' Merge metrics to a kenshin file.
105 | '''
106 | # Get content(data points grouped by archive) of each metric.
107 | if src_type == 'kenshin':
108 | metrics_archives_points = [
109 | get_metric_content(path, metric)
110 | for (path, metric) in zip(metric_paths, metric_names)
111 | ]
112 |     else: # whisper
113 | metrics_archives_points = [
114 | get_whisper_metric_content(path)
115 | for path in metric_paths
116 | ]
117 |
118 | # Merge metrics to a kenshin file
119 | with open(output_file, 'wb') as f:
120 | archives = meta['archive_list']
121 | archive_info = [(archive['sec_per_point'], archive['count'])
122 | for archive in archives]
123 | inter_tag_list = metric_names + [''] # for reserved space
124 |
125 | # header
126 | packed_kenshin_header = Storage.pack_header(
127 | inter_tag_list,
128 | archive_info,
129 | meta['x_files_factor'],
130 | Agg.get_agg_name(meta['agg_id']),
131 | )[0]
132 | f.write(packed_kenshin_header)
133 |
134 | for i, archive in enumerate(archives):
135 | archive_points = [x[i] for x in metrics_archives_points]
136 | merged_points = merge_points(archive_points)
137 | points = fill_gap(merged_points, archive, len(meta['tag_list']))
138 | packed_str = packed_kenshin_points(points)
139 | f.write(packed_str)
140 |
141 |
142 | def metric_to_filepath(data_dir, metric, instance_num):
143 | if metric.startswith('rurouni.'):
144 | instance = metric.split('.')[2]
145 | else:
146 | instance = str(get_instance_of_metric(metric, instance_num))
147 | return os.path.sep.join([data_dir, instance] + metric.split('.')) + '.hs'
148 |
149 |
150 | def metric_to_whisper_filepath(data_dir, metric):
151 | return os.path.sep.join([data_dir] + metric.split('.')) + '.wsp'
152 |
153 |
154 | def merge_points(metrics_archive_points):
155 | ''' Merge metrics' archive points to kenshin points.
156 |
157 | >>> whisper_points = [
158 | ... [[1421830133, 0], [1421830134, 1], [1421830135, 2]],
159 | ... [[1421830134, 4], [1421830135, 5]],
160 | ... [[1421830133, 6], [1421830134, 7], [1421830135, 8]]
161 | ... ]
162 | >>> merge_points(whisper_points)
163 | [(1421830133, [0, -4294967296.0, 6]), (1421830134, [1, 4, 7]), (1421830135, [2, 5, 8])]
164 | '''
165 | length = len(metrics_archive_points)
166 | d = {}
167 | for i, points in enumerate(metrics_archive_points):
168 | for t, v in points:
169 | if not t:
170 | continue
171 | if t not in d:
172 | d[t] = [NULL_VALUE] * length
173 | d[t][i] = v
174 | return sorted(d.items())
175 |
176 |
177 | def fill_gap(archive_points, archive, metric_num):
178 | EMPTY_POINT = (0, (0,) * metric_num)
179 | if not archive_points:
180 | return [EMPTY_POINT] * archive['count']
181 | step = archive['sec_per_point']
182 | rs = [archive_points[0]]
183 | prev_ts = archive_points[0][0]
184 | for ts, point in archive_points[1:]:
185 | if prev_ts + step == ts:
186 | rs.append((ts, point))
187 | else:
188 |             rs.extend([EMPTY_POINT] * ((ts - prev_ts) / step - 1) + [(ts, point)])
189 | prev_ts = ts
190 | if len(rs) < archive['count']:
191 | rs.extend([EMPTY_POINT] * (archive['count'] - len(rs)))
192 | else:
193 | rs = rs[:archive['count']]
194 | return rs
195 |
196 |
197 | def packed_kenshin_points(points):
198 | point_format = storage.POINT_FORMAT % len(points[0][1])
199 | str_format = point_format[0] + point_format[1:] * len(points)
200 | return struct.pack(str_format, *flatten(points))
201 |
202 |
203 | def flatten(iterable):
204 | """ Recursively iterate lists and tuples.
205 |
206 | >>> list(flatten([1, (2, 3, [4]), 5]))
207 | [1, 2, 3, 4, 5]
208 | """
209 | for elm in iterable:
210 | if isinstance(elm, (list, tuple)):
211 | for relm in flatten(elm):
212 | yield relm
213 | else:
214 | yield elm
215 |
216 |
217 | def get_metric_content(metric_path, metric_name):
218 | ''' Return data points of each archive of the metric.
219 | '''
220 | conn = urllib.urlopen(metric_path)
221 | if conn.code == 200:
222 | content = conn.read()
223 | else:
224 | raise Exception('HTTP Error Code %s for %s' % (conn.code, metric_path))
225 |
226 | header = Storage.header(StringIO.StringIO(content))
227 | metric_list = header['tag_list']
228 | metric_cnt = len(metric_list)
229 | metric_idx = metric_list.index(metric_name)
230 | step = metric_cnt + 1
231 | point_format = header['point_format']
232 | byte_order, point_type = point_format[0], point_format[1:]
233 | metric_content = []
234 | now = int(time.time())
235 |
236 | for archive in header['archive_list']:
237 | ts_min = now - archive['retention']
238 | archive_points = []
239 | series_format = byte_order + (point_type * archive['count'])
240 | packed_str = content[archive['offset']: archive['offset'] + archive['size']]
241 | unpacked_series = struct.unpack(series_format, packed_str)
242 | for i in xrange(0, len(unpacked_series), step):
243 | ts = unpacked_series[i]
244 | if ts > ts_min:
245 | # (timestamp, value)
246 | datapoint = (ts, unpacked_series[i+1+metric_idx])
247 | archive_points.append(datapoint)
248 | metric_content.append(archive_points)
249 |
250 | return metric_content
251 |
252 |
253 | def get_whisper_metric_content(metric_path):
254 | conn = urllib.urlopen(metric_path)
255 | if conn.code == 200:
256 | content = conn.read()
257 | else:
258 | raise Exception('HTTP Error Code %s for %s' % (conn.code, metric_path))
259 |
260 | header = whisper_read_header(StringIO.StringIO(content))
261 | byte_order, point_type = whisperPointFormat[0], whisperPointFormat[1:]
262 | metric_content = []
263 | now = int(time.time())
264 | step = 2
265 |
266 | for archive in header['archives']:
267 | ts_min = now - archive['retention']
268 | archive_points = []
269 | series_format = byte_order + (point_type * archive['count'])
270 | packed_str = content[archive['offset']: archive['offset'] + archive['size']]
271 | unpacked_series = struct.unpack(series_format, packed_str)
272 | for i in xrange(0, len(unpacked_series), step):
273 | ts = unpacked_series[i]
274 | if ts > ts_min:
275 | datapoint = (ts, unpacked_series[i+1])
276 | archive_points.append(datapoint)
277 | metric_content.append(archive_points)
278 |
279 | return metric_content
280 |
281 |
282 | if __name__ == '__main__':
283 | main()
284 |
--------------------------------------------------------------------------------
/bin/kenshin-restart.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | '''
4 | Kenshin does not currently support reloading its configuration dynamically, so it must be restarted whenever the configuration changes. This script restarts all kenshin instances.
5 |
6 | $ sudo kenshin-restart.py
7 | '''
8 |
9 | import re
10 | import time
11 | import glob
12 | from subprocess import check_output
13 |
14 |
15 | class Status(object):
16 | def __init__(self, status, pid, time):
17 | self.status = status
18 | self.pid = pid
19 | self.time = int(time)
20 |
21 | def __str__(self):
22 |         return '<status: %s, pid: %s, time: %s>' % (self.status, self.pid, self.time)
23 |
24 |
25 | def find_cache_services(start_num):
26 | def get_instance_num(service_path):
27 | return int(service_path.rsplit('-', 1)[1])
28 |
29 | services = glob.glob('/service/rurouni-cache-*')
30 | services = [x for x in services if get_instance_num(x) >= start_num]
31 | return sorted(services, key=get_instance_num)
32 |
33 |
34 | def get_service_status(service_name):
35 | """Return Status(status, pid, time).
36 | e.g. Status('up', 1024, 12342), Status('down', None, 2)
37 | """
38 | cmd = ['svstat', service_name]
39 | out = check_output(cmd)
40 |
41 | down_pattern = r'down (\d+) seconds, normally up'
42 | up_pattern = r'up \(pid (\d+)\) (\d+) seconds'
43 |
44 | if re.search(up_pattern, out):
45 | pid, t = re.search(up_pattern, out).groups()
46 | return Status('up', pid, t)
47 | elif re.search(down_pattern, out):
48 | (t,) = re.search(down_pattern, out).groups()
49 | return Status('down', None, t)
50 | else:
51 |         raise Exception('Unknown service status, service=%s, status=%s' % (service_name, out))
52 |
53 |
54 | def svc(service_name, arg):
55 | cmd = ['svc', arg, service_name]
56 | return check_output(cmd)
57 |
58 |
59 | def svc_restart(service_name):
60 | return svc(service_name, '-t')
61 |
62 |
63 | def restart_service(service_name):
64 | old = get_service_status(service_name)
65 | assert old.status == 'up'
66 | svc_restart(service_name)
67 | i = 0
68 | while True:
69 | time.sleep(2)
70 | new = get_service_status(service_name)
71 | print i, new
72 | if new.status == 'up':
73 |             # A restart is considered successful when both of the following hold:
74 |             # 1. the pid has changed, and
75 |             # 2. the new service is actually up and serving.
76 |             #
77 |             # Regarding point 2: kenshin exposes no external interface for querying
78 |             # its internal state, so for now we estimate readiness by uptime. The
79 |             # plan is to repurpose the pickle-format port as an internal status-query
80 |             # interface (which could also enable dynamic config reloading); once that
81 |             # exists, this script can be retired. Note: time is measured in seconds.
82 | if new.pid != old.pid and new.time > 10:
83 | break
84 | i += 1
85 |
86 | def main():
87 | import argparse
88 | parser = argparse.ArgumentParser()
89 | parser.add_argument("-t", "--time-interval", default=60, type=int,
90 | help="time interval between two restarting operations.")
91 | parser.add_argument("-s", "--start-num", default=0, type=int,
92 | help="start instance number")
93 | args = parser.parse_args()
94 | services = find_cache_services(args.start_num)
95 | for s in services:
96 | print 'restarting %s' % s
97 | print get_service_status(s)
98 | restart_service(s)
99 | print get_service_status(s)
100 | print
101 | time.sleep(args.time_interval)
102 |
103 |
104 | if __name__ == '__main__':
105 | main()
106 |
--------------------------------------------------------------------------------
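Note: the two regexes in get_service_status above parse daemontools `svstat` output. An illustrative sample of the lines they expect (made up to match the patterns and the Status docstring, not captured from a real host):

    /service/rurouni-cache-0: up (pid 1024) 12342 seconds
    /service/rurouni-cache-1: down 2 seconds, normally up
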
/bin/kenshin-send-zero-metric.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | """
4 | To reduce unnecessary metrics during data migration, only metrics that received at least
5 | one data point within the last week are migrated. But some metrics (e.g. 5XX web-error
6 | metrics) can stay silent for over a week, so they are missing on the new nodes even though
7 | the SA team wants them kept. This script sends each such metric to Graphite with one value (0 by default).
8 |
9 | # Collecting the metrics
10 |
11 | $ kenshin-get-metrics.py -d /data/kenshin/storage/data/ -f error_code.re | awk '{print $5}'
12 | $ cat error_code.re
13 | .*\.code\.\d+\.rate$
14 |
15 | # Usage
16 |
17 | $ kenshin-send-zero-metric.py -a <host>:<port> -m error_metric.src -b error_metric.dst
18 | """
19 |
20 | import sys
21 | import socket
22 | import time
23 | import argparse
24 |
25 |
26 | def run(sock, interval, metrics):
27 | now = int(time.time())
28 | for m in metrics:
29 | line = '%s %s %d\n' % (m, 0, now)
30 | print line,
31 | sock.sendall(line)
32 | time.sleep(interval)
33 |
34 |
35 | def get_metrics(filename):
36 | metrics = []
37 | with open(filename) as f:
38 | for line in f:
39 | line = line.strip()
40 | if line:
41 | metrics.append(line)
42 | return set(metrics)
43 |
44 |
45 | def parse_addr(addr):
46 | try:
47 | host, port = addr.split(":")
48 | return host, int(port)
49 | except ValueError:
50 | msg = "%r is not a valid addr" % addr
51 | raise argparse.ArgumentTypeError(msg)
52 |
53 |
54 | def main():
55 | parser = argparse.ArgumentParser()
56 | parser.add_argument('-a', '--addr', required=True, type=parse_addr,
57 |                         help="address of the carbon relay, in host:port format")
58 |     parser.add_argument('-i', '--interval', default=0.01, type=float,
59 |                         help="time interval between two sends.")
60 |     parser.add_argument('-m', '--metric-file', required=True,
61 |                         help="file that contains the metric names to send.")
62 |     parser.add_argument('-b', '--black-list-file', default=None,
63 |                         help="file that contains a blacklist of metrics.")
64 |
65 | args = parser.parse_args()
66 | host, port = args.addr
67 |
68 | sock = socket.socket()
69 | try:
70 | sock.connect((host, port))
71 | except socket.error:
72 | raise SystemError("Couldn't connect to %s on port %d" %
73 | (host, port))
74 | metrics = get_metrics(args.metric_file)
75 | if args.black_list_file:
76 | metrics -= get_metrics(args.black_list_file)
77 | try:
78 | run(sock, args.interval, metrics)
79 | except KeyboardInterrupt:
80 | sys.stderr.write("\nexiting on CTRL+c\n")
81 | sys.exit(0)
82 |
83 |
84 | if __name__ == '__main__':
85 | main()
86 |
--------------------------------------------------------------------------------
/bin/rurouni-cache.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import os.path
5 |
6 | BIN_DIR = os.path.dirname(os.path.abspath(__file__))
7 | ROOT_DIR = os.path.dirname(BIN_DIR)
8 |
9 | from rurouni.utils import run_twistd_plugin
10 | from rurouni.exceptions import RurouniException
11 |
12 | try:
13 | run_twistd_plugin(__file__)
14 | except RurouniException as e:
15 | raise SystemError(e)
16 |
--------------------------------------------------------------------------------
/conf/rurouni.conf.example:
--------------------------------------------------------------------------------
1 | [cache]
2 | # Configure rurouni-cache directories.
3 |
4 | CONF_DIR = /data/kenshin/conf
5 | LOCAL_DATA_DIR = /data/kenshin/storage/data
6 | LOCAL_LINK_DIR = /data/kenshin/storage/link
7 | LOG_DIR = /data/kenshin/storage/log
8 | PID_DIR = /data/kenshin/storage/run
9 |
10 | LINE_RECEIVER_INTERFACE = 0.0.0.0
11 | PICKLE_RECEIVER_INTERFACE = 0.0.0.0
12 | CACHE_QUERY_INTERFACE = 0.0.0.0
13 |
14 | LOG_UPDATES = True
15 | MAX_CREATES_PER_MINUTE = 1000
16 | NUM_ALL_INSTANCE = 2
17 |
18 | DEFAULT_WAIT_TIME = 1
19 |
20 |
21 | [cache:0]
22 | LINE_RECEIVER_PORT = 2003
23 | PICKLE_RECEIVER_PORT = 2004
24 | CACHE_QUERY_PORT = 7002
25 |
26 | [cache:1]
27 | LINE_RECEIVER_PORT = 2013
28 | PICKLE_RECEIVER_PORT = 2014
29 | CACHE_QUERY_PORT = 7012
30 |
--------------------------------------------------------------------------------
/conf/storage-schemas.conf.example:
--------------------------------------------------------------------------------
1 | # Schema definitions for kenshin files. Entries are scanned in order,
2 | # and first match wins. This file is scanned for changes every 60 seconds.
3 | #
4 | # Definition Syntax:
5 | #
6 | # [name]
7 | # pattern = regex
8 | # xFilesFactor = xff
9 | # aggregationMethod = agg
10 | # retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ...
11 | # cacheRetention = seconds
12 | # metricsPerFile = num
13 | #
14 | # Remember: To support accurate aggregation from higher to lower resolution
15 | # archives, the precision of a longer retention archive must be
16 | #           cleanly divisible by the precision of the next lower retention archive.
17 | #
18 | # Valid: 60s:7d,300s:30d (300/60 = 5)
19 | # Invalid: 180s:7d,300s:30d (300/180 = 3.333)
20 | #
21 |
22 | [metric-test]
23 | pattern = .*metric_test.*
24 | xFilesFactor = 1
25 | aggregationMethod = average
26 | retentions = 1s:1h,6s:2h
27 | cacheRetention = 10s
28 | metricsPerFile = 8
29 |
30 | [rurouni-stats]
31 | pattern = ^rurouni\.
32 | xFilesFactor = 20.0
33 | aggregationMethod = average
34 | retentions = 60s:2d,300s:7d,15m:25w,12h:5y
35 | cacheRetention = 600s
36 | metricsPerFile = 8
37 |
38 | [default]
39 | pattern = .*
40 | retentions = 10s:12h,60s:2d,300s:7d,15m:25w,12h:5y
41 | xFilesFactor = 20
42 | aggregationMethod = average
43 | metricsPerFile = 8
44 | cacheRetention = 620s
45 |
--------------------------------------------------------------------------------
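The `retentions` values above are comma-separated `precision:duration` pairs. A minimal sketch of how one pair is interpreted, assuming kenshin is installed (parse_retention_def is re-exported from kenshin/__init__.py):

    # Sketch: turn the [default] retention pairs into (sec_per_point, point_count).
    import kenshin

    for retention_def in "10s:12h,60s:2d,300s:7d,15m:25w,12h:5y".split(','):
        sec_per_point, point_count = kenshin.parse_retention_def(retention_def)
        print("%-8s -> %d seconds per point, %d points"
              % (retention_def, sec_per_point, point_count))
    # e.g. "10s:12h" -> 10 seconds per point, 4320 points
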
/examples/metric_stresser.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import time
3 | import random
4 | import socket
5 | import struct
6 | import cPickle as pickle
7 | from multiprocessing import Process
8 |
9 |
10 | def main():
11 | import argparse
12 | parser = argparse.ArgumentParser()
13 | parser.add_argument("-a", "--address", type=str, help="host:port pair.")
14 | parser.add_argument("-f", "--format", type=str, choices=["line", "pickle"], help="Format of data.")
15 | parser.add_argument("-p", "--process", type=int, default=1, help="Number of processes.")
16 | parser.add_argument("-m", "--metric", type=int, default=1000, help="Number of metrics for one process.")
17 | parser.add_argument("-i", "--interval", type=int, default=10, help="Publish time interval.")
18 | parser.add_argument("-d", "--debug", action='store_true', help="Debug mode, send the metrics to terminal.")
19 | args = parser.parse_args()
20 |
21 | stresser(args)
22 |
23 |
24 | def stresser(args):
25 | host, port = args.address.split(":")
26 | port = int(port)
27 | metric_args = (host, port, args.format, args.metric, args.interval, args.debug)
28 |
29 | processes = []
30 | for i in xrange(args.process):
31 | pname = 'process_%s' % i
32 | p = Process(target=send_metrics, args=(pname,) + metric_args)
33 | p.start()
34 | processes.append(p)
35 |
36 | try:
37 | for p in processes:
38 | p.join()
39 | except KeyboardInterrupt:
40 | for p in processes:
41 | p.terminate()
42 | print 'KeyboardInterrupt'
43 |
44 |
45 | def send_metrics(pname, host, port, format, num_metrics, interval, debug):
46 | time.sleep(random.random() * interval)
47 | sock = socket.socket()
48 | try:
49 | sock.connect((host, port))
50 | except socket.error:
51 | if not debug:
52 | raise SystemError("Couldn't connect to %s on port %s" %
53 | (host, port))
54 | metrics = list(gen_metrics(pname, num_metrics))
55 | while True:
56 | points = gen_metric_points(metrics, format)
57 | if debug:
58 | print '\n'.join(map(str, points))
59 | else:
60 | if format == 'line':
61 | msg = '\n'.join(points) + '\n' # all lines end in a newline
62 | sock.sendall(msg)
63 | else:
64 | # pickle
65 | package = pickle.dumps(points, 1)
66 | size = struct.pack('!L', len(package))
67 | sock.sendall(size)
68 | sock.sendall(package)
69 | time.sleep(interval)
70 |
71 |
72 | def gen_metrics(id_, num_metrics):
73 | METRIC_PATTERN = 'metric_stresser.{0}.metric_id.%s'.format(id_)
74 | for i in xrange(num_metrics):
75 | yield METRIC_PATTERN % str(i)
76 |
77 |
78 | def gen_metric_points(metrics, format):
79 | base_val = random.random()
80 | now = int(time.time())
81 | points = []
82 | for i, m in enumerate(metrics):
83 | val = base_val + i
84 | if format == 'line':
85 | points.append("%s %s %s" % (m, val, now))
86 | else:
87 | points.append((m, (now, val)))
88 | return points
89 |
90 |
91 | if __name__ == '__main__':
92 | main()
93 |
--------------------------------------------------------------------------------
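An illustrative invocation of the stresser above (the host and port are assumptions, chosen to match the [cache:0] line-receiver port in conf/rurouni.conf.example):

    $ python examples/metric_stresser.py -a 127.0.0.1:2003 -f line -p 2 -m 1000 -i 10
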
/examples/rurouni-client.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import re
4 | import sys
5 | import socket
6 | import time
7 | import subprocess
8 |
9 | RUROUNI_SERVER = '127.0.0.1'
10 | RUROUNI_PORT = 2003
11 | DELAY = 60
12 |
13 | idx = 0
14 |
15 | def get_loadavg():
16 | cmd = 'uptime'
17 | output = subprocess.check_output(cmd, shell=True).strip()
18 | output = re.split("\s+", output)
19 | # return output[-3:]
20 |     # Send fabricated data instead, so the result is easy to verify by eye.
21 | global idx
22 | idx += 1
23 | return idx, 100+idx, 200+idx
24 |
25 |
26 | def run(sock, delay):
27 | while True:
28 | now = int(time.time())
29 | loadavg = get_loadavg()
30 |
31 | lines = []
32 | idx2min = [1, 5, 15]
33 | for i, val in enumerate(loadavg):
34 | line = "test.system.loadavg.min_%s %s %d" % (idx2min[i], val, now)
35 | lines.append(line)
36 | msg = '\n'.join(lines) + '\n' # all lines must end in a newline
37 | print 'sending message'
38 | print '-' * 80
39 | print msg
40 | sock.sendall(msg)
41 | time.sleep(delay)
42 |
43 |
44 | def main():
45 | if len(sys.argv) > 1:
46 | delay = int(sys.argv[1])
47 | else:
48 | delay = DELAY
49 |
50 | sock = socket.socket()
51 | try:
52 | sock.connect((RUROUNI_SERVER, RUROUNI_PORT))
53 | except socket.error:
54 | raise SystemError("Couldn't connect to %s on port %s" %
55 | (RUROUNI_SERVER, RUROUNI_PORT))
56 |
57 | try:
58 | run(sock, delay)
59 | except KeyboardInterrupt:
60 | sys.stderr.write("\nexiting on CTRL+c\n")
61 | sys.exit(0)
62 |
63 |
64 | if __name__ == '__main__':
65 | main()
66 |
--------------------------------------------------------------------------------
/examples/rurouni-pickle-client.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import re
4 | import sys
5 | import socket
6 | import time
7 | import subprocess
8 | import pickle
9 | import struct
10 |
11 | RUROUNI_SERVER = '127.0.0.1'
12 | RUROUNI_PORT = 2004
13 | DELAY = 60
14 |
15 | idx = 0
16 |
17 |
18 | def get_loadavg():
19 | cmd = 'uptime'
20 | output = subprocess.check_output(cmd, shell=True).strip()
21 | output = re.split("\s+", output)
22 | # return output[-3:]
23 |     # Send fabricated data instead, so the result is easy to verify by eye.
24 | global idx
25 | idx += 1
26 | return idx, 100+idx, 200+idx
27 |
28 |
29 | def run(sock, delay):
30 | while True:
31 | now = int(time.time())
32 | loadavg = get_loadavg()
33 |
34 |         lines = []  # only used for printing
35 | tuples = []
36 | idx2min = [1, 5, 15]
37 | for i, val in enumerate(loadavg):
38 | line = "system.loadavg.min_%s.metric_test %s %d" % (idx2min[i], val, now)
39 | lines.append(line)
40 | tuples.append(('system.loadavg.min_%s.metric_test' % idx2min[i], (now, val)))
41 | msg = '\n'.join(lines) + '\n' # all lines must end in a newline
42 | print 'sending message'
43 | print '-' * 80
44 | print msg
45 | package = pickle.dumps(tuples, 1)
46 | size = struct.pack('!L', len(package))
47 | sock.sendall(size)
48 | sock.sendall(package)
49 | time.sleep(delay)
50 |
51 |
52 | def main():
53 | if len(sys.argv) > 1:
54 | delay = int(sys.argv[1])
55 | else:
56 | delay = DELAY
57 |
58 | sock = socket.socket()
59 | try:
60 | sock.connect((RUROUNI_SERVER, RUROUNI_PORT))
61 | except socket.error:
62 | raise SystemError("Couldn't connect to %s on port %s" %
63 | (RUROUNI_SERVER, RUROUNI_PORT))
64 |
65 | try:
66 | run(sock, delay)
67 | except KeyboardInterrupt:
68 | sys.stderr.write("\nexiting on CTRL+c\n")
69 | sys.exit(0)
70 |
71 |
72 | if __name__ == '__main__':
73 | main()
74 |
--------------------------------------------------------------------------------
/img/docs/cluster-before.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/docs/cluster-before.png
--------------------------------------------------------------------------------
/img/docs/cluster-now.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/docs/cluster-now.png
--------------------------------------------------------------------------------
/img/docs/kenshin-structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/docs/kenshin-structure.png
--------------------------------------------------------------------------------
/img/docs/query_perf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/docs/query_perf.png
--------------------------------------------------------------------------------
/img/kenshin-perf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/kenshin-perf.png
--------------------------------------------------------------------------------
/img/kenshin.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/kenshin.gif
--------------------------------------------------------------------------------
/kenshin/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from kenshin.storage import (
4 | Storage, KenshinException, InvalidConfig, InvalidTime,
5 | RetentionParser)
6 |
7 | __version__ = "0.3.1"
8 | __commit__ = "9b67db3"
9 | __author__ = "zzl0"
10 | __email__ = "zhuzhaolong0@gmail.com"
11 | __date__ = "Sun Dec 18 16:09:53 2016 +0800"
12 |
13 |
14 | _storage = Storage()
15 | validate_archive_list = _storage.validate_archive_list
16 | create = _storage.create
17 | update = _storage.update
18 | fetch = _storage.fetch
19 | header = _storage.header
20 | pack_header = _storage.pack_header
21 | add_tag = _storage.add_tag
22 |
23 | parse_retention_def = RetentionParser.parse_retention_def
24 |
--------------------------------------------------------------------------------
/kenshin/agg.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | #
3 | # This module implements various aggregation method.
4 | #
5 |
6 | import operator
7 |
8 |
9 | class Agg(object):
10 | agg_funcs = [
11 | ['average', lambda x: sum(x) / len(x)],
12 | ['sum', sum],
13 | ['last', operator.itemgetter(-1)],
14 | ['max', max],
15 | ['min', min],
16 | ]
17 |
18 | agg_type_list = [typ for typ, _ in agg_funcs]
19 | agg_func_dict = dict(agg_funcs)
20 |
21 | @classmethod
22 | def get_agg_id(cls, agg_name):
23 | return cls.agg_type_list.index(agg_name)
24 |
25 | @classmethod
26 | def get_agg_func(cls, agg_id):
27 | agg_type = cls.agg_type_list[agg_id]
28 | return cls.agg_func_dict[agg_type]
29 |
30 | @classmethod
31 | def get_agg_type_list(cls):
32 | return cls.agg_type_list
33 |
34 | @classmethod
35 | def get_agg_name(cls, agg_id):
36 | return cls.agg_type_list[agg_id]
37 |
--------------------------------------------------------------------------------
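A quick sketch of how these helpers round-trip between aggregation names and ids (an id is simply the position in agg_funcs, so 'average' is 0):

    # Sketch: map an aggregation name to its id and back, then aggregate.
    from kenshin.agg import Agg

    agg_id = Agg.get_agg_id('average')      # 0, the index in agg_type_list
    agg_func = Agg.get_agg_func(agg_id)     # the averaging lambda
    print(agg_func([1.0, 2.0, 3.0]))        # 2.0
    print(Agg.get_agg_name(agg_id))         # 'average'
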
/kenshin/consts.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 |
4 | NULL_VALUE = -4294967296.0
5 | DEFAULT_TAG_LENGTH = 96
6 | CHUNK_SIZE = 16384
--------------------------------------------------------------------------------
/kenshin/storage.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | """
4 | Copyright 2015-Present Douban Inc.
5 | Copyright 2009-2014 The Graphite Development Team
6 | Copyright 2008 Orbitz WorldWide
7 |
8 | Licensed under the Apache License, Version 2.0 (the "License");
9 | you may not use this file except in compliance with the License.
10 | You may obtain a copy of the License at
11 |
12 | http://www.apache.org/licenses/LICENSE-2.0
13 |
14 | Unless required by applicable law or agreed to in writing, software
15 | distributed under the License is distributed on an "AS IS" BASIS,
16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | See the License for the specific language governing permissions and
18 | limitations under the License.
19 | """
20 |
21 | #
22 | # This module is an implementation of storage API. Here is the
23 | # basic layout of fileformat.
24 | #
25 | # File = Header, Data
26 | # Header = Metadata, Tag+, ArchiveInfo+
27 | # Metadata = agg_id, max_retention, x_files_factor, archive_count, tag_size, point_size
28 | # Tag = metric
29 | # ArchiveInfo = offset, seconds_per_point, point_count
30 | # Data = Archive+
31 | # Archive = Point+
32 | # Point = timestamp, value
33 | #
34 |
35 | import os
36 | import re
37 | import time
38 | import numpy as np
39 | import math
40 | import struct
41 | import operator
42 | import inspect
43 |
44 | from kenshin.agg import Agg
45 | from kenshin.utils import mkdir_p, roundup
46 | from kenshin.consts import DEFAULT_TAG_LENGTH, NULL_VALUE, CHUNK_SIZE
47 |
48 |
49 | LONG_FORMAT = "!L"
50 | LONG_SIZE = struct.calcsize(LONG_FORMAT)
51 | FLOAT_FORMAT = "!f"
52 | FLOAT_SIZE = struct.calcsize(FLOAT_FORMAT)
53 | VALUE_FORMAT = "!d"
54 | VALUE_SIZE = struct.calcsize(VALUE_FORMAT)
55 | POINT_FORMAT = "!L%dd"
56 | METADATA_FORMAT = "!2Lf3L"
57 | METADATA_SIZE = struct.calcsize(METADATA_FORMAT)
58 | ARCHIVEINFO_FORMAT = "!3L"
59 | ARCHIVEINFO_SIZE = struct.calcsize(ARCHIVEINFO_FORMAT)
60 |
61 | # Reserved tag index for the reserved space;
62 | # this is useful when adding a tag to a file.
63 | RESERVED_INDEX = -1
64 |
65 |
66 | ### Exceptions
67 |
68 | class KenshinException(Exception):
69 | pass
70 |
71 |
72 | class InvalidTime(KenshinException):
73 | pass
74 |
75 |
76 | class InvalidConfig(KenshinException):
77 | pass
78 |
79 |
80 | ### debug tool
81 |
82 | debug = lambda *a, **kw: None
83 |
84 |
85 | def enable_debug(ignore_header=False):
86 | """
87 |     Monitor read/write operations.
88 |
89 |     The header function is called several times during a single write, and the
90 |     header data is small enough to be served from cache, so enable_debug accepts
91 |     ignore_header to skip header reads, which makes IO benchmarking easier.
92 | """
93 | global open, debug
94 |
95 | if not ignore_header:
96 | def debug(msg):
97 | print "DEBUG :: %s" % msg
98 |
99 | class open(file):
100 | write_cnt = 0
101 | read_cnt = 0
102 |
103 | def __init__(self, *args, **kwargs):
104 | caller = self.get_caller()
105 | debug("=========== open in %s ===========" % caller)
106 | file.__init__(self, *args, **kwargs)
107 |
108 | def write(self, data):
109 | caller = self.get_caller()
110 | open.write_cnt += 1
111 | debug("Write %d bytes #%d in %s" % (len(data),
112 | self.write_cnt, caller))
113 | return file.write(self, data)
114 |
115 | def read(self, bytes):
116 | caller = self.get_caller()
117 | if ignore_header and caller == "header":
118 | pass
119 | else:
120 | open.read_cnt += 1
121 | debug("Read %d bytes #%d in %s" % (bytes, self.read_cnt, caller))
122 | return file.read(self, bytes)
123 |
124 | def get_caller(self):
125 | return inspect.stack()[2][3]
126 |
127 |
128 | ### retention parser
129 |
130 | class RetentionParser(object):
131 | TIME_UNIT = {
132 | 'seconds': 1,
133 | 'minutes': 60,
134 | 'hours': 3600,
135 | 'days': 86400,
136 | 'weeks': 86400 * 7,
137 | 'years': 86400 * 365,
138 | }
139 | # time pattern (e.g. 60s, 12h)
140 | pat = re.compile(r'^(\d+)([a-z]+)$')
141 |
142 | @classmethod
143 | def get_time_unit_name(cls, s):
144 | for k in cls.TIME_UNIT.keys():
145 | if k.startswith(s):
146 | return k
147 | raise InvalidTime("Invalid time unit: '%s'" % s)
148 |
149 | @classmethod
150 | def get_seconds(cls, time_unit):
151 | return cls.TIME_UNIT[cls.get_time_unit_name(time_unit)]
152 |
153 | @classmethod
154 | def parse_time_str(cls, s):
155 | """
156 | Parse time string to seconds.
157 |
158 | >>> RetentionParser.parse_time_str('12h')
159 | 43200
160 | """
161 | if s.isdigit():
162 | return int(s)
163 |
164 | m = cls.pat.match(s)
165 | if m:
166 | num, unit = m.groups()
167 | return int(num) * cls.get_seconds(unit)
168 | else:
169 |             raise InvalidTime("Invalid retention specification '%s'" % s)
170 |
171 | @classmethod
172 | def parse_retention_def(cls, retention_def):
173 | precision, point_cnt = retention_def.strip().split(':')
174 | precision = cls.parse_time_str(precision)
175 |
176 | if point_cnt.isdigit():
177 | point_cnt = int(point_cnt)
178 | else:
179 | point_cnt = cls.parse_time_str(point_cnt) / precision
180 |
181 | return precision, point_cnt
182 |
183 |
184 | ### Storage
185 |
186 | class Storage(object):
187 |
188 | def __init__(self, data_dir=''):
189 | self.data_dir = data_dir
190 |
191 | def create(self, metric_name, tag_list, archive_list, x_files_factor=None,
192 | agg_name=None):
193 | Storage.validate_archive_list(archive_list, x_files_factor)
194 |
195 | path = self.gen_path(self.data_dir, metric_name)
196 | if os.path.exists(path):
197 | raise IOError('file %s already exits.' % path)
198 | else:
199 | mkdir_p(os.path.dirname(path))
200 |
201 | # inter_tag_list[RESERVED_INDEX] is reserved space
202 | # to avoid move data points.
203 | empty_tag_cnt = sum(1 for t in tag_list if not t)
204 | inter_tag_list = tag_list + ['N' * DEFAULT_TAG_LENGTH * empty_tag_cnt]
205 |
206 | with open(path, 'wb') as f:
207 | packed_header, end_offset = self.pack_header(
208 | inter_tag_list, archive_list, x_files_factor, agg_name)
209 | f.write(packed_header)
210 |
211 | # init data
212 | remaining = end_offset - f.tell()
213 | zeroes = '\x00' * CHUNK_SIZE
214 | while remaining > CHUNK_SIZE:
215 | f.write(zeroes)
216 | remaining -= CHUNK_SIZE
217 | f.write(zeroes[:remaining])
218 |
219 | @staticmethod
220 | def validate_archive_list(archive_list, xff):
221 | """
222 | Validates an archive_list.
223 |
224 |         An archive_list must:
225 |         1. Have at least one archive config.
226 |         2. Contain no duplicate precisions.
227 |         3. Each higher-precision archive's precision must evenly divide
228 |            all lower-precision archives' precisions.
229 |         4. Lower-precision archives must cover larger time intervals
230 |            than higher-precision archives.
231 |         5. Each archive must have enough points to consolidate to the
232 |            next archive.
233 | """
234 |
235 | # 1
236 | if not archive_list:
237 | raise InvalidConfig("must specify at least one archive config")
238 |
239 | archive_list.sort(key=operator.itemgetter(0))
240 |
241 | for i, archive in enumerate(archive_list):
242 | try:
243 | next_archive = archive_list[i+1]
244 |             except IndexError:
245 | break
246 | # 2
247 | if not archive[0] < next_archive[0]:
248 |                 raise InvalidConfig("two archives with the same precision: "
249 |                                     "'%s' and '%s'" % (archive, next_archive))
250 | # 3
251 | if next_archive[0] % archive[0] != 0:
252 | raise InvalidConfig("higher precision must evenly divide lower "
253 | "precision: %s and %s" %
254 | (archive[0], next_archive[0]))
255 | # 4
256 | retention = archive[0] * archive[1]
257 | next_retention = next_archive[0] * next_archive[1]
258 | if not next_retention > retention:
259 | raise InvalidConfig("lower precision archive must cover "
260 |                                     "larger time intervals than higher "
261 | "precision archive: (%d, %s) and (%d, %s)" %
262 | (i, retention, i+1, next_retention))
263 | # 5
264 | archive_point_cnt = archive[1]
265 | point_per_consolidation = next_archive[0] / archive[0]
266 | if not (archive_point_cnt / xff) >= point_per_consolidation:
267 | raise InvalidConfig("each archive must have at least enough "
268 | "points to consolidate to the next archive: "
269 | "(%d, %s) and (%d, %s) xff=%s" %
270 | (i, retention, i+1, next_retention, xff))
271 |
272 | @staticmethod
273 | def gen_path(data_dir, metric_name):
274 | """
275 | Generate file path of `metric_name`.
276 |
277 |         e.g. if metric_name is `sys.cpu.user`, the absolute file path will be
278 |         `<data_dir>/sys/cpu/user.hs`
279 | """
280 | if metric_name[0] == '/':
281 | return metric_name
282 | parts = metric_name.split('.')
283 | parts[-1] = parts[-1] + '.hs'
284 | file_path = os.path.sep.join(parts)
285 | return os.path.join(data_dir, file_path)
286 |
287 | @staticmethod
288 | def pack_header(inter_tag_list, archive_list, x_files_factor, agg_name):
289 | # tag
290 | tag = str('\t'.join(inter_tag_list))
291 |
292 | # metadata
293 | agg_id = Agg.get_agg_id(agg_name)
294 | max_retention = reduce(operator.mul, archive_list[-1], 1)
295 | xff = x_files_factor
296 | archive_cnt = len(archive_list)
297 | tag_size = len(tag)
298 | point_size = struct.calcsize(POINT_FORMAT % (len(inter_tag_list) - 1))
299 | metadata = struct.pack(METADATA_FORMAT, agg_id, max_retention,
300 | xff, archive_cnt, tag_size, point_size)
301 |
302 | # archive_info
303 | header = [metadata, tag]
304 | offset = METADATA_SIZE + len(tag) + ARCHIVEINFO_SIZE * len(archive_list)
305 |
306 | for sec, cnt in archive_list:
307 | archive_info = struct.pack(ARCHIVEINFO_FORMAT, offset, sec, cnt)
308 | header.append(archive_info)
309 | offset += point_size * cnt
310 | return ''.join(header), offset
311 |
312 | @staticmethod
313 | def header(fh):
314 | origin_offset = fh.tell()
315 | if origin_offset != 0:
316 | fh.seek(0)
317 | packed_metadata = fh.read(METADATA_SIZE)
318 | agg_id, max_retention, xff, archive_cnt, tag_size, point_size = struct.unpack(
319 | METADATA_FORMAT, packed_metadata)
320 | inter_tag_list = fh.read(tag_size).split('\t')
321 |
322 | archives = []
323 | for i in xrange(archive_cnt):
324 | packed_archive_info = fh.read(ARCHIVEINFO_SIZE)
325 | offset, sec, cnt = struct.unpack(
326 | ARCHIVEINFO_FORMAT, packed_archive_info)
327 | archive_info = {
328 | 'offset': offset,
329 | 'sec_per_point': sec,
330 | 'count': cnt,
331 | 'size': point_size * cnt,
332 | 'retention': sec * cnt,
333 | }
334 | archives.append(archive_info)
335 |
336 | fh.seek(origin_offset)
337 | tag_list = inter_tag_list[:RESERVED_INDEX]
338 | info = {
339 | 'agg_id': agg_id,
340 | 'max_retention': max_retention,
341 | 'x_files_factor': xff,
342 | 'tag_list': tag_list,
343 | 'reserved_size': len(inter_tag_list[RESERVED_INDEX]),
344 | 'point_size': point_size,
345 | 'point_format': POINT_FORMAT % len(tag_list),
346 | 'archive_list': archives,
347 | }
348 | return info
349 |
350 | @staticmethod
351 | def add_tag(tag, path, pos_idx):
352 | with open(path, 'r+b') as fh:
353 | header_info = Storage.header(fh)
354 | tag_list = header_info['tag_list']
355 | reserved_size = header_info['reserved_size']
356 |
357 | archive_list = [(a['sec_per_point'], a['count'])
358 | for a in header_info['archive_list']]
359 | agg_name = Agg.get_agg_name(header_info['agg_id'])
360 |
361 | if len(tag) <= len(tag_list[pos_idx]) + reserved_size:
362 | diff = len(tag_list[pos_idx]) + reserved_size - len(tag)
363 | tag_list[pos_idx] = tag
364 | inter_tag_list = tag_list + ['N' * diff]
365 | packed_header, _ = Storage.pack_header(
366 | inter_tag_list, archive_list, header_info['x_files_factor'], agg_name)
367 | fh.write(packed_header)
368 | else:
369 | tag_list[pos_idx] = tag
370 | inter_tag_list = tag_list + ['']
371 | packed_header, _ = Storage.pack_header(
372 | inter_tag_list, archive_list, header_info['x_files_factor'], agg_name)
373 | tmpfile = path + '.tmp'
374 | with open(tmpfile, 'wb') as fh_tmp:
375 | fh_tmp.write(packed_header)
376 | fh.seek(header_info['archive_list'][0]['offset'])
377 | while True:
378 | bytes = fh.read(CHUNK_SIZE)
379 | if not bytes:
380 | break
381 | fh_tmp.write(bytes)
382 | os.rename(tmpfile, path)
383 |
384 | def update(self, path, points, now=None, mtime=None):
385 | # order points by timestamp, newest first
386 | points.sort(key=operator.itemgetter(0), reverse=True)
387 | mtime = mtime or int(os.stat(path).st_mtime)
388 | with open(path, 'r+b') as f:
389 | header = self.header(f)
390 | if now is None:
391 | now = int(time.time())
392 | archive_list = header['archive_list']
393 | i = 0
394 | curr_archive = archive_list[i]
395 | curr_points = []
396 |
397 | for point in points:
398 | age = now - point[0]
399 |
400 | while age > curr_archive['retention']:
401 | # we can't fit any more points in archive i
402 | if curr_points:
403 | timestamp_range = (min(mtime, curr_points[-1][0]),
404 | curr_points[0][0])
405 | self._update_archive(f, header, curr_archive,
406 | curr_points, i, timestamp_range)
407 | curr_points = []
408 | try:
409 | curr_archive = archive_list[i+1]
410 | i += 1
411 | except IndexError:
412 | curr_archive = None
413 | break
414 |
415 | if not curr_archive:
416 | # drop remaining points that don't fit in the database
417 | break
418 |
419 | curr_points.append(point)
420 |
421 | if curr_archive and curr_points:
422 | timestamp_range = (min(mtime, curr_points[-1][0]),
423 | curr_points[0][0])
424 | self._update_archive(f, header, curr_archive, curr_points, i,
425 | timestamp_range)
426 |
427 | def _update_archive(self, fh, header, archive, points, archive_idx, timestamp_range):
428 | step = archive['sec_per_point']
429 | aligned_points = sorted((p[0] - (p[0] % step), p[1])
430 | for p in points if p)
431 | if not aligned_points:
432 | return
433 |
434 | # create a packed string for each contiguous sequence of points
435 | point_format = header['point_format']
436 | packed_strings = []
437 | curr_strings = []
438 | previous_ts = None
439 | len_aligned_points = len(aligned_points)
440 | for i in xrange(0, len_aligned_points):
441 | # take last val of duplicates
442 | if (i+1 < len_aligned_points and
443 | aligned_points[i][0] == aligned_points[i+1][0]):
444 | continue
445 | (ts, val) = aligned_points[i]
446 | packed_str = struct.pack(point_format, ts, *val)
447 | if (not previous_ts) or (ts == previous_ts + step):
448 | curr_strings.append(packed_str)
449 | else:
450 | start_ts = previous_ts - (step * (len(curr_strings) - 1))
451 | packed_strings.append((start_ts, ''.join(curr_strings)))
452 | curr_strings = [packed_str]
453 | previous_ts = ts
454 |
455 | if curr_strings:
456 | start_ts = previous_ts - (step * (len(curr_strings) - 1))
457 | packed_strings.append((start_ts, ''.join(curr_strings)))
458 |
459 | # read base point and determine where our writes will start
460 | base_point = self._read_base_point(fh, archive, header)
461 | base_ts = base_point[0]
462 |
463 | first_ts = aligned_points[0][0]
464 | if base_ts == 0:
465 | # this file's first update, so set it to first timestamp
466 | base_ts = first_ts
467 |
468 | # write all of our packed strings in locations
469 | # determined by base_ts
470 | archive_end = archive['offset'] + archive['size']
471 | for (ts, packed_str) in packed_strings:
472 | offset = self._timestamp2offset(ts, base_ts, header, archive)
473 | bytes_beyond = (offset + len(packed_str)) - archive_end
474 | fh.seek(offset)
475 | if bytes_beyond > 0:
476 | fh.write(packed_str[:-bytes_beyond])
477 | fh.seek(archive['offset'])
478 | fh.write(packed_str[-bytes_beyond:])
479 | else:
480 | fh.write(packed_str)
481 |
482 | # now we propagate the updates to lower-precision archives
483 | archive_list = header['archive_list']
484 | next_archive_idx = archive_idx + 1
485 | if next_archive_idx < len(archive_list):
486 | # update timestamp_range
487 | time_start, time_end = timestamp_range
488 | time_end = max(time_end, aligned_points[-1][0])
489 | time_start = min(time_start, aligned_points[0][0])
490 | timestamp_range = (time_start, time_end)
491 | self._propagate(fh, header, archive, archive_list[next_archive_idx],
492 | timestamp_range, next_archive_idx)
493 |
494 | def _read_base_point(self, fh, archive, header):
495 | fh.seek(archive['offset'])
496 | base_point = fh.read(header['point_size'])
497 | return struct.unpack(header['point_format'], base_point)
498 |
499 | def _timestamp2offset(self, ts, base_ts, header, archive):
500 | time_distance = ts - base_ts
501 |         point_distance = time_distance / archive['sec_per_point']
502 |         byte_distance = point_distance * header['point_size']
503 |         return archive['offset'] + (byte_distance % archive['size'])
504 |
505 | @staticmethod
506 | def get_propagate_timeunit(low_sec_per_point, high_sec_per_point, xff):
507 | num_point = low_sec_per_point / high_sec_per_point
508 | return int(math.ceil(num_point * xff)) * high_sec_per_point
509 |
510 | def _propagate(self, fh, header, higher, lower, timestamp_range, lower_idx):
511 | """
512 |         Propagate updates to lower-precision archives.
513 | """
514 | from_time, until_time = timestamp_range
515 | timeunit = Storage.get_propagate_timeunit(lower['sec_per_point'],
516 | higher['sec_per_point'],
517 | header['x_files_factor'])
518 | from_time_boundary = from_time / timeunit
519 | until_time_boundary = until_time / timeunit
520 | if (from_time_boundary == until_time_boundary) and (from_time % timeunit) != 0:
521 | return False
522 |
523 | if lower['sec_per_point'] <= timeunit:
524 | lower_interval_end = until_time_boundary * timeunit
525 | lower_interval_start = min(lower_interval_end-timeunit, from_time_boundary*timeunit)
526 | else:
527 | lower_interval_end = roundup(until_time, lower['sec_per_point'])
528 | lower_interval_start = from_time - from_time % lower['sec_per_point']
529 |
530 | fh.seek(higher['offset'])
531 | packed_base_interval = fh.read(LONG_SIZE)
532 | higher_base_interval = struct.unpack(LONG_FORMAT, packed_base_interval)[0]
533 |
534 | if higher_base_interval == 0:
535 | higher_first_offset = higher['offset']
536 | else:
537 | higher_first_offset = self._timestamp2offset(lower_interval_start,
538 | higher_base_interval,
539 | header,
540 | higher)
541 |
542 | higher_point_num = (lower_interval_end - lower_interval_start) / higher['sec_per_point']
543 | higher_size = higher_point_num * header['point_size']
544 | relative_first_offset = higher_first_offset - higher['offset']
545 | relative_last_offset = (relative_first_offset + higher_size) % higher['size']
546 | higher_last_offset = relative_last_offset + higher['offset']
547 |
548 | # get unpacked series str
549 | # TODO: abstract this to a function
550 | fh.seek(higher_first_offset)
551 | if higher_first_offset < higher_last_offset:
552 | series_str = fh.read(higher_last_offset - higher_first_offset)
553 | else:
554 | higher_end = higher['offset'] + higher['size']
555 | series_str = fh.read(higher_end - higher_first_offset)
556 | fh.seek(higher['offset'])
557 | series_str += fh.read(higher_last_offset - higher['offset'])
558 |
559 | # now we unpack the series data we just read
560 | point_format = header['point_format']
561 | byte_order, point_type = point_format[0], point_format[1:]
562 | point_num = len(series_str) / header['point_size']
563 | # assert point_num == higher_point_num
564 | series_format = byte_order + (point_type * point_num)
565 | unpacked_series = struct.unpack(series_format, series_str)
566 |
567 | # and finally we construct a list of values
568 | point_cnt = (lower_interval_end - lower_interval_start) / lower['sec_per_point']
569 | tag_cnt = len(header['tag_list'])
570 | agg_cnt = lower['sec_per_point'] / higher['sec_per_point']
571 | step = (tag_cnt + 1) * agg_cnt
572 | lower_points = [None] * point_cnt
573 |
574 | unpacked_series = unpacked_series[::-1]
575 | ts = lower_interval_end
576 | for i in xrange(0, len(unpacked_series), step):
577 | higher_points = unpacked_series[i: i+step]
578 | ts -= higher['sec_per_point'] * agg_cnt
579 | agg_value = self._get_agg_value(higher_points, tag_cnt, header['agg_id'],
580 | lower_interval_start, lower_interval_end)
581 | lower_points[i/step] = (ts, agg_value)
582 |
583 |         lower_points = [x for x in lower_points if x and x[0]]  # drop empty or zero-timestamp items
584 | timestamp_range = (lower_interval_start, max(lower_interval_end, until_time))
585 | self._update_archive(fh, header, lower, lower_points, lower_idx,
586 | timestamp_range)
587 |
588 | def _get_agg_value(self, higher_points, tag_cnt, agg_id, ts_start, ts_end):
589 | higher_points = higher_points[::-1]
590 | agg_func = Agg.get_agg_func(agg_id)
591 | step = tag_cnt + 1
592 |
593 | # points format:
594 | # t1 v11 v12,
595 | # t2 v21 v22,
596 | # t3 v31 v32,
597 | points = [higher_points[i: i+step]
598 | for i in xrange(0, len(higher_points), step)]
599 | valid_points = self.filter_points_by_time(points, ts_start, ts_end)
600 | if not valid_points:
601 | val = [NULL_VALUE] * tag_cnt
602 | else:
603 | points = np.array(valid_points)
604 | points = points.transpose()
605 | val = [agg_func(self.filter_values(x)) for x in points[1:]]
606 | return val
607 |
608 | @staticmethod
609 | def filter_points_by_time(points, ts_start, ts_end):
610 | return [p for p in points if ts_start <= p[0] < ts_end]
611 |
612 | @staticmethod
613 | def filter_values(points):
614 | rs = [p for p in points if p != NULL_VALUE]
615 | return rs if rs else [NULL_VALUE]
616 |
617 | def fetch(self, path, from_time, until_time=None, now=None):
618 | with open(path, 'rb') as f:
619 | header = self.header(f)
620 |
621 | # validate timestamp
622 | if now is None:
623 | now = int(time.time())
624 | if until_time is None:
625 | until_time = now
626 | if from_time >= until_time:
627 |             raise InvalidTime("from_time '%s' is after until_time '%s'" %
628 | (from_time, until_time))
629 |
630 | oldest_time = now - header['max_retention']
631 | if from_time > now:
632 | return None
633 | if until_time < oldest_time:
634 | return None
635 |
636 | until_time = min(now, until_time)
637 | from_time = max(oldest_time, from_time)
638 |
639 | diff = now - from_time
640 | for archive in header['archive_list']:
641 | if archive['retention'] >= diff:
642 | break
643 |
644 | return self._archive_fetch(f, header, archive, from_time, until_time)
645 |
646 | def _archive_fetch(self, fh, header, archive, from_time, until_time):
647 | from_time = roundup(from_time, archive['sec_per_point'])
648 | until_time = roundup(until_time, archive['sec_per_point'])
649 | tag_cnt = len(header['tag_list'])
650 | null_point = (None,) * tag_cnt
651 |
652 | base_point = self._read_base_point(fh, archive, header)
653 | base_ts = base_point[0]
654 |
655 | if base_ts == 0:
656 | step = archive['sec_per_point']
657 | cnt = (until_time - from_time) / step
658 | time_info = (from_time, until_time, step)
659 | val_list = [null_point] * cnt
660 | return (header, time_info, val_list)
661 |
662 | from_offset = self._timestamp2offset(from_time, base_ts, header, archive)
663 | until_offset = self._timestamp2offset(until_time, base_ts, header, archive)
664 |
665 | fh.seek(from_offset)
666 | if from_offset < until_offset:
667 | series_str = fh.read(until_offset - from_offset)
668 | else:
669 | archive_end = archive['offset'] + archive['size']
670 | series_str = fh.read(archive_end - from_offset)
671 | fh.seek(archive['offset'])
672 | series_str += fh.read(until_offset - archive['offset'])
673 |
674 | ## unpack series string
675 | point_format = header['point_format']
676 | byte_order, point_type = point_format[0], point_format[1:]
677 | cnt = len(series_str) / header['point_size']
678 | series_format = byte_order + point_type * cnt
679 | unpacked_series = struct.unpack(series_format, series_str)
680 |
681 | ## construct value list
682 |         # pre-allocate the entire list for speed
683 | val_list = [null_point] * cnt
684 | step = tag_cnt + 1
685 | sec_per_point = archive['sec_per_point']
686 | for i in xrange(0, len(unpacked_series), step):
687 | point_ts = unpacked_series[i]
688 | if from_time <= point_ts < until_time:
689 | val = unpacked_series[i+1: i+step]
690 | idx = (point_ts - from_time) / sec_per_point
691 |                 val_list[idx] = self._convert_null_value(val)
692 |
693 | time_info = (from_time, until_time, sec_per_point)
694 | return header, time_info, val_list
695 |
696 | @staticmethod
697 |     def _convert_null_value(point_val):
698 | val = [None if x == NULL_VALUE else x
699 | for x in point_val]
700 | return tuple(val)
701 |
--------------------------------------------------------------------------------
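A minimal end-to-end sketch of the module-level API that kenshin/__init__.py re-exports from this Storage class; the path, tag names, and values below are made up for illustration (create accepts an absolute path because gen_path returns it unchanged):

    # Sketch: create a 2-metric kenshin file, write two points, read them back.
    import time
    import kenshin

    path = '/tmp/demo.hs'                       # hypothetical location
    tags = ['sys.cpu.user', 'sys.cpu.idle']     # one slot per metric in the file
    archives = [(10, 60), (60, 600)]            # (sec_per_point, point_count)
    kenshin.create(path, tags, archives, x_files_factor=0.5, agg_name='average')

    now = int(time.time())
    # each point is (timestamp, one value per tag slot)
    kenshin.update(path, [(now - 10, (1.0, 99.0)), (now, (2.0, 98.0))])

    header, (start, end, step), values = kenshin.fetch(path, now - 60)
    print('%s %s' % (step, values))
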
/kenshin/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/kenshin/tools/__init__.py
--------------------------------------------------------------------------------
/kenshin/tools/hash.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | from rurouni.fnv1a import get_int32_hash
3 |
4 | class Hash:
5 | def __init__(self, nodes):
6 | self.nodes = nodes
7 |
8 | def add_node(self, node):
9 | self.nodes.append(node)
10 |
11 |     def remove_node(self, node):
12 | self.nodes.remove(node)
13 |
14 | def get_node(self, key):
15 | idx = get_int32_hash(key) % len(self.nodes)
16 | return self.nodes[idx]
17 |
18 | def get_nodes(self, key):
19 | idx = get_int32_hash(key) % len(self.nodes)
20 | return self.nodes[idx:] + self.nodes[:idx]
21 |
--------------------------------------------------------------------------------
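A small usage sketch (the node names are made up; this needs the compiled rurouni.fnv1a extension that provides get_int32_hash):

    # Sketch: route a metric name to a cache instance.
    from kenshin.tools.hash import Hash

    ring = Hash(['instance-0', 'instance-1'])
    print(ring.get_node('sys.cpu.user'))     # the primary node for this metric
    print(ring.get_nodes('sys.cpu.user'))    # all nodes, primary first
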
/kenshin/tools/whisper_tool.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | import re
4 | import struct
5 |
6 | import kenshin
7 | from rurouni.conf import OrderedConfigParser
8 |
9 |
10 | longFormat = "!L"
11 | longSize = struct.calcsize(longFormat)
12 | floatFormat = "!f"
13 | floatSize = struct.calcsize(floatFormat)
14 | valueFormat = "!d"
15 | valueSize = struct.calcsize(valueFormat)
16 | pointFormat = "!Ld"
17 | pointSize = struct.calcsize(pointFormat)
18 | metadataFormat = "!2LfL"
19 | metadataSize = struct.calcsize(metadataFormat)
20 | archiveInfoFormat = "!3L"
21 | archiveInfoSize = struct.calcsize(archiveInfoFormat)
22 |
23 | agg_type_dict = {
24 |     1: 'average',
25 |     2: 'sum',
26 |     3: 'last',
27 |     4: 'max',
28 |     5: 'min',
29 | }
30 |
31 |
32 | def get_agg_name(agg_id):
33 | return agg_type_dict[agg_id]
34 |
35 |
36 | def remote_url(filepath):
37 | return filepath.startswith('http://')
38 |
39 |
40 | def read_header(fh):
41 | packed_meta = fh.read(metadataSize)
42 | agg_type, max_ret, xff, archive_cnt = struct.unpack(
43 | metadataFormat, packed_meta)
44 |
45 | archives = []
46 | for i in xrange(archive_cnt):
47 | packed_archive_info = fh.read(archiveInfoSize)
48 | off, sec, cnt = struct.unpack(archiveInfoFormat, packed_archive_info)
49 | archive_info = {
50 | 'offset': off,
51 | 'sec_per_point': sec,
52 | 'count': cnt,
53 | 'size': pointSize * cnt,
54 | 'retention': sec * cnt,
55 | }
56 | archives.append(archive_info)
57 |
58 | info = {
59 | 'xff': xff,
60 | 'archives': archives,
61 | 'agg_type': agg_type,
62 | }
63 | fh.close()
64 | return info
65 |
66 |
67 | ### schema (copied from carbon with some small changes)
68 |
69 | class Schema:
70 | def match(self, metric):
71 | raise NotImplementedError()
72 |
73 |
74 | class DefaultSchema(Schema):
75 | def __init__(self, name, archives):
76 | self.name = name
77 | self.archives = archives
78 |
79 | def match(self, metric):
80 | return True
81 |
82 |
83 | class PatternSchema(Schema):
84 | def __init__(self, name, pattern, archives):
85 | self.name = name
86 | self.pattern = pattern
87 | self.regex = re.compile(pattern)
88 | self.archives = archives
89 |
90 | def match(self, metric):
91 | return self.regex.search(metric)
92 |
93 | class Archive:
94 | def __init__(self, secondsPerPoint, points):
95 | self.secondsPerPoint = int(secondsPerPoint)
96 | self.points = int(points)
97 |
98 | def __str__(self):
99 | return "Archive = (Seconds per point: %d, Datapoints to save: %d)" % (
100 | self.secondsPerPoint, self.points)
101 |
102 | def getTuple(self):
103 | return (self.secondsPerPoint, self.points)
104 |
105 | @staticmethod
106 | def fromString(retentionDef):
107 | rs = kenshin.parse_retention_def(retentionDef)
108 | return Archive(*rs)
109 |
110 |
111 | def loadStorageSchemas(storage_schemas_conf):
112 | schemaList = []
113 | config = OrderedConfigParser()
114 | config.read(storage_schemas_conf)
115 |
116 | for section in config.sections():
117 | options = dict(config.items(section))
118 | pattern = options.get('pattern')
119 |
120 | retentions = options['retentions'].split(',')
121 | archives = [Archive.fromString(s).getTuple() for s in retentions]
122 |
123 | mySchema = PatternSchema(section, pattern, archives)
124 | schemaList.append(mySchema)
125 |
126 | schemaList.append(defaultSchema)
127 | return schemaList
128 |
129 |
130 | def loadAggregationSchemas(storage_aggregation_conf):
131 | schemaList = []
132 | config = OrderedConfigParser()
133 | config.read(storage_aggregation_conf)
134 |
135 | for section in config.sections():
136 | options = dict(config.items(section))
137 | pattern = options.get('pattern')
138 | aggregationMethod = options.get('aggregationmethod')
139 | archives = aggregationMethod
140 | mySchema = PatternSchema(section, pattern, archives)
141 | schemaList.append(mySchema)
142 |
143 | schemaList.append(defaultAggregation)
144 | return schemaList
145 |
146 | defaultArchive = Archive(60, 60 * 24 * 7) # default retention for unclassified data (7 days of minutely data)
147 | defaultSchema = DefaultSchema('default', [defaultArchive])
148 | defaultAggregation = DefaultSchema('default', (None, None))
149 |
150 |
151 | class NewSchema(Schema):
152 | def __init__(self, name, archives, aggregationMethod):
153 | self.name = name
154 | self.archives = archives
155 | self.aggregationMethod = aggregationMethod
156 |
157 |
158 | def gen_whisper_schema_func(whisper_conf_dir):
159 | storage_schemas_conf = os.path.join(whisper_conf_dir, 'storage-schemas.conf')
160 | storage_aggregation_conf = os.path.join(whisper_conf_dir, 'storage-aggregation.conf')
161 | storage_schemas = loadStorageSchemas(storage_schemas_conf)
162 | storage_aggs = loadAggregationSchemas(storage_aggregation_conf)
163 |
164 | def get_schema(schemas, metric):
165 | for schema in schemas:
166 | if schema.match(metric):
167 | return schema
168 |
169 | def _(metric):
170 | schema = get_schema(storage_schemas, metric)
171 | agg = get_schema(storage_aggs, metric)
172 | return NewSchema(schema.name, schema.archives, agg.archives)
173 | return _
174 |
--------------------------------------------------------------------------------
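A hedged sketch of how gen_whisper_schema_func is meant to be used; the conf directory below is hypothetical and must contain whisper's storage-schemas.conf and storage-aggregation.conf:

    # Sketch: resolve the whisper schema and aggregation method for a metric name.
    from kenshin.tools.whisper_tool import gen_whisper_schema_func

    get_schema = gen_whisper_schema_func('/data/carbon/conf')
    schema = get_schema('sys.cpu.user')
    print('%s %s %s' % (schema.name, schema.archives, schema.aggregationMethod))
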
/kenshin/utils.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import os
4 | import errno
5 |
6 |
7 | def get_metric(path):
8 | import re
9 | abspath = os.path.abspath(path)
10 | realpath = os.path.realpath(path)
11 | metric = None
12 | if abspath != realpath:
13 | try:
14 | metric = re.split('/link/[a-z0-9]+/', abspath)[1]
15 | metric = metric[:-3] # remove .hs
16 | metric = metric.replace('/', '.')
17 | except IndexError:
18 | pass
19 | return metric
20 |
21 |
22 | def mkdir_p(path):
23 | try:
24 | os.makedirs(path)
25 | except OSError as exc:
26 | if exc.errno == errno.EEXIST and os.path.isdir(path):
27 | pass
28 | else:
29 | raise
30 |
31 |
32 | def roundup(x, base):
33 | """
34 | Roundup to nearest multiple of `base`.
35 |
36 | >>> roundup(21, 10)
37 | 30
38 | >>> roundup(20, 10)
39 | 20
40 | >>> roundup(19, 10)
41 | 20
42 | """
43 | t = x % base
44 | return (x - t + base) if t else x
45 |
46 |
47 | if __name__ == '__main__':
48 | import doctest
49 | doctest.testmod()
50 |
--------------------------------------------------------------------------------
/misc/init_setup_demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | PWD=`pwd`
4 |
5 | # init config
6 |
7 | cp conf/storage-schemas.conf.example conf/storage-schemas.conf
8 | cp conf/rurouni.conf.example conf/rurouni.conf
9 | sed -i".bak" 's?/data/kenshin?'$PWD'?g' conf/rurouni.conf
10 |
11 | # init storage directory
12 |
13 | mkdir -p $PWD/storage/data
14 | mkdir -p $PWD/storage/link
15 | mkdir -p $PWD/storage/log
16 | mkdir -p $PWD/storage/run
17 | touch $PWD/storage/index
18 |
--------------------------------------------------------------------------------
/misc/update_version.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # ln -sf ../../misc/update_version.sh .git/hooks/pre-commit
3 | VERSIONING_SCRIPT="`pwd`/misc/versioning.py"
4 | VERSIONING_FILE="`pwd`/kenshin/__init__.py"
5 | TMPFILE=$VERSIONING_FILE".tmp"
6 | cat $VERSIONING_FILE | python $VERSIONING_SCRIPT --clean | python $VERSIONING_SCRIPT > $TMPFILE
7 | mv $TMPFILE $VERSIONING_FILE
8 | git add $VERSIONING_FILE
--------------------------------------------------------------------------------
/misc/versioning.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # from: https://gist.githubusercontent.com/pkrusche/7369262/raw/5bf2dc8afb88d3fdde7be6d16ee4290db6735f37/versioning.py
3 |
4 | """ Git Versioning Script
5 |
6 | Will transform stdin to expand some keywords with git version/author/date information.
7 |
8 | Specify --clean to remove this information before commit.
9 |
10 | Setup:
11 |
12 | 1. Copy versioning.py into your git repository
13 |
14 | 2. Run:
15 |
16 | git config filter.versioning.smudge 'python versioning.py'
17 | git config filter.versioning.clean 'python versioning.py --clean'
18 | echo 'version.py filter=versioning' >> .gitattributes
19 | git add versioning.py
20 |
21 |
22 | 3. add a version.py file with this contents:
23 |
24 | __commit__ = ""
25 | __author__ = ""
26 | __email__ = ""
27 | __date__ = ""
28 |
29 | """
30 |
31 | import sys
32 | import subprocess
33 | import re
34 |
35 |
36 | def main():
37 | clean = False
38 | if len(sys.argv) > 1:
39 | if sys.argv[1] == '--clean':
40 | clean = True
41 |
42 | # initialise empty here. Otherwise: forkbomb through the git calls.
43 | subst_list = {
44 | "commit": "",
45 | "date": "",
46 | "author": "",
47 | "email": ""
48 | }
49 |
50 | for line in sys.stdin:
51 | if not clean:
52 | subst_list = {
53 | "commit": subprocess.check_output(['git', 'describe', '--always']),
54 | "date": subprocess.check_output(['git', 'log', '--pretty=format:"%ad"', '-1']),
55 | "author": subprocess.check_output(['git', 'log', '--pretty=format:"%an"', '-1']),
56 | "email": subprocess.check_output(['git', 'log', '--pretty=format:"%ae"', '-1'])
57 | }
58 | for k, v in subst_list.iteritems():
59 | v = re.sub(r'[\n\r\t"\']', "", v)
60 | rexp = "__%s__\s*=[\s'\"]+" % k
61 | line = re.sub(rexp, "__%s__ = \"%s\"\n" % (k, v), line)
62 | sys.stdout.write(line)
63 | else:
64 | for k in subst_list:
65 | rexp = "__%s__\s*=.*" % k
66 | line = re.sub(rexp, "__%s__ = \"\"" % k, line)
67 | sys.stdout.write(line)
68 |
69 |
70 | if __name__ == "__main__":
71 | main()
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | nose==1.3.7
3 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Cython
2 | numpy==1.8.0
3 | zope.interface==4.1.1
4 | Twisted==13.1
5 |
--------------------------------------------------------------------------------
/rurouni/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/rurouni/__init__.py
--------------------------------------------------------------------------------
/rurouni/cache.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | """
4 | Copyright 2015-Present Douban Inc.
5 | Copyright 2009 Chris Davis
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at
9 | http://www.apache.org/licenses/LICENSE-2.0
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | """
16 |
17 | import os
18 | import time
19 | from threading import Lock
20 |
21 | import kenshin
22 | from kenshin.consts import NULL_VALUE
23 | from rurouni import log
24 | from rurouni.conf import settings
25 | from rurouni.storage import (
26 | getFilePath, createLink, StorageSchemas, rebuildIndex, rebuildLink
27 | )
28 | from rurouni.utils import TokenBucket, get_instance_of_metric
29 | from rurouni.exceptions import TokenBucketFull, UnexpectedMetric
30 |
31 |
32 | class MetricCache(object):
33 | """
34 | (schema, file_idx, pos_idx)
35 | """
36 | def __init__(self):
37 | self.lock = Lock()
38 | self.metric_idxs = {}
39 | self.schema_caches = {}
40 | self.metrics_fh = None
41 | self.storage_schemas = None
42 | self.token_bucket = None
43 |
44 | def __del__(self):
45 | if self.metrics_fh is not None:
46 | self.metrics_fh.close()
47 |
48 | def init(self):
49 | with self.lock:
50 | index_file = settings.INDEX_FILE
51 | instance_data_dir = os.path.join(
52 | settings.LOCAL_DATA_DIR, settings.instance)
53 | instance_link_dir = os.path.join(
54 | settings.LOCAL_LINK_DIR, settings.instance)
55 |
56 | if os.path.exists(instance_data_dir):
57 | if not os.path.exists(index_file):
58 | rebuildIndex(instance_data_dir, index_file)
59 | if not os.path.exists(instance_link_dir):
60 | rebuildLink(instance_data_dir, instance_link_dir)
61 |
62 | # init token bucket
63 | capacity = settings.MAX_CREATES_PER_MINUTE
64 | fill_rate = float(capacity) / 60
65 | self.token_bucket = TokenBucket(capacity, fill_rate)
66 |
67 | self._initCache(index_file)
68 |
69 | def _initCache(self, index_file):
70 |         # avoid repeated calls
71 | if self.metrics_fh is not None:
72 | return
73 |
74 | self._initStorageSchemas()
75 | if os.path.exists(index_file):
76 | MAX_ALLOW_ERR_LINE = 1
77 | err_line_cnt = 0
78 | with open(index_file) as f:
79 | for line in f:
80 | line = line.strip('\n')
81 | try:
82 | metric, schema_name, file_idx, file_pos = line.split(" ")
83 | file_idx = int(file_idx)
84 | file_pos = int(file_pos)
85 | except Exception as e:
86 | if err_line_cnt < MAX_ALLOW_ERR_LINE:
87 | err_line_cnt += 1
88 | continue
89 | else:
90 |                             raise Exception('Index file has too many errors: %s' % e)
91 |
92 | schema = self.storage_schemas.getSchemaByName(schema_name)
93 | schema_cache = self.getSchemaCache(schema)
94 | schema_cache.add(schema, file_idx, file_pos)
95 | self.metric_idxs[metric] = (schema.name, file_idx, file_pos)
96 |
97 | self.metrics_fh = open(index_file, 'a')
98 |
99 | def _initStorageSchemas(self):
100 | if self.storage_schemas is None:
101 | conf_file = os.path.join(settings.CONF_DIR, 'storage-schemas.conf')
102 | self.storage_schemas = StorageSchemas(conf_file)
103 |
104 | def put(self, metric, datapoint):
105 | try:
106 | (schema_name, file_idx, pos_idx) = self.getMetricIdx(metric)
107 | except (TokenBucketFull, UnexpectedMetric):
108 | return
109 | file_cache = self.schema_caches[schema_name][file_idx]
110 | file_cache.put(pos_idx, datapoint)
111 |
112 | def getMetricIdx(self, metric):
113 | with self.lock:
114 | if metric in self.metric_idxs:
115 | return self.metric_idxs[metric]
116 | else:
117 | from rurouni.state import instrumentation
118 |
119 | if not self.token_bucket.consume(1):
120 | instrumentation.incr('droppedCreates')
121 | raise TokenBucketFull()
122 |
123 | instance = get_instance_of_metric(metric, settings['NUM_ALL_INSTANCE'])
124 | if (instance != int(settings['instance']) and
125 | not metric.startswith(settings.RUROUNI_METRIC)):
126 | log.cache("UnexpectedMetric: %s" % metric)
127 | instrumentation.incr('droppedCreates')
128 | raise UnexpectedMetric()
129 |
130 | instrumentation.incr('creates')
131 |
132 | schema = self.storage_schemas.getSchemaByMetric(metric)
133 | schema_cache = self.getSchemaCache(schema)
134 | file_idx = schema_cache.getFileCacheIdx(schema)
135 | pos_idx = schema_cache[file_idx].getPosIdx()
136 |
137 | # create file
138 | file_path = getFilePath(schema.name, file_idx)
139 | if not os.path.exists(file_path):
140 | tags = [''] * schema.metrics_max_num
141 | kenshin.create(file_path, tags, schema.archives, schema.xFilesFactor,
142 | schema.aggregationMethod)
143 | # update file metadata
144 | kenshin.add_tag(metric, file_path, pos_idx)
145 | # create link
146 | createLink(metric, file_path)
147 | # create index
148 | self.metrics_fh.write("%s %s %s %s\n" % (metric, schema.name, file_idx, pos_idx))
149 |
150 | self.metric_idxs[metric] = (schema.name, file_idx, pos_idx)
151 | return self.metric_idxs[metric]
152 |
153 | def getSchemaCache(self, schema):
154 | try:
155 | return self.schema_caches[schema.name]
156 |         except KeyError:
157 | schema_cache = SchemaCache()
158 | self.schema_caches[schema.name] = schema_cache
159 | return schema_cache
160 |
161 | def get(self, metric):
162 | if metric not in self.metric_idxs:
163 | return []
164 | (schema_name, file_idx, pos_idx) = self.metric_idxs[metric]
165 | file_cache = self.schema_caches[schema_name][file_idx]
166 | now = int(time.time())
167 | data = file_cache.get(end_ts=now)
168 | return [(ts, val[pos_idx]) for ts, val in data
169 | if val[pos_idx] != NULL_VALUE]
170 |
171 | def pop(self, schema_name, file_idx, end_ts=None, clear=True):
172 | file_cache = self.schema_caches[schema_name][file_idx]
173 | datapoints = file_cache.get(end_ts=end_ts, clear=clear)
174 | return datapoints
175 |
176 | def writableFileCaches(self):
177 | now = int(time.time())
178 | with self.lock:
179 |             return [(schema_name, file_idx)
180 | for (schema_name, schema_cache) in self.schema_caches.items()
181 | for file_idx in range(schema_cache.size())
182 | if schema_cache[file_idx].canWrite(now)]
183 |
184 | def getAllFileCaches(self):
185 | return [(schema_name, file_idx)
186 | for (schema_name, schema_cache) in self.schema_caches.iteritems()
187 | for file_idx in range(schema_cache.size())]
188 |
189 |
190 | class SchemaCache(object):
191 | def __init__(self):
192 | self.file_caches = []
193 | self.curr_idx = 0
194 |
195 | def __getitem__(self, idx):
196 | return self.file_caches[idx]
197 |
198 | def size(self):
199 | return len(self.file_caches)
200 |
201 | def getFileCacheIdx(self, schema):
202 | while self.curr_idx < len(self.file_caches):
203 | if not self.file_caches[self.curr_idx].metricFull():
204 | return self.curr_idx
205 | else:
206 | self.curr_idx += 1
207 |         # no file cache is available, so create a new one
208 | cache = FileCache(schema)
209 | self.file_caches.append(cache)
210 | return self.curr_idx
211 |
212 | def add(self, schema, file_idx, file_pos):
213 | if len(self.file_caches) <= file_idx:
214 | for _ in range(len(self.file_caches), file_idx + 1):
215 | self.file_caches.append(FileCache(schema))
216 | self.file_caches[file_idx].add(file_pos)
217 |
218 |
219 | class FileCache(object):
220 | def __init__(self, schema):
221 | self.lock = Lock()
222 | self.metrics_max_num = schema.metrics_max_num
223 | self.bitmap = 0
224 | self.avaiable_pos_idx = 0
225 | self.resolution = schema.archives[0][0]
226 | self.retention = schema.cache_retention
227 |
228 | # +1 to avoid self.points_num == 0
229 | self.points_num = self.retention / self.resolution + 1
230 | self.cache_size = int(self.points_num * schema.cache_ratio)
231 | self.points = [NULL_VALUE] * self.metrics_max_num * self.cache_size
232 | self.base_idxs = [i * self.cache_size for i in xrange(self.metrics_max_num)]
233 |
234 | self.start_ts = None
235 | self.max_ts = 0
236 | self.start_offset = 0
237 |
238 | def add(self, file_pos):
239 | with self.lock:
240 | self.bitmap |= (1 << file_pos)
241 |
242 | def getPosIdx(self):
243 | with self.lock:
244 | while True:
245 | if self.bitmap & (1 << self.avaiable_pos_idx):
246 | self.avaiable_pos_idx += 1
247 | else:
248 | self.bitmap |= (1 << self.avaiable_pos_idx)
249 | self.avaiable_pos_idx += 1
250 | return self.avaiable_pos_idx - 1
251 |
252 | def metricFull(self):
253 | with self.lock:
254 | return self.bitmap + 1 == (1 << self.metrics_max_num)
255 |
256 | def metricEmpty(self):
257 | return not self.start_ts
258 |
259 | def canWrite(self, now):
260 | with self.lock:
261 | return self.start_ts and ((now - self.start_ts - self.retention) >=
262 | settings.DEFAULT_WAIT_TIME)
263 |
264 | def put(self, pos_idx, datapoint):
265 | with self.lock:
266 | try:
267 | base_idx = self.base_idxs[pos_idx]
268 | ts, val = datapoint
269 |
270 | self.max_ts = max(self.max_ts, ts)
271 | if self.start_ts is None:
272 | self.start_ts = ts - ts % self.resolution
273 | idx = base_idx
274 | else:
275 | offset = (ts - self.start_ts) / self.resolution
276 | idx = base_idx + (self.start_offset + offset) % self.cache_size
277 |
278 | self.points[idx] = val
279 | except Exception as e:
280 | log.err('put error in FileCache: %s' % e)
281 |
282 | def get_offset(self, ts):
283 | interval = (ts - self.start_ts) / self.resolution
284 | if interval >= self.cache_size:
285 | interval = self.cache_size - 1
286 | return (self.start_offset + interval) % self.cache_size
287 |
288 | def get(self, end_ts=None, clear=False):
289 | with self.lock:
290 | if self.metricEmpty():
291 | return []
292 | begin_offset = self.start_offset
293 | if end_ts:
294 | end_offset = self.get_offset(end_ts)
295 | else:
296 | end_offset = (begin_offset + self.points_num) % self.cache_size
297 |
298 | rs = [None] * self.metrics_max_num
299 | if begin_offset < end_offset:
300 | length = end_offset - begin_offset
301 | for i, base_idx in enumerate(self.base_idxs):
302 | begin_idx = base_idx + begin_offset
303 | end_idx = base_idx + end_offset
304 | val = self.points[begin_idx: end_idx]
305 | rs[i] = val
306 | if clear:
307 | self.clearPoint(begin_idx, end_idx)
308 | else:
309 | # wrap around
310 | length = self.cache_size - begin_offset + end_offset
311 | for i, base_idx in enumerate(self.base_idxs):
312 | begin_idx = base_idx + begin_offset
313 | end_idx = base_idx + end_offset
314 | val = self.points[begin_idx: base_idx+self.cache_size]
315 | val += self.points[base_idx: begin_idx]
316 | rs[i] = val
317 | if clear:
318 | self.clearPoint(begin_idx, base_idx+self.cache_size)
319 | self.clearPoint(base_idx, end_idx)
320 |
321 | # timestamps
322 | timestamps = [self.start_ts + i * self.resolution
323 | for i in range(length)]
324 |
325 | if clear:
326 | next_ts = timestamps[-1] + self.resolution
327 | if self.max_ts < next_ts:
328 | self.start_ts = None
329 | self.start_offset = 0
330 | else:
331 | self.start_ts = next_ts
332 | self.start_offset = end_offset
333 |
334 | return zip(timestamps, zip(*rs))
335 |
336 | def clearPoint(self, begin_idx, end_idx):
337 | for i in range(begin_idx, end_idx):
338 | self.points[i] = NULL_VALUE
339 |
340 |
341 | MetricCache = MetricCache()
342 |
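# Illustrative sketch (not part of the original module): FileCache.put()/get()
# above implement a fixed-size ring buffer per metric -- each metric owns a
# contiguous slice of `cache_size` slots, and a datapoint's slot is derived
# from its timestamp. All values below are made-up examples.
resolution = 10            # schema.archives[0][0]: seconds per point
cache_size = 90            # int(points_num * cache_ratio)
start_ts = 1411628770      # first timestamp seen, aligned to the resolution
start_offset = 0           # ring offset that start_ts maps to
pos_idx = 3                # this metric's position inside the file
base_idx = pos_idx * cache_size

ts = 1411628900
offset = (ts - start_ts) // resolution              # points since start_ts
slot = base_idx + (start_offset + offset) % cache_size
print 'datapoint at ts=%d lands in points[%d]' % (ts, slot)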
--------------------------------------------------------------------------------
/rurouni/conf.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | import sys
4 | import errno
5 | from os.path import join, normpath, expanduser, dirname, exists, isdir
6 | from ConfigParser import ConfigParser
7 | from optparse import OptionParser
8 | from twisted.python import usage
9 |
10 | from rurouni.exceptions import RurouniException, ConfigException
11 | from rurouni import log
12 |
13 |
14 | defaults = dict(
15 | CACHE_QUERY_PORT = '7002',
16 | CACHE_QUERY_INTERFACE = '0.0.0.0',
17 |
18 | LINE_RECEIVER_PORT = '2003',
19 | LINE_RECEIVER_INTERFACE = '0.0.0.0',
20 |
21 | PICKLE_RECEIVER_PORT = '2004',
22 | PICKLE_RECEIVER_INTERFACE = '0.0.0.0',
23 |
24 | DEFAULT_WAIT_TIME = 10,
25 | RUROUNI_METRIC_INTERVAL = 60,
26 | RUROUNI_METRIC = 'rurouni',
27 |
28 | LOG_UPDATES = True,
29 | CONF_DIR = None,
30 | LOCAL_DATA_DIR = None,
31 | LOCAL_LINK_DIR = None,
32 | PID_DIR = None,
33 |
34 | MAX_CREATES_PER_MINUTE = float('inf'),
35 | NUM_ALL_INSTANCE = 1,
36 | )
37 |
38 |
39 | class Settings(dict):
40 | __getattr__ = dict.__getitem__
41 |
42 | def __init__(self):
43 | dict.__init__(self)
44 | self.update(defaults)
45 |
46 | def readFrom(self, path, section):
47 | parser = ConfigParser()
48 | if not parser.read(path):
49 | raise RurouniException("Failed to read config: %s" % path)
50 |
51 | if not parser.has_section(section):
52 | return
53 |
54 | for key, val in parser.items(section):
55 | key = key.upper()
56 | val_typ = type(defaults[key]) if key in defaults else str
57 |
58 | if val_typ is list:
59 | val = [v.strip() for v in val.split(',')]
60 | elif val_typ is bool:
61 | val = parser.getboolean(section, key)
62 | else:
63 | # attempt to figure out numeric types automatically
64 | try:
65 | val = int(val)
66 | except:
67 | try:
68 | val = float(val)
69 | except:
70 | pass
71 | self[key] = val
72 |
73 |
74 | settings = Settings()
75 |
76 |
77 | class OrderedConfigParser(ConfigParser):
78 | """
79 | Ordered Config Parser.
80 |
81 | http://stackoverflow.com/questions/1134071/keep-configparser-output-files-sorted.
82 |
83 |     Actually, since Python 2.7 the ConfigParser default dict is `OrderedDict`,
84 |     so we only override the read method to validate the config file.
85 | """
86 | def read(self, path):
87 | if not os.access(path, os.R_OK):
88 | raise RurouniException(
89 | "Missing config file or wrong perm on %s" % path)
90 | return ConfigParser.read(self, path)
91 |
92 |
93 | class RurouniOptions(usage.Options):
94 |
95 | optFlags = [
96 | ["debug", "", "run in debug mode."],
97 | ]
98 |
99 | optParameters = [
100 | ['config', 'c', None, 'use the given config file.'],
101 | ['instance', '', 'a', 'manage a specific rurouni instance.'],
102 | ['logdir', '', None, 'write logs to the given directory.'],
103 | ]
104 |
105 | def postOptions(self):
106 | global settings
107 | pidfile = self.parent['pidfile']
108 | if pidfile.endswith('twistd.pid'):
109 | pidfile = None
110 | self['pidfile'] = pidfile
111 |
112 | # Enforce a default umask of '022' if none was set.
113 |         if 'umask' not in self.parent or self.parent['umask'] is None:
114 | self.parent['umask'] = 022
115 |
116 | program = self.parent.subCommand
117 | settings['program'] = program
118 | program_settings = read_config(program, self)
119 | settings.update(program_settings)
120 |
121 | # normalize and expand path
122 | variables = ['STORAGE_DIR', 'LOCAL_DATA_DIR', 'LOCAL_LINK_DIR',
123 | 'PID_DIR', 'LOG_DIR', 'pidfile', 'INDEX_FILE']
124 | for var in variables:
125 | settings[var] = normpath(expanduser(settings[var]))
126 |
127 | storage_schemas = join(settings['CONF_DIR'], 'storage-schemas.conf')
128 | if not exists(storage_schemas):
129 |             print 'Error: missing required config %s' % storage_schemas
130 | sys.exit(1)
131 |
132 | self.parent['pidfile'] = settings['pidfile']
133 |
134 |         if 'action' not in self:
135 | self['action'] = 'start'
136 | self.handleAction()
137 |
138 | if self['debug']:
139 | log.setDebugEnabled(True)
140 | else:
141 | if self.parent.get('syslog', None):
142 | log.logToSyslog(self.parent['prefix'])
143 | elif not self.parent['nodaemon']:
144 | if not isdir(settings.LOG_DIR):
145 | os.makedirs(settings.LOG_DIR)
146 | log.logToDir(settings.LOG_DIR)
147 |
148 | @staticmethod
149 | def _normpath(path):
150 | return normpath(expanduser(path))
151 |
152 | def parseArgs(self, *action):
153 | if len(action) == 1:
154 | self["action"] = action[0]
155 |
156 | def handleAction(self):
157 | action = self['action']
158 | pidfile = self.parent["pidfile"]
159 | program = settings['program']
160 | instance = self['instance']
161 |
162 | if action == 'stop':
163 | if not exists(pidfile):
164 | print 'pidfile %s does not exist' % pidfile
165 | raise SystemExit(0)
166 | with open(pidfile) as f:
167 | pid = int(f.read().strip())
168 | print 'sending kill signal to pid %d' % pid
169 | try:
170 | os.kill(pid, 15)
171 | except OSError as e:
172 | if e.errno == errno.ESRCH:
173 | print 'no process with pid %d running' % pid
174 | else:
175 | raise
176 | raise SystemExit(0)
177 |
178 | elif action == 'start':
179 | if exists(pidfile):
180 | with open(pidfile) as f:
181 | pid = int(f.read().strip())
182 | if _process_alive(pid):
183 | print ('%s (instance %s) is already running with pid %d' %
184 | (program, instance, pid))
185 | raise SystemExit(1)
186 | else:
187 | print 'removing stale pidfile %s' % pidfile
188 | try:
189 | os.unlink(pidfile)
190 | except:
191 | print 'could not remove pidfile %s' % pidfile
192 | else:
193 | if not os.path.exists(settings['PID_DIR']):
194 | try:
195 | os.makedirs(settings['PID_DIR'])
196 | except OSError as e:
197 |                         if e.errno != errno.EEXIST or not os.path.isdir(settings['PID_DIR']):
198 |                             raise
199 |
200 | elif action == 'status':
201 | if not exists(pidfile):
202 | print '%s (instance %s) is not running' % (program, instance)
203 | raise SystemExit(0)
204 | with open(pidfile) as f:
205 | pid = int(f.read().strip())
206 |
207 | if _process_alive(pid):
208 | print ('%s (instance %s) is running with pid %d' %
209 | (program, instance, pid))
210 | raise SystemExit(0)
211 | else:
212 | print "%s (instance %s) is not running" % (program, instance)
213 | raise SystemExit(1)
214 |
215 |
216 | def get_parser(usage="%prog [options] "):
217 | "Create a parser for command line options."
218 | parser = OptionParser(usage=usage)
219 | parser.add_option(
220 | "--debug", action="store_true",
221 | help="Run in the foreground, log to stdout")
222 | parser.add_option(
223 | "--nodaemon", action="store_true",
224 | help='Run in the foreground')
225 | parser.add_option(
226 | "--pidfile", default=None,
227 | help='Write pid to the given file')
228 | parser.add_option(
229 | "--umask", default=None,
230 | help="Use the given umask when creating files")
231 | parser.add_option(
232 | '--config', default=None,
233 | help="Use the given config file")
234 | parser.add_option(
235 | "--instance", default="a",
236 | help="Manage a specific rurouni instance")
237 | return parser
238 |
239 |
240 | def read_config(program, options):
241 | """
242 | Read settings for 'program' from configuration file specified by
243 |     'options["config"]', with missing values provided by 'defaults'.
244 | """
245 | settings = Settings()
246 |
247 | # os environ variables
248 | graphite_root = os.environ.get('GRAPHITE_ROOT')
249 | if graphite_root is None:
250 | raise ConfigException('GRAPHITE_ROOT needs to be provided.')
251 | settings['STORAGE_DIR'] = os.environ.get(
252 | 'STORAGE_DIR', join(graphite_root, 'storage'))
253 | settings['CONF_DIR'] = os.environ.get(
254 | 'CONF_DIR', join(graphite_root, 'conf'))
255 |
256 | # set default config variables
257 | settings['LOCAL_DATA_DIR'] = join(settings['STORAGE_DIR'], 'data')
258 | settings['LOCAL_LINK_DIR'] = join(settings['STORAGE_DIR'], 'link')
259 | settings['PID_DIR'] = join(settings['STORAGE_DIR'], 'run')
260 | settings['LOG_DIR'] = join(settings['STORAGE_DIR'], 'log', program)
261 |
262 | if options['config'] is None:
263 | options['config'] = join(settings['CONF_DIR'], 'rurouni.conf')
264 | else:
265 | settings['CONF_DIR'] = dirname(normpath(options['config']))
266 |
267 | # read configuration options from program-specific section.
268 | section = program[len('rurouni-'):]
269 | config = options['config']
270 | if not exists(config):
271 | raise ConfigException('Error: missing required config %s' % config)
272 |
273 | instance = options['instance']
274 | if not instance.isdigit():
275 |         raise ConfigException('Error: instance must be a digit, got %s' % instance)
276 | settings['instance'] = instance
277 |
278 | # read configuration file
279 | settings.readFrom(config, section)
280 | settings.readFrom(config, '%s:%s' % (section, instance))
281 |
282 | # check cache instance number
283 | parser = ConfigParser()
284 | parser.read(config)
285 | prefix = 'cache:'
286 | instances = {int(s[len(prefix):]) for s in parser.sections()
287 | if s.startswith(prefix)}
288 | if settings['NUM_ALL_INSTANCE'] != len(instances) or \
289 | settings['NUM_ALL_INSTANCE'] != max(instances) + 1:
290 | raise ConfigException(
291 |             'Error: cache instance sections do not match NUM_ALL_INSTANCE')
292 |
293 | settings['pidfile'] = (
294 | options['pidfile'] or
295 | join(settings['PID_DIR'], '%s-%s.pid' % (program, instance))
296 | )
297 | settings['LOG_DIR'] = (
298 | options['logdir'] or
299 | join(settings['LOG_DIR'], '%s-%s' % (program, instance))
300 | )
301 |
302 | settings['INDEX_FILE'] = join(settings['LOCAL_DATA_DIR'],
303 | '%s.idx' % instance)
304 | return settings
305 |
306 |
307 | def _process_alive(pid):
308 | if exists('/proc'):
309 | return exists('/proc/%d' % pid)
310 | else:
311 | try:
312 | os.kill(int(pid), 0)
313 | return True
314 | except OSError as e:
315 | return e.errno == errno.EPERM
316 |
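# Illustrative sketch (not part of the original module): how Settings.readFrom()
# layers a per-instance section over the shared section and coerces values using
# the types in `defaults`. The config text and temp path are assumptions, and it
# requires the rurouni package to be importable.
import os
import tempfile
from rurouni.conf import Settings

conf_text = (
    "[cache]\n"
    "MAX_CREATES_PER_MINUTE = 1000\n"
    "LOG_UPDATES = false\n"
    "\n"
    "[cache:0]\n"
    "CACHE_QUERY_PORT = 7002\n"
)
conf_path = os.path.join(tempfile.mkdtemp(), 'rurouni.conf')
with open(conf_path, 'w') as f:
    f.write(conf_text)

example = Settings()
example.readFrom(conf_path, 'cache')     # shared [cache] options
example.readFrom(conf_path, 'cache:0')   # instance-specific overrides
print example.MAX_CREATES_PER_MINUTE, example.LOG_UPDATES, example.CACHE_QUERY_PORT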
--------------------------------------------------------------------------------
/rurouni/exceptions.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 |
4 | class RurouniException(Exception):
5 | pass
6 |
7 | class ConfigException(RurouniException):
8 | pass
9 |
10 | class TokenBucketFull(RurouniException):
11 | pass
12 |
13 | class UnexpectedMetric(RurouniException):
14 | pass
--------------------------------------------------------------------------------
/rurouni/fnv1a.pyx:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from libc.stdint cimport uint8_t, uint32_t, int32_t
4 |
5 |
6 | cdef int32_t FNV32a(void* key, int size) nogil:
7 | cdef uint8_t* p = key
8 | cdef uint32_t h = 2166136261UL
9 |
10 | for i in range(size):
11 | h ^= p[i]
12 | h *= 16777619;
13 |
14 | return h
15 |
16 |
17 | def get_int32_hash(bytes b not None):
18 | """Return signed 32-bit fnv1a hash.
19 |
20 |     NOTE: For historical reasons we (Douban) use a signed 32-bit
21 |     hash; you can change it if you want.
22 | """
23 | return FNV32a(b, len(b))
24 |
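# Illustrative sketch (not part of the original module): a pure-Python
# reference of the same 32-bit FNV-1a hash. The compiled version above is
# what rurouni actually uses; the helper names here are just for illustration.
def fnv1a_32_reference(data):
    # start from the FNV offset basis, XOR each byte, multiply by the FNV
    # prime, and keep only the low 32 bits
    h = 2166136261
    for ch in data:
        h ^= ord(ch)
        h = (h * 16777619) & 0xffffffff
    return h

def to_int32(h):
    # reinterpret the unsigned result as the signed value get_int32_hash returns
    return h - 0x100000000 if h >= 0x80000000 else h

assert fnv1a_32_reference('foobar') == 0xbf9cf968          # matches tests/test_fnv1a.py
assert to_int32(fnv1a_32_reference('a')) == 0xe40c292c - 0x100000000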
--------------------------------------------------------------------------------
/rurouni/log.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import time
4 | from zope.interface import implements
5 | from sys import stdout, stderr
6 |
7 | from twisted.python.log import startLoggingWithObserver, textFromEventDict, msg, err, ILogObserver
8 | from twisted.python.syslog import SyslogObserver
9 | from twisted.python.logfile import DailyLogFile
10 |
11 |
12 |
13 | class RurouniLogObserver(object):
14 | implements(ILogObserver)
15 |
16 | def __call__(self, event):
17 | return self.observer(event)
18 |
19 | def logToDir(self, logdir):
20 | self.logdir = logdir
21 | self.console_logfile = DailyLogFile('console.log', logdir)
22 | self.custom_logs = {}
23 | self.observer = self.logdirObserver
24 |
25 | def logToSyslog(self, prefix):
26 | observer = SyslogObserver(prefix).emit
27 | def log(event):
28 | event['system'] = event.get('type', 'console')
29 | observer(event)
30 | self.observer = log
31 |
32 | def stdoutObserver(self, event):
33 | stdout.write(formatEvent(event, includeType=True) + '\n')
34 | stdout.flush()
35 |
36 | def logdirObserver(self, event):
37 | msg = formatEvent(event)
38 | log_type = event.get('type')
39 |
40 | if log_type is not None and log_type not in self.custom_logs:
41 | self.custom_logs[log_type] = DailyLogFile(log_type + '.log', self.logdir)
42 |
43 | logfile = self.custom_logs.get(log_type, self.console_logfile)
44 | logfile.write(msg + '\n')
45 | logfile.flush()
46 |
47 | observer = stdoutObserver # default to stdout
48 |
49 |
50 | def formatEvent(event, includeType=False):
51 | event['isError'] = 'failure' in event
52 | msg = textFromEventDict(event)
53 |
54 | if includeType:
55 | type_tag = '[%s] ' % event.get('type', 'console')
56 | else:
57 | type_tag = ''
58 |
59 | timestamp = time.strftime("%d/%m/%Y %H:%M:%S")
60 | return "%s\t%s\t%s" % (timestamp, type_tag, msg)
61 |
62 |
63 | rurouniLogObserver = RurouniLogObserver()
64 | logToDir = rurouniLogObserver.logToDir
65 | logToSyslog = rurouniLogObserver.logToSyslog
66 | logToStdout = lambda: startLoggingWithObserver(rurouniLogObserver)
67 |
68 |
69 | def cache(message, **context):
70 | context['type'] = 'cache'
71 | msg(message, **context)
72 |
73 | def clients(message, **context):
74 | context['type'] = 'clients'
75 | msg(message, **context)
76 |
77 | def creates(message, **context):
78 | context['type'] = 'creates'
79 | msg(message, **context)
80 |
81 | def updates(message, **context):
82 | context['type'] = 'updates'
83 | msg(message, **context)
84 |
85 | def listener(message, **context):
86 | context['type'] = 'listener'
87 | msg(message, **context)
88 |
89 | def relay(message, **context):
90 | context['type'] = 'relay'
91 | msg(message, **context)
92 |
93 | def aggregator(message, **context):
94 | context['type'] = 'aggregator'
95 | msg(message, **context)
96 |
97 | def query(message, **context):
98 | context['type'] = 'query'
99 | msg(message, **context)
100 |
101 | def debug(message, **context):
102 | if debugEnabled:
103 | msg(message, **context)
104 |
105 | debugEnabled = False
106 | def setDebugEnabled(enabled):
107 | global debugEnabled
108 | debugEnabled = enabled
109 |
--------------------------------------------------------------------------------
/rurouni/protocols.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import cPickle as pickle
3 |
4 | from twisted.protocols.basic import LineOnlyReceiver, Int32StringReceiver
5 | from twisted.internet.error import ConnectionDone
6 |
7 | from rurouni.state import events
8 | from rurouni import log
9 | from rurouni.cache import MetricCache
10 |
11 |
12 | ### metric receiver
13 |
14 | class MetricReceiver:
15 | """ Base class for all metric receive protocols.
16 | """
17 | def connectionMade(self):
18 | self.peerName = self.getPeerName()
19 |
20 | def getPeerName(self):
21 | if hasattr(self.transport, 'getPeer'):
22 | peer = self.transport.getPeer()
23 | return '%s:%d' % (peer.host, peer.port)
24 | else:
25 | return 'peer'
26 |
27 | def metricReceived(self, metric, datapoint):
28 | events.metricReceived(metric, datapoint)
29 |
30 |
31 | class MetricLineReceiver(MetricReceiver, LineOnlyReceiver):
32 | delimiter = '\n'
33 |
34 | def lineReceived(self, line):
35 | try:
36 | metric, value, timestamp = line.strip().split()
37 | datapoint = (int(timestamp), float(value))
38 | except:
39 | log.msg('invalid line (%s) received from client %s' %
40 | (line, self.peerName))
41 | return
42 | self.metricReceived(metric, datapoint)
43 |
44 |
45 | class MetricPickleReceiver(MetricReceiver, Int32StringReceiver):
46 | MAX_LENGTH = 2<<20 # 2M
47 |
48 | def connectionMade(self):
49 | MetricReceiver.connectionMade(self)
50 |
51 | def stringReceived(self, data):
52 | try:
53 | datapoints = pickle.loads(data)
54 |         except Exception:
55 |             log.listener("invalid pickle received from %s, ignoring" % self.peerName)
56 |             return
57 | for metric, (timestamp, value) in datapoints:
58 | try:
59 | datapoint = int(timestamp), float(value)
60 | except Exception as e:
61 | continue
62 | self.metricReceived(metric, datapoint)
63 |
64 |
65 | class CacheManagementHandler(Int32StringReceiver):
66 | MAX_LENGTH = 3<<20 # 3M
67 |
68 | def connectionMade(self):
69 | peer = self.transport.getPeer()
70 | self.peerAddr = "%s:%s" % (peer.host, peer.port)
71 | log.query("%s connected" % self.peerAddr)
72 |
73 | def connectionLost(self, reason):
74 | if reason.check(ConnectionDone):
75 | log.query("%s disconnected" % self.peerAddr)
76 | else:
77 | log.query("%s connection lost: %s" % (self.peerAddr, reason.value))
78 |
79 | def stringReceived(self, rawRequest):
80 | request = pickle.loads(rawRequest)
81 | datapoints = MetricCache.get(request['metric'])
82 | rs = dict(datapoints=datapoints)
83 | response = pickle.dumps(rs, protocol=-1)
84 | self.sendString(response)
85 |
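# Illustrative sketch (not part of the original module): a minimal client for
# the two receivers above. The host and ports are assumptions matching the
# default LINE_RECEIVER_PORT / PICKLE_RECEIVER_PORT settings.
import socket
import struct
import time
import cPickle as pickle

def send_line(metric, value, host='127.0.0.1', port=2003):
    # MetricLineReceiver expects "<metric> <value> <timestamp>\n"
    sock = socket.create_connection((host, port))
    sock.sendall('%s %s %d\n' % (metric, value, int(time.time())))
    sock.close()

def send_pickle(datapoints, host='127.0.0.1', port=2004):
    # MetricPickleReceiver expects a 4-byte big-endian length prefix followed
    # by a pickled list of (metric, (timestamp, value)) tuples
    payload = pickle.dumps(datapoints, protocol=-1)
    sock = socket.create_connection((host, port))
    sock.sendall(struct.pack('!I', len(payload)) + payload)
    sock.close()

# usage (requires a running rurouni-cache):
#   send_line('sys.cpu.user', 0.5)
#   send_pickle([('sys.cpu.user', (int(time.time()), 0.5))])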
--------------------------------------------------------------------------------
/rurouni/service.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | from twisted.application import internet, service
3 | from twisted.application.internet import TCPServer
4 | from twisted.plugin import IPlugin
5 | from twisted.internet.protocol import ServerFactory
6 | from twisted.python.log import ILogObserver
7 | from twisted.python.components import Componentized
8 |
9 | from rurouni import protocols
10 | from rurouni import state
11 | from rurouni.conf import settings
12 | from rurouni.log import rurouniLogObserver
13 |
14 |
15 | ### root service
16 |
17 | class RurouniRootService(service.MultiService):
18 |     """ Root Service that properly configures twistd logging.
19 | """
20 |
21 | def setServiceParent(self, parent):
22 | service.MultiService.setServiceParent(self, parent)
23 | if isinstance(parent, Componentized):
24 | parent.setComponent(ILogObserver, rurouniLogObserver)
25 |
26 |
27 | def createBaseService(options):
28 | root_service = RurouniRootService()
29 | root_service.setName('rurouni')
30 |
31 | receive_services = (
32 | (settings.LINE_RECEIVER_INTERFACE,
33 | settings.LINE_RECEIVER_PORT,
34 | protocols.MetricLineReceiver
35 | ),
36 | (settings.PICKLE_RECEIVER_INTERFACE,
37 | settings.PICKLE_RECEIVER_PORT,
38 | protocols.MetricPickleReceiver
39 | ),
40 | )
41 | for interface, port, protocol in receive_services:
42 | if port:
43 | factory = ServerFactory()
44 | factory.protocol = protocol
45 | service = TCPServer(int(port), factory, interface=interface)
46 | service.setServiceParent(root_service)
47 |
48 | from rurouni.state.instrumentation import InstrumentationService
49 | service = InstrumentationService()
50 | service.setServiceParent(root_service)
51 |
52 | return root_service
53 |
54 |
55 | def createCacheService(options):
56 | from rurouni.cache import MetricCache
57 | from rurouni.protocols import CacheManagementHandler
58 |
59 | MetricCache.init()
60 | state.events.metricReceived.addHandler(MetricCache.put)
61 | root_service = createBaseService(options)
62 |
63 | factory = ServerFactory()
64 | factory.protocol = CacheManagementHandler
65 | service = TCPServer(int(settings.CACHE_QUERY_PORT), factory,
66 | interface=settings.CACHE_QUERY_INTERFACE)
67 | service.setServiceParent(root_service)
68 |
69 | from rurouni.writer import WriterService
70 | service = WriterService()
71 | service.setServiceParent(root_service)
72 |
73 | return root_service
74 |
--------------------------------------------------------------------------------
/rurouni/state/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This module exists for the purpose of tracking global state.
3 | """
4 | cacheTooFull = False
5 |
--------------------------------------------------------------------------------
/rurouni/state/events.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | from twisted.python.failure import Failure
3 |
4 | from rurouni import state, log
5 | from rurouni.state import instrumentation
6 |
7 |
8 | class Event(object):
9 |
10 | def __init__(self, name, default_handler=None):
11 | self.name = name
12 | self.handlers = [default_handler] if default_handler else []
13 |
14 | def addHandler(self, handler):
15 | if handler not in self.handlers:
16 | self.handlers.append(handler)
17 |
18 | def removeHandler(self, handler):
19 | if handler in self.handlers:
20 | self.handlers.remove(handler)
21 |
22 | def __call__(self, *args, **kwargs):
23 | for h in self.handlers:
24 | try:
25 | h(*args, **kwargs)
26 | except Exception as e:
27 | log.err(None,
28 | "Exception %s in %s event handler: args=%s, kwargs=%s"
29 | % (e, self.name, args, kwargs))
30 |
31 |
32 | metricReceived = Event('metricReceived',
33 | lambda *a, **ka: instrumentation.incr('metricReceived'))
34 |
35 | cacheFull = Event('cacheFull')
36 | cacheFull.addHandler(lambda *a, **ka: instrumentation.incr('cacheOverflow'))
37 | cacheFull.addHandler(lambda *a, **ka: setattr(state, 'cacheTooFull', True))
38 |
--------------------------------------------------------------------------------
/rurouni/state/instrumentation.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | import time
4 | import socket
5 | from resource import getrusage, RUSAGE_SELF
6 |
7 | from twisted.application.service import Service
8 | from twisted.internet.task import LoopingCall
9 |
10 | from rurouni.conf import settings
11 | from rurouni import log
12 |
13 |
14 | # consts
15 | HOSTNAME = socket.gethostname().replace('.', '_')
16 | PAGESIZE = os.sysconf('SC_PAGESIZE')
17 |
18 | # globals
19 | stats = {}
20 | prior_stats = {}
21 |
22 | def _get_usage_info():
23 | rusage = getrusage(RUSAGE_SELF)
24 | curr_usage = rusage.ru_utime + rusage.ru_stime
25 | curr_time = time.time()
26 | return curr_usage, curr_time
27 |
28 | last_usage, last_usage_time = _get_usage_info()
29 |
30 |
31 | def incr(stat, amount=1):
32 | stats.setdefault(stat, 0)
33 | stats[stat] += amount
34 |
35 |
36 | def max(stat, new_val):
37 | try:
38 | if stats[stat] < new_val:
39 | stats[stat] = new_val
40 | except KeyError:
41 | stats[stat] = new_val
42 |
43 |
44 | def append(stat, val):
45 | stats.setdefault(stat, [])
46 | stats[stat].append(val)
47 |
48 |
49 | def get_cpu_usage():
50 | global last_usage, last_usage_time
51 | curr_usage, curr_time = _get_usage_info()
52 |
53 | usage_diff = curr_usage - last_usage
54 | time_diff = curr_time - last_usage_time
55 | cpu_usage_percent = (usage_diff / time_diff) * 100.
56 |
57 | last_usage, last_usage_time = curr_usage, curr_time
58 | return cpu_usage_percent
59 |
60 |
61 | def get_mem_usage():
62 | rss_pages = int(open('/proc/self/statm').read().split()[1])
63 | return rss_pages * PAGESIZE
64 |
65 |
66 | def record_metrics():
67 | _stats = stats.copy()
68 | stats.clear()
69 |
70 | # rurouni cache
71 | record = cache_record
72 | update_times = _stats.get('updateTimes', [])
73 | committed_points = _stats.get('committedPoints', 0)
74 | creates = _stats.get('creates', 0)
75 | dropped_creates = _stats.get('droppedCreates', 0)
76 | errors = _stats.get('errors', 0)
77 | cache_queries = _stats.get('cacheQueries', 0)
78 | cache_overflow = _stats.get('cacheOverflow', 0)
79 |
80 | if update_times:
81 | avg_update_time = sum(update_times) / len(update_times)
82 | record('avgUpdateTime', avg_update_time)
83 |
84 | if committed_points:
85 | points_per_update = float(committed_points) / len(update_times)
86 | record('pointsPerUpdate', points_per_update)
87 |
88 | record('updateOperations', len(update_times))
89 | record('committedPoints', committed_points)
90 | record('creates', creates)
91 | record('droppedCreates', dropped_creates)
92 | record('errors', errors)
93 | record('cacheQueries', cache_queries)
94 | record('cacheOverflow', cache_overflow)
95 |
96 | record('metricReceived', _stats.get('metricReceived', 0))
97 | record('cpuUsage', get_cpu_usage())
98 |     # this only works on Linux
99 | try:
100 | record('memUsage', get_mem_usage())
101 | except:
102 | pass
103 |
104 |
105 | def cache_record(metric_type, val):
106 | prefix = settings.RUROUNI_METRIC
107 | metric_tmpl = prefix + '.%s.%s.%s'
108 | if settings.instance is None:
109 | metric = metric_tmpl % (HOSTNAME, 'a', metric_type)
110 | else:
111 | metric = metric_tmpl % (HOSTNAME, settings.instance, metric_type)
112 | datapoint = int(time.time()), val
113 | cache.MetricCache.put(metric, datapoint)
114 |
115 |
116 | class InstrumentationService(Service):
117 | def __init__(self):
118 | self.record_task = LoopingCall(record_metrics)
119 | self.metric_interval = settings.RUROUNI_METRIC_INTERVAL
120 |
121 | def startService(self):
122 | if self.metric_interval > 0:
123 | self.record_task.start(self.metric_interval, False)
124 | Service.startService(self)
125 |
126 | def stopService(self):
127 | if self.metric_interval > 0:
128 | self.record_task.stop()
129 | Service.stopService(self)
130 |
131 |
132 | # avoid import circularities
133 | from rurouni import cache
134 |
--------------------------------------------------------------------------------
/rurouni/storage.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import os
4 | import re
5 | import glob
6 | import errno
7 | from os.path import join, sep, splitext, basename, dirname
8 |
9 | import kenshin
10 | from kenshin.utils import mkdir_p
11 | from rurouni import log
12 | from rurouni.conf import settings, OrderedConfigParser
13 |
14 |
15 | def getFilePath(schema_name, file_idx):
16 | return join(settings.LOCAL_DATA_DIR, settings['instance'],
17 | schema_name, '%d.hs' % file_idx)
18 |
19 |
20 | def getMetricPath(metric):
21 | path = metric.replace('.', sep)
22 | return join(settings.LOCAL_LINK_DIR, settings['instance'], path + '.hs')
23 |
24 |
25 | def createLink(metric, file_path):
26 | metric_path = getMetricPath(metric)
27 | try:
28 | _createLinkHelper(metric_path, file_path)
29 | except OSError as exc:
30 | if exc.errno == errno.ENAMETOOLONG:
31 | pass
32 | else:
33 | raise
34 |
35 |
36 | def _createLinkHelper(link_path, file_path):
37 | """
38 | Create symlink link_path -> file_path.
39 | """
40 | dir_ = dirname(link_path)
41 | mkdir_p(dir_)
42 | if os.path.lexists(link_path):
43 | os.rename(link_path, link_path + '.bak')
44 | os.symlink(file_path, link_path)
45 |
46 |
47 | def getFilePathByInstanceDir(instance_data_dir, schema_name, file_idx):
48 | return join(instance_data_dir, schema_name, "%d.hs" % file_idx)
49 |
50 |
51 | def getMetricPathByInstanceDir(instance_link_dir, metric):
52 | path = metric.replace(".", sep)
53 | return join(instance_link_dir, path + ".hs")
54 |
55 |
56 | def rebuildIndex(instance_data_dir, instance_index_file):
57 | """
58 |     Rebuild the index file from the data files; a data file that contains
59 |     no valid metric is removed.
60 | """
61 | out = open(instance_index_file, 'w')
62 | for schema_name in os.listdir(instance_data_dir):
63 | hs_file_pat = os.path.join(instance_data_dir, schema_name, '*.hs')
64 | for fp in glob.glob(hs_file_pat):
65 | with open(fp) as f:
66 | empty_flag = True
67 | header = kenshin.header(f)
68 | metric_list = header['tag_list']
69 | file_id = splitext(basename(fp))[0]
70 | for i, metric in enumerate(metric_list):
71 | if metric != '':
72 | empty_flag = False
73 | out.write('%s %s %s %s\n' %
74 | (metric, schema_name, file_id, i))
75 | if empty_flag:
76 | os.remove(fp)
77 | out.close()
78 |
79 |
80 | def rebuildLink(instance_data_dir, instance_link_dir):
81 | for schema_name in os.listdir(instance_data_dir):
82 | hs_file_pat = os.path.join(instance_data_dir, schema_name, '*.hs')
83 | for fp in glob.glob(hs_file_pat):
84 | with open(fp) as f:
85 | header = kenshin.header(f)
86 | metric_list = header['tag_list']
87 | for metric in metric_list:
88 | if metric != '':
89 | link_path = getMetricPathByInstanceDir(instance_link_dir, metric)
90 | try:
91 | _createLinkHelper(link_path, fp)
92 | except OSError as exc:
93 | if exc.errno == errno.ENAMETOOLONG:
94 | pass
95 | else:
96 | raise
97 |
98 |
99 | class Archive:
100 | def __init__(self, secPerPoint, points):
101 | self.secPerPoint = secPerPoint
102 | self.points = points
103 |
104 | def __str__(self):
105 | return 'Archive(%s, %s)' % (self.secPerPoint, self.points)
106 |
107 | def getTuple(self):
108 | return self.secPerPoint, self.points
109 |
110 | @staticmethod
111 | def fromString(retentionDef):
112 | rs = kenshin.parse_retention_def(retentionDef)
113 | return Archive(*rs)
114 |
115 |
116 | class Schema(object):
117 | def match(self, metric):
118 | raise NotImplementedError()
119 |
120 |
121 | class DefaultSchema(Schema):
122 | def __init__(self, name, xFilesFactor, aggregationMethod, archives,
123 | cache_retention, metrics_max_num, cache_ratio):
124 | self.name = name
125 | self.xFilesFactor = xFilesFactor
126 | self.aggregationMethod = aggregationMethod
127 | self.archives = archives
128 | self.cache_retention = cache_retention
129 | self.metrics_max_num = metrics_max_num
130 | self.cache_ratio = cache_ratio
131 |
132 | def match(self, metric):
133 | return True
134 |
135 |
136 | class PatternSchema(Schema):
137 | def __init__(self, name, pattern, xFilesFactor, aggregationMethod, archives,
138 | cache_retention, metrics_max_num, cache_ratio):
139 | self.name = name
140 | self.pattern = re.compile(pattern)
141 | self.xFilesFactor = xFilesFactor
142 | self.aggregationMethod = aggregationMethod
143 | self.archives = archives
144 | self.cache_retention = cache_retention
145 | self.metrics_max_num = metrics_max_num
146 | self.cache_ratio = cache_ratio
147 |
148 | def match(self, metric):
149 | return self.pattern.match(metric)
150 |
151 |
152 | def loadStorageSchemas(conf_file):
153 | schema_list = []
154 | config = OrderedConfigParser()
155 | config.read(conf_file)
156 |
157 | for section in config.sections():
158 | options = dict(config.items(section))
159 |
160 | pattern = options.get('pattern')
161 | xff = float(options.get('xfilesfactor'))
162 | agg = options.get('aggregationmethod')
163 | retentions = options.get('retentions').split(',')
164 | archives = [Archive.fromString(s).getTuple() for s in retentions]
165 | cache_retention = kenshin.RetentionParser.parse_time_str(
166 | options.get('cacheretention'))
167 | metrics_max_num = options.get('metricsperfile')
168 | cache_ratio = 1.2
169 |
170 | try:
171 | kenshin.validate_archive_list(archives, xff)
172 | except kenshin.InvalidConfig:
173 | log.err("Invalid schema found in %s." % section)
174 |
175 | schema = PatternSchema(section, pattern, float(xff), agg, archives,
176 | int(cache_retention), int(metrics_max_num),
177 | float(cache_ratio))
178 | schema_list.append(schema)
179 |     schema_list.append(defaultSchema)
180 | return schema_list
181 |
182 |
183 | # default schema
184 |
185 | defaultSchema = DefaultSchema(
186 | 'default',
187 | 1.0,
188 | 'avg',
189 |     ((60, 60 * 24 * 7),),  # default retention: 7 days of minutely data
190 | 600,
191 | 40,
192 | 1.2
193 | )
194 |
195 |
196 | class StorageSchemas(object):
197 | def __init__(self, conf_file):
198 | self.schemas = loadStorageSchemas(conf_file)
199 |
200 | def getSchemaByMetric(self, metric):
201 | for schema in self.schemas:
202 | if schema.match(metric):
203 | return schema
204 | return defaultSchema
205 |
206 | def getSchemaByName(self, schema_name):
207 | for schema in self.schemas:
208 | if schema.name == schema_name:
209 | return schema
210 | return None
211 |
212 |
213 | if __name__ == '__main__':
214 | import sys
215 | loadStorageSchemas(sys.argv[1])
216 |
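# Illustrative sketch (not part of the original module): a storage-schemas.conf
# section with the fields loadStorageSchemas() reads. The section name and the
# values are assumptions for illustration, not a recommended configuration.
EXAMPLE_SCHEMA_SECTION = """
[stats]
pattern = ^stats\.
xfilesfactor = 0.5
aggregationmethod = average
retentions = 10s:1d,60s:30d
cacheretention = 10m
metricsperfile = 40
"""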
--------------------------------------------------------------------------------
/rurouni/utils.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | from time import time
4 | from os.path import dirname, basename, abspath, splitext
5 | from rurouni.fnv1a import get_int32_hash
6 |
7 |
8 | def run_twistd_plugin(filename):
9 | from twisted.scripts.twistd import runApp
10 | from twisted.scripts.twistd import ServerOptions
11 | from rurouni.conf import get_parser
12 |
13 | bin_dir = dirname(abspath(filename))
14 | root_dir = dirname(bin_dir)
15 | os.environ.setdefault('GRAPHITE_ROOT', root_dir)
16 |
17 | program = splitext(basename(filename))[0]
18 | parser = get_parser()
19 | (options, args) = parser.parse_args()
20 |
21 | if not args:
22 | parser.print_usage()
23 | return
24 |
25 | twistd_options = []
26 | try:
27 | from twisted.internet import epollreactor
28 | twistd_options.append('--reactor=epoll')
29 | except:
30 | pass
31 |
32 | if options.debug or options.nodaemon:
33 | twistd_options.append('--nodaemon')
34 | if options.pidfile:
35 | twistd_options.extend(['--pidfile', options.pidfile])
36 | if options.umask:
37 | twistd_options.extend(['--umask', options.umask])
38 |
39 | twistd_options.append(program)
40 |
41 | if options.debug:
42 | twistd_options.append('--debug')
43 | for name, value in vars(options).items():
44 | if (value is not None and
45 | name not in ('debug', 'nodaemon', 'pidfile', 'umask')):
46 | twistd_options.extend(["--%s" % name.replace("_", '-'),
47 | value])
48 |
49 | twistd_options.extend(args)
50 | config = ServerOptions()
51 | config.parseOptions(twistd_options)
52 | runApp(config)
53 |
54 |
55 | class TokenBucket(object):
56 | ''' Token Bucket algorithm for rate-limiting.
57 | URL: https://en.wikipedia.org/wiki/Token_bucket
58 |
59 | >>> bucket = TokenBucket(60, 1)
60 | >>> print bucket.consume(6)
61 | True
62 | >>> print bucket.consume(54)
63 | True
64 | >>> print bucket.consume(1)
65 | False
66 | >>> import time
67 | >>> time.sleep(1)
68 | >>> print bucket.consume(1)
69 | True
70 | '''
71 | def __init__(self, capacity, fill_rate):
72 | '''
73 | @capacity: total number of tokens in the bucket.
74 | @fill_rate: the rate in tokens/second that the bucket will be refilled.
75 | '''
76 | self.capacity = float(capacity)
77 | self._tokens = float(capacity)
78 | self.fill_rate = float(fill_rate)
79 | self.timestamp = time()
80 |
81 | def consume(self, tokens):
82 | ''' Consume tokens from the bucket.
83 |
84 | Return True if there were sufficient tokens otherwise False.
85 | '''
86 | if tokens <= self.tokens:
87 | self._tokens -= tokens
88 | return True
89 | else:
90 | return False
91 |
92 | @property
93 | def tokens(self):
94 | ''' Return the current number of tokens in the bucket. '''
95 | if self._tokens < self.capacity:
96 | now = time()
97 | delta = self.fill_rate * (now - self.timestamp)
98 | self._tokens = min(self.capacity, self._tokens + delta)
99 | self.timestamp = now
100 | return self._tokens
101 |
102 | def __repr__(self):
103 | return '<%s %.2f %.2f>' % (
104 | self.__class__.__name__, self.capacity, self.fill_rate)
105 |
106 |
107 | def get_instance_of_metric(metric, num_all_instance):
108 | return get_int32_hash(metric) % num_all_instance
109 |
110 |
111 | if __name__ == '__main__':
112 | import doctest
113 | doctest.testmod()
114 |
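# Illustrative sketch (not part of the original module): routing a metric to a
# cache instance with get_instance_of_metric(). NUM_ALL_INSTANCE = 2 is an
# assumption, and the compiled rurouni.fnv1a extension must be available.
from rurouni.utils import get_instance_of_metric

NUM_ALL_INSTANCE = 2
for example_metric in ('sys.cpu.user', 'sys.mem.free'):
    owner = get_instance_of_metric(example_metric, NUM_ALL_INSTANCE)
    print '%s -> cache instance %d' % (example_metric, owner)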
--------------------------------------------------------------------------------
/rurouni/writer.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import time
3 |
4 | from twisted.application.service import Service
5 | from twisted.internet import reactor
6 |
7 | import kenshin
8 | from rurouni.cache import MetricCache
9 | from rurouni import log
10 | from rurouni.conf import settings
11 | from rurouni.state import instrumentation
12 | from rurouni.storage import getFilePath
13 |
14 |
15 | class WriterService(Service):
16 |
17 | def __init__(self):
18 | pass
19 |
20 | def startService(self):
21 | reactor.callInThread(writeForever)
22 | Service.startService(self)
23 |
24 | def stopService(self):
25 | try:
26 | file_cache_idxs = MetricCache.getAllFileCaches()
27 | writeCachedDataPointsWhenStop(file_cache_idxs)
28 | except Exception as e:
29 | log.err('write error when stopping service: %s' % e)
30 | Service.stopService(self)
31 |
32 |
33 | def writeForever():
34 | while reactor.running:
35 | write = False
36 | try:
37 | file_cache_idxs = MetricCache.writableFileCaches()
38 | if file_cache_idxs:
39 | write = writeCachedDataPoints(file_cache_idxs)
40 | except Exception as e:
41 | log.err('write error: %s' % e)
42 | # The writer thread only sleeps when there is no write
43 | # or an error occurs
44 | if not write:
45 | time.sleep(1)
46 |
47 |
48 | def writeCachedDataPoints(file_cache_idxs):
49 | pop_func = MetricCache.pop
50 | for schema_name, file_idx in file_cache_idxs:
51 | datapoints = pop_func(schema_name, file_idx)
52 | file_path = getFilePath(schema_name, file_idx)
53 |
54 | try:
55 | t1 = time.time()
56 | kenshin.update(file_path, datapoints)
57 | update_time = time.time() - t1
58 | except Exception as e:
59 | log.err('Error writing to %s: %s' % (file_path, e))
60 | instrumentation.incr('errors')
61 | else:
62 | point_cnt = len(datapoints)
63 | instrumentation.incr('committedPoints', point_cnt)
64 | instrumentation.append('updateTimes', update_time)
65 |
66 | if settings.LOG_UPDATES:
67 | log.updates("wrote %d datapoints for %s in %.5f secs" %
68 | (point_cnt, schema_name, update_time))
69 |
70 | return True
71 |
72 |
73 | def writeCachedDataPointsWhenStop(file_cache_idxs):
74 | pop_func = MetricCache.pop
75 | for schema_name, file_idx in file_cache_idxs:
76 | datapoints = pop_func(schema_name, file_idx, int(time.time()), False)
77 | if datapoints:
78 | file_path = getFilePath(schema_name, file_idx)
79 | try:
80 | kenshin.update(file_path, datapoints)
81 | except Exception as e:
82 | log.err('Error writing to %s: %s' % (file_path, e))
83 |
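# Illustrative sketch (not part of the original module): the datapoint shape
# MetricCache.pop() hands to kenshin.update() -- one row per timestamp, each
# row carrying a value for every metric slot in the file. Numbers are made up.
from kenshin.consts import NULL_VALUE

example_datapoints = [
    (1411628770, (0.5, 12.0, NULL_VALUE)),   # (timestamp, one value per pos_idx)
    (1411628780, (0.7, 13.0, NULL_VALUE)),
]
# kenshin.update(file_path, example_datapoints) would write both rows at once.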
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from setuptools import setup, Extension
4 |
5 | import re
6 | import os
7 | import sys
8 | import codecs
9 | from glob import glob
10 |
11 | here = os.path.abspath(os.path.dirname(__file__))
12 |
13 | if sys.version_info < (2,7):
14 | sys.exit('Sorry, Python < 2.7 is not supported')
15 |
16 |
17 | def read(*parts):
18 | # intentionally *not* adding an encoding option to open
19 | return codecs.open(os.path.join(here, *parts), 'r').read()
20 |
21 |
22 | def find_version(*file_paths):
23 | version_file = read(*file_paths)
24 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]",
25 | version_file, re.M)
26 | if version_match:
27 | return version_match.group(1)
28 | raise RuntimeError("Unable to find version string.")
29 |
30 |
31 | long_description = read('README.md')
32 |
33 | setup(
34 | name='kenshin',
35 | version=find_version('kenshin', '__init__.py'),
36 | description='A scalable time series database.',
37 | long_description=long_description,
38 | author='Zhaolong Zhu',
39 | url='https://github.com/douban/Kenshin',
40 | download_url='https://github.com/douban/Kenshin.git',
41 | author_email='zhuzhaolong0@gmail.com',
42 | install_requires=[],
43 | tests_require=['nose'],
44 | packages=['kenshin', 'kenshin.tools', 'rurouni', 'rurouni.state', 'twisted.plugins'],
45 | scripts=glob('bin/*'),
46 | zip_safe=False,
47 | platforms='any',
48 | setup_requires=['Cython'],
49 | ext_modules=[
50 | Extension(
51 | name='%s.%s' % ('rurouni', name),
52 | sources=['%s/%s.pyx' % ('rurouni', name)],
53 | extra_compile_args=['-O3', '-funroll-loops', '-Wall'],
54 | ) for name in ['fnv1a']
55 | ],
56 | )
57 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_agg.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import unittest
4 |
5 | from kenshin.agg import Agg
6 |
7 |
8 | class TestAgg(unittest.TestCase):
9 |
10 | def setUp(self):
11 | self.vals = map(float, range(10))
12 |
13 | def _get_agg_func_by_name(self, name):
14 | return Agg.get_agg_func(Agg.get_agg_id(name))
15 |
16 | def test_get_agg_id(self):
17 | for i, agg in enumerate(Agg.get_agg_type_list()):
18 | id_ = Agg.get_agg_id(agg)
19 | self.assertEqual(id_, i)
20 |
21 | def test_agg_avg(self):
22 | func = self._get_agg_func_by_name('average')
23 | self.assertEqual(func(self.vals), 4.5)
24 |
25 | def test_agg_sum(self):
26 | func = self._get_agg_func_by_name('sum')
27 | self.assertEqual(func(self.vals), 45.0)
28 |
29 | def test_agg_last(self):
30 | func = self._get_agg_func_by_name('last')
31 | self.assertEqual(func(self.vals), 9.0)
32 |
33 | def test_agg_max(self):
34 | func = self._get_agg_func_by_name('max')
35 | self.assertEqual(func(self.vals), 9.0)
36 |
37 | def test_agg_min(self):
38 | func = self._get_agg_func_by_name('min')
39 | self.assertEqual(func(self.vals), 0.0)
40 |
--------------------------------------------------------------------------------
/tests/test_fnv1a.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | import unittest
5 | from rurouni.fnv1a import get_int32_hash
6 |
7 |
8 | class TestFnv1a(unittest.TestCase):
9 |
10 | def _cmp_hash(self, int32_h, uint32_h):
11 | if uint32_h >= 0x80000000:
12 | uint32_h -= 0x100000000
13 | self.assertEqual(int32_h, uint32_h)
14 |
15 | def test_fnv1a_hash(self):
16 | test_cases = [
17 | ("", 0x811c9dc5),
18 | ("a", 0xe40c292c),
19 | ("foobar", 0xbf9cf968),
20 | ("hello", 0x4f9f2cab),
21 | (b"\xff\x00\x00\x01", 0xc48fb86d),
22 | ]
23 |
24 | for s, uint32_h in test_cases:
25 | int32_h = get_int32_hash(s)
26 | self._cmp_hash(int32_h, uint32_h)
27 |
--------------------------------------------------------------------------------
/tests/test_io_performance.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | import shutil
4 | import unittest
5 |
6 | import kenshin.storage
7 | from kenshin.storage import Storage, enable_debug, RetentionParser
8 | from kenshin.utils import mkdir_p
9 |
10 |
11 | class TestStorageIO(unittest.TestCase):
12 | data_dir = '/tmp/kenshin'
13 |
14 | def setUp(self):
15 | if os.path.exists(self.data_dir):
16 | shutil.rmtree(self.data_dir)
17 |
18 | mkdir_p(self.data_dir)
19 | self.storage = Storage(data_dir=self.data_dir)
20 | self.basic_setup = self._basic_setup()
21 | self.storage.create(*self.basic_setup)
22 |
23 | metric_name = self.basic_setup[0]
24 | self.path = self.storage.gen_path(self.data_dir, metric_name)
25 |
26 | def tearDown(self):
27 | shutil.rmtree(self.data_dir)
28 |
29 | def _basic_setup(self):
30 | metric_name = 'sys.cpu.user'
31 | self.file_cnt = 40
32 |
33 | tag_list = ['host=webserver%s,cpu=%s' % (i, i)
34 | for i in range(self.file_cnt)]
35 | archive_list = "1s:1h,60s:2d,300s:7d,15m:25w,12h:5y".split(',')
36 | archive_list = [RetentionParser.parse_retention_def(x)
37 | for x in archive_list]
38 |
39 | x_files_factor = 20
40 | agg_name = 'min'
41 | return [metric_name, tag_list, archive_list, x_files_factor, agg_name]
42 |
43 | def _gen_val(self, i):
44 | res = []
45 | for j in range(self.file_cnt):
46 | res.append(i + 10*j)
47 | return tuple(res)
48 |
49 | def test_io(self):
50 | """
51 |         Test IO performance.
52 | 
53 |         (1000 io/s * 3600 s * 24) / (3*10**6 metrics / 40 metrics/file) = 1152 io/file
54 |         Since header() is called several times during a single write and its data is
55 |         small enough to be served from cache, enable_debug ignores header reads.
56 | """
57 | enable_debug(ignore_header=True)
58 |
59 | now_ts = 1411628779
60 | ten_min = 10 * RetentionParser.TIME_UNIT['minutes']
61 | one_day = RetentionParser.TIME_UNIT['days']
62 | from_ts = now_ts - one_day
63 |
64 | for i in range(one_day / ten_min):
65 | points = [(from_ts + i * ten_min + j, self._gen_val(i * ten_min + j))
66 | for j in range(ten_min)]
67 | self.storage.update(self.path, points, from_ts + (i+1) * ten_min)
68 |
69 | open_ = kenshin.storage.open
70 | io = open_.read_cnt + open_.write_cnt
71 | io_limit = 1152
72 | self.assertLessEqual(io, io_limit)
73 |
--------------------------------------------------------------------------------
/tests/test_storage.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import os
3 | import shutil
4 | import struct
5 | import unittest
6 |
7 | from kenshin.storage import Storage
8 | from kenshin.agg import Agg
9 | from kenshin.utils import mkdir_p, roundup
10 | from kenshin.consts import NULL_VALUE
11 |
12 |
13 | class TestStorageBase(unittest.TestCase):
14 | data_dir = '/tmp/kenshin'
15 |
16 | def setUp(self):
17 | if os.path.exists(self.data_dir):
18 | shutil.rmtree(self.data_dir)
19 |
20 | mkdir_p(self.data_dir)
21 | self.storage = Storage(data_dir=self.data_dir)
22 | self.basic_setup = self._basic_setup()
23 | self.storage.create(*self.basic_setup)
24 |
25 | metric_name = self.basic_setup[0]
26 | self.path = self.storage.gen_path(self.data_dir, metric_name)
27 | tag_list = self.basic_setup[1]
28 | self.null_point = (None,) * len(tag_list)
29 |
30 | def tearDown(self):
31 | shutil.rmtree(self.data_dir)
32 |
33 | @staticmethod
34 | def _gen_val(i, num=2):
35 | return [10 * j + i for j in range(num)]
36 |
37 | class TestStorage(TestStorageBase):
38 |
39 | def _basic_setup(self):
40 | metric_name = 'sys.cpu.user'
41 |
42 | tag_list = [
43 | 'host=webserver01,cpu=0',
44 | 'host=webserver01,cpu=1',
45 | ]
46 | archive_list = [
47 | (1, 6),
48 | (3, 6),
49 | ]
50 | x_files_factor = 1.0
51 | agg_name = 'min'
52 | return [metric_name, tag_list, archive_list, x_files_factor, agg_name]
53 |
54 | def test_gen_path(self):
55 | metric_name = 'a.b.c'
56 | data_dir = '/x/y'
57 | path = self.storage.gen_path(data_dir, metric_name)
58 | self.assertEqual(path, '/x/y/a/b/c.hs')
59 |
60 | def test_header(self):
61 | metric_name, tag_list, archive_list, x_files_factor, agg_name = self.basic_setup
62 | with open(self.path, 'rb') as f:
63 | header = self.storage.header(f)
64 |
65 | self.assertEqual(tag_list, header['tag_list'])
66 | self.assertEqual(x_files_factor, header['x_files_factor'])
67 | self.assertEqual(Agg.get_agg_id(agg_name), header['agg_id'])
68 |
69 | _archive_list = [(x['sec_per_point'], x['count'])
70 | for x in header['archive_list']]
71 | self.assertEqual(archive_list, _archive_list)
72 |
73 | def test_basic_update_fetch(self):
74 | now_ts = 1411628779
75 | num_points = 5
76 | points = [(now_ts - i, self._gen_val(i)) for i in range(1, num_points+1)]
77 | self.storage.update(self.path, points, now_ts)
78 |
79 | from_ts = now_ts - num_points
80 | series = self.storage.fetch(self.path, from_ts, now=now_ts)
81 |
82 | time_info = (from_ts, now_ts, 1)
83 | vals = [tuple(map(float, v)) for _, v in sorted(points)]
84 | expected = (time_info, vals)
85 | self.assertEqual(series[1:], expected)
86 |
87 | def test_update_propagate(self):
88 | now_ts = 1411628779
89 | num_points = 6
90 | points = [(now_ts - i, self._gen_val(i)) for i in range(1, num_points+1)]
91 | self.storage.update(self.path, points, now_ts)
92 |
93 | from_ts = now_ts - num_points - 1
94 | series = self.storage.fetch(self.path, from_ts, now=now_ts)
95 | time_info = (from_ts, roundup(now_ts, 3), 3)
96 | expected = time_info, [(5.0, 15.0), (2.0, 12.0), self.null_point]
97 | self.assertEqual(series[1:], expected)
98 |
99 | def test_null_point(self):
100 | now_ts = 1411628779
101 | num_points = 6
102 | points = [(now_ts - i, self._gen_val(i)) for i in range(1, num_points+1)]
103 | # change the last two points to null value
104 | points[4] = (now_ts - 5, (NULL_VALUE, NULL_VALUE))
105 | points[5] = (now_ts - 6, (NULL_VALUE, NULL_VALUE))
106 |
107 | self.storage.update(self.path, points, now_ts)
108 |
109 | from_ts = now_ts - num_points - 1
110 | series = self.storage.fetch(self.path, from_ts, now=now_ts)
111 | time_info = (from_ts, roundup(now_ts, 3), 3)
112 | expected = time_info, [self.null_point, (2.0, 12.0), self.null_point]
113 | self.assertEqual(series[1:], expected)
114 |
115 | def test_update_old_points(self):
116 | now_ts = 1411628779
117 | num_points = 12
118 | points = [(now_ts - i, self._gen_val(i)) for i in range(7, num_points+1)]
119 | self.storage.update(self.path, points, now_ts)
120 |
121 | from_ts = now_ts - num_points - 1
122 | series = self.storage.fetch(self.path, from_ts, now=now_ts)
123 | time_info = (from_ts, roundup(now_ts, 3), 3)
124 | expected = time_info, [(12.0, 22.0), (10.0, 20.0), (7.0, 17.0), self.null_point, self.null_point]
125 | self.assertEqual(series[1:], expected)
126 |
127 | def test_fetch_empty_metric(self):
128 | now_ts = 1411628779
129 | from_ts = 1411628775
130 | series = self.storage.fetch(self.path, from_ts, now=now_ts)
131 | time_info = (from_ts, now_ts, 1)
132 | expected = time_info, [self.null_point] * (now_ts - from_ts)
133 | self.assertEqual(series[1:], expected)
134 |
135 | def print_file_content(self):
136 | with open(self.path) as f:
137 | header = self.storage.header(f)
138 | archive_list = header['archive_list']
139 | for i, archive in enumerate(archive_list):
140 | print "--------- archive %d ------------" % i
141 | print archive
142 | f.seek(archive['offset'])
143 | series_str = f.read(archive['size'])
144 | point_format = header['point_format']
145 | series_format = point_format[0] + point_format[1:] * archive['count']
146 | unpacked_series = struct.unpack(series_format, series_str)
147 | print unpacked_series
148 |
149 |
150 | class TestLostPoint(TestStorageBase):
151 |
152 | def _basic_setup(self):
153 | metric_name = 'sys.cpu.user'
154 |
155 | tag_list = [
156 | 'host=webserver01,cpu=0',
157 | 'host=webserver01,cpu=1',
158 | ]
159 | archive_list = [
160 | (1, 60),
161 | (3, 60),
162 | ]
163 | x_files_factor = 5
164 | agg_name = 'min'
165 | return [metric_name, tag_list, archive_list, x_files_factor, agg_name]
166 |
167 | def test_update_propagate(self):
168 | now_ts = 1411628779
169 | point_seeds_list = [range(30, 45), range(15)]
170 | mtime = None
171 | for i, point_seeds in enumerate(point_seeds_list):
172 | if i != 0:
173 | mtime = now_ts - max(point_seeds_list[i - 1])
174 | points = [(now_ts - i, self._gen_val(i)) for i in point_seeds]
175 | self.storage.update(self.path, points, now_ts, mtime)
176 |
177 | from_ts = now_ts - 60 - 1
178 | series = self.storage.fetch(self.path, from_ts, now=now_ts)
179 | time_info = (from_ts, roundup(now_ts, 3), 3)
180 | null = self.null_point
181 | values = [null, null, null, null, null, (44.0, 54.0), (41.0, 51.0),
182 | (38.0, 48.0), (35.0, 45.0), (32.0, 42.0), (30.0, 40.0),
183 | null, null, null, null, (14.0, 24.0), (11.0, 21.0), (8.0, 18.0),
184 | (5.0, 15.0), null, null]
185 | expected = time_info, values
186 | self.assertEqual(series[1:], expected)
187 |
188 | def test_update_propagate_with_special_start_time(self):
189 | now_ts = 1411628779
190 | # start time is 1411628760
191 | point_seeds_list = [range(10, 20), range(1, 7)]
192 | mtime = None
193 | for i, point_seeds in enumerate(point_seeds_list):
194 | if i != 0:
195 | mtime = now_ts - max(point_seeds_list[i - 1])
196 | points = [(now_ts - i, self._gen_val(i)) for i in point_seeds]
197 | self.storage.update(self.path, points, now_ts, mtime)
198 | from_ts = 1411628760
199 | until_ts = from_ts + 15
200 | series = self.storage.fetch(self.path, from_ts, until_ts,
201 | now=from_ts + 60 + 1)
202 | time_info = (from_ts, roundup(until_ts, 3), 3)
203 | values = [(17.0, 27.0), (14.0, 24.0), (11.0, 21.0), (10.0, 20.0), (5.0, 15.0)]
204 | expected = (time_info, values)
205 | self.assertEqual(series[1:], expected)
206 |
207 | def test_basic_update(self):
208 | now_ts = 1411628779
209 | point_seeds = [1, 2, 4, 5]
210 | points = [(now_ts - i, self._gen_val(i)) for i in point_seeds]
211 | self.storage.update(self.path, points, now_ts)
212 |
213 | from_ts = now_ts - 5
214 | series = self.storage.fetch(self.path, from_ts, now=now_ts)
215 | time_info = (from_ts, now_ts, 1)
216 | vals = [(5.0, 15.0), (4.0, 14.0), self.null_point, (2.0, 12.0), (1.0, 11.0)]
217 | expected = time_info, vals
218 | self.assertEqual(series[1:], expected)
219 |
220 |
221 | class TestMultiArchive(TestStorageBase):
222 |
223 | def _basic_setup(self):
224 | metric_name = 'sys.cpu.user'
225 |
226 | tag_list = [
227 | 'host=webserver01,cpu=0',
228 | 'host=webserver01,cpu=1',
229 | 'host=webserver01,cpu=2',
230 | ]
231 | archive_list = [
232 | (1, 60),
233 | (3, 60),
234 | (6, 60),
235 | ]
236 | x_files_factor = 5
237 | agg_name = 'min'
238 | return [metric_name, tag_list, archive_list, x_files_factor, agg_name]
239 |
240 | def test_time_range(self):
241 | now_ts = 1411628779
242 |         # downsample time of archive 2: 1411628760 (= floor(1411628779 / (6*5)) * (6*5)); see the note after this listing
243 |         point_seeds_list = [range(19, 30), range(5, 2)]  # range(5, 2) is empty, so the second pass writes no new points
244 | mtime = None
245 | for i, point_seeds in enumerate(point_seeds_list):
246 | if i != 0:
247 | mtime = now_ts - max(point_seeds_list[i - 1])
248 | points = [(now_ts - i, self._gen_val(i, num=3)) for i in point_seeds]
249 | self.storage.update(self.path, points, now_ts, mtime)
250 | from_ts = 1411628760 - 2 * 6
251 | until_ts = 1411628760
252 | series = self.storage.fetch(self.path, from_ts, until_ts,
253 | now=from_ts + 180 + 1)
254 | time_info = (from_ts, roundup(until_ts, 6), 6)
255 | values = [(26.0, 36.0, 46.0), (20.0, 30.0, 40.0)]
256 | expected = (time_info, values)
257 | self.assertEqual(series[1:], expected)
258 |
--------------------------------------------------------------------------------
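Note on the timestamp arithmetic the propagation tests above rely on: the expected
time_info tuples suggest that roundup(ts, step) (imported earlier in this file)
rounds a timestamp up to the next multiple of the step, and the "downsample time"
comment in test_time_range is the timestamp floored to a multiple of 6*5 = 30
seconds. A minimal sketch of that arithmetic, using a stand-in helper rather than
the project's own roundup:

    def roundup_sketch(ts, step):
        # round ts up to the next multiple of step (exact multiples unchanged)
        return ts if ts % step == 0 else ts - ts % step + step

    now_ts = 1411628779
    assert roundup_sketch(now_ts, 3) == 1411628781    # fetch window end at 3s resolution
    assert now_ts - now_ts % (6 * 5) == 1411628760    # downsample boundary in test_time_range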
/twisted/plugins/rurouni_cache_plugin.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | from zope.interface import implements
3 |
4 | from twisted.application.service import IServiceMaker
5 | from twisted.plugin import IPlugin
6 |
7 | from rurouni import service
8 | from rurouni import conf
9 |
10 |
11 | class RurouniServiceMaker(object):
12 | implements(IServiceMaker, IPlugin)
13 |
14 | tapname = 'rurouni-cache'
15 | description = 'Collect stats for graphite'
16 | options = conf.RurouniOptions
17 |
18 | def makeService(self, options):
19 | return service.createCacheService(options)
20 |
21 |
22 | serviceMaker = RurouniServiceMaker()
23 |
--------------------------------------------------------------------------------
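Note on how this plugin is used: because the module lives under twisted/plugins,
twisted's plugin discovery picks up RurouniServiceMaker and exposes the tapname
above as a twistd subcommand, e.g. twistd --nodaemon rurouni-cache, plus whatever
options conf.RurouniOptions defines. The sketch below builds the same service
directly, which is what twistd ultimately does through makeService(); it assumes
RurouniOptions is a twisted usage.Options subclass (required for twistd plugins)
and that its defaults are usable without extra command-line arguments, which may
not hold in a real deployment:

    from rurouni import conf, service

    options = conf.RurouniOptions()
    options.parseOptions([])                     # parse an empty command line
    svc = service.createCacheService(options)    # the same call makeService() performs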