├── .gitignore ├── .python-version ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── bin ├── kenshin-cache-query.py ├── kenshin-change-schema.py ├── kenshin-debug.py ├── kenshin-delete.py ├── kenshin-fetch.py ├── kenshin-find-file.py ├── kenshin-get-metrics.py ├── kenshin-info.py ├── kenshin-rebuild-index.py ├── kenshin-rebuild-link.py ├── kenshin-rebuild.py ├── kenshin-rehash.py ├── kenshin-restart.py ├── kenshin-send-zero-metric.py └── rurouni-cache.py ├── conf ├── rurouni.conf.example └── storage-schemas.conf.example ├── examples ├── metric_stresser.py ├── rurouni-client.py └── rurouni-pickle-client.py ├── img ├── docs │ ├── cluster-before.png │ ├── cluster-now.png │ ├── kenshin-structure.png │ └── query_perf.png ├── kenshin-perf.png └── kenshin.gif ├── kenshin ├── __init__.py ├── agg.py ├── consts.py ├── storage.py ├── tools │ ├── __init__.py │ ├── hash.py │ └── whisper_tool.py └── utils.py ├── misc ├── init_setup_demo.sh ├── update_version.sh └── versioning.py ├── requirements-dev.txt ├── requirements.txt ├── rurouni ├── __init__.py ├── cache.py ├── conf.py ├── exceptions.py ├── fnv1a.pyx ├── log.py ├── protocols.py ├── service.py ├── state │ ├── __init__.py │ ├── events.py │ └── instrumentation.py ├── storage.py ├── utils.py └── writer.py ├── setup.py ├── tests ├── __init__.py ├── test_agg.py ├── test_fnv1a.py ├── test_io_performance.py └── test_storage.py └── twisted └── plugins └── rurouni_cache_plugin.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *.confc 5 | *~ 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .cache 41 | nosetests.xml 42 | coverage.xml 43 | 44 | # Translations 45 | *.mo 46 | *.pot 47 | 48 | # Django stuff: 49 | *.log 50 | 51 | # Sphinx documentation 52 | docs/_build/ 53 | 54 | # PyBuilder 55 | target/ 56 | 57 | # twisted plugin 58 | *.cache 59 | 60 | # test data 61 | data/ 62 | 63 | # ctags 64 | .tags_sorted_by_file 65 | .tags 66 | 67 | # venv 68 | venv/ 69 | 70 | rurouni/fnv1a.c 71 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 2.7.12 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | 6 | install: 7 | - pip install --upgrade pip 8 | - pip install --upgrade setuptools 9 | - pip install -r requirements-dev.txt 10 | - python setup.py build_ext --inplace 11 | - python setup.py install 12 | 13 | script: nosetests 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2008-2012 Chris Davis; 2011-2016 The Graphite Project 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | init: 2 | test -d ${KenshinVenv} || virtualenv ${KenshinVenv} 3 | 4 | install: init 5 | @source ${KenshinVenv}/bin/activate; pip install -r requirements.txt 6 | @source ${KenshinVenv}/bin/activate; python setup.py install 7 | 8 | restart_rurouni: 9 | source ${KenshinVenv}/bin/activate; python bin/kenshin-restart.py 10 | 11 | restart_relay: 12 | find /service -name 'carbon-relay-*' | xargs -rL 1 svc -t 13 | 14 | restart_web: 15 | find /service -name 'graphite-*' | xargs -rL 1 svc -t -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Kenshin 2 | ============= 3 | 4 | [![travis-ci status](https://travis-ci.org/douban/Kenshin.svg)](https://travis-ci.org/douban/Kenshin) 5 | 6 | > Kenshin ([るろうに剣心](http://zh.wikipedia.org/wiki/%E6%B5%AA%E5%AE%A2%E5%89%91%E5%BF%83)) 7 | 8 | The Kenshin project consists of two major components: 9 | 10 | - `Kenshin` is a fixed-size time-series database format, similar in design to [Whisper](https://github.com/graphite-project/whisper), and an alternative to Whisper as the storage component of [Graphite](https://github.com/graphite-project). Whisper performs lots of tiny I/O operations on lots of different files; Kenshin aims to improve that I/O performance. For more design details, please refer to the [design docs](https://github.com/douban/Kenshin/wiki/design) (Chinese) and the QCon 2016 presentation [slides](https://github.com/zzl0/zzl0.github.com/raw/master/files/QCon-Kenshin.pdf). 11 | 12 | - `Rurouni-cache` is a storage agent that sits in front of Kenshin and batches up writes to files so that they become more sequential; rurouni-cache is to Kenshin as carbon-cache is to Whisper. 13 | 14 | Kenshin is developed and maintained by Douban Inc. It currently runs in our production environment, powering all metrics (host, service, DAE app, user-defined) at douban.com. 15 | 16 | 17 | ### What's the performance of Kenshin? 18 | 19 | 20 | In our environment, after switching to Kenshin, IOPS dropped by 97.5%, and query latency is not significantly higher than with Whisper. 21 | 22 | 23 | 24 | 25 | Quick Start 26 | ------------------ 27 | 28 | We recommend using virtualenv when installing dependencies: 29 | 30 | $ git clone https://github.com/douban/Kenshin.git 31 | $ cd Kenshin 32 | $ virtualenv venv 33 | $ source venv/bin/activate 34 | $ pip install -r requirements.txt 35 | 36 | Tests can be run using nosetests: 37 | 38 | $ nosetests -v 39 | 40 | Set up the demo configuration: 41 | 42 | $ misc/init_setup_demo.sh 43 | 44 | Build and install Kenshin: 45 | 46 | $ python setup.py build_ext --inplace && python setup.py install 47 | 48 | Start two rurouni-cache instances: 49 | 50 | $ python bin/rurouni-cache.py --debug --config=conf/rurouni.conf --instance=0 start 51 | $ python bin/rurouni-cache.py --debug --config=conf/rurouni.conf --instance=1 start 52 | 53 | Then go to [Graphite-Kenshin](https://github.com/douban/graphite-kenshin) to start the web instances. 54 |
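Once both instances are up, you can feed them data points over the Graphite plaintext protocol (one `metric value timestamp` line per data point); the repository also ships `examples/rurouni-client.py` and `examples/rurouni-pickle-client.py`. The snippet below is only a minimal illustrative sketch, assuming instance 0 is listening on `LINE_RECEIVER_PORT` 2003 as in `conf/rurouni.conf.example`, and using a made-up metric name:

    import socket
    import time

    sock = socket.socket()
    sock.connect(('127.0.0.1', 2003))  # LINE_RECEIVER_PORT of instance 0
    now = int(time.time())
    # one "metric value timestamp" line per data point
    sock.sendall('example.host.cpu.usage 42 %d\n' % now)
    sock.close()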
55 | FAQ 56 | ---------- 57 | 58 | 59 | ### Why don't you just use whisper? 60 | 61 | Whisper is great, and initially we did use it. Over time, though, we ran into several issues: 62 | 63 | 1. Whisper uses a lot of I/O, for several reasons: 64 | - It uses one file per metric. 65 | - The realtime downsampling feature (different data resolutions based on age) causes a lot of extra I/O. 66 | 2. Carbon-cache and carbon-relay are inefficient and can even become CPU-bound. We didn't write our own carbon-relay, but replaced it with [carbon-c-relay](https://github.com/grobian/carbon-c-relay). 67 | 68 | 69 | ### Why did you totally rewrite whisper? Couldn't you just submit a patch? 70 | 71 | The reason we didn't simply submit a patch for Whisper is that Kenshin's design is incompatible with Whisper's: Whisper uses one file per metric, whereas Kenshin merges N metrics into one file. 72 | 73 | ### How do I integrate Kenshin with Graphite-Web? 74 | 75 | We wrote a plugin for Graphite-API named [Graphite-Kenshin](https://github.com/douban/graphite-kenshin). 76 | 77 | Acknowledgments 78 | ------------------ 79 | 80 | - Thanks to the [Graphite](https://github.com/graphite-project) project. 81 | - Thanks to [@grobian](https://github.com/grobian) for the [carbon-c-relay](https://github.com/grobian/carbon-c-relay) project. 82 | 83 | 84 | Contributors 85 | --------------- 86 | 87 | - [@zzl0](https://github.com/zzl0) 88 | - [@mckelvin](https://github.com/mckelvin) 89 | - [@windreamer](https://github.com/windreamer) 90 | - [@youngsofun](https://github.com/youngsofun) 91 | - [@camper42](https://github.com/camper42) 92 | 93 | License 94 | ------- 95 | 96 | Kenshin is licensed under version 2.0 of the Apache License. See the [LICENSE](/LICENSE) file for details. 97 | -------------------------------------------------------------------------------- /bin/kenshin-cache-query.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import argparse 5 | import struct 6 | import socket 7 | import cPickle as pickle 8 | from ConfigParser import ConfigParser 9 | 10 | from rurouni.utils import get_instance_of_metric 11 | 12 | 13 | def gen_rurouni_query_port(conf_file): 14 | parser = ConfigParser() 15 | parser.read(conf_file) 16 | prefix = 'cache:' 17 | rs = {} 18 | for s in parser.sections(): 19 | if s.startswith(prefix): 20 | instance = int(s[len(prefix):]) 21 | for k, v in parser.items(s): 22 | k = k.upper() 23 | if k == 'CACHE_QUERY_PORT': 24 | rs[instance] = int(v) 25 | return rs 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument('--server', default='127.0.0.1', 31 | help="server's host(or ip).") 32 | parser.add_argument('--num', type=int, default=3, 33 | help='number of rurouni caches.') 34 | parser.add_argument('--conf', 35 | help='rurouni-cache conf file path.') 36 | parser.add_argument('metric', help="metric name.") 37 | args = parser.parse_args() 38 | 39 | rurouni_query_ports = gen_rurouni_query_port(args.conf) 40 | server = args.server 41 | metric = args.metric 42 | num = args.num 43 | port_idx = get_instance_of_metric(metric, num) 44 | port = rurouni_query_ports[port_idx] 45 | 46 | conn = socket.socket() 47 | try: 48 | conn.connect((server, port)) 49 | except socket.error: 50 | raise SystemError("Couldn't connect to %s on port %s" % 51 | (server, port)) 52 | 53 | request = { 54 | 'type': 'cache-query', 55 | 'metric': metric, 56 | } 57 | 58 | serialized_request = pickle.dumps(request, protocol=-1) 59 | length = struct.pack('!L', len(serialized_request)) 60 | request_packet = length + serialized_request 61 | 62 | try: 63 | conn.sendall(request_packet) 64 | rs = recv_response(conn) 65 | print rs 66 | except Exception as e: 67 | raise e 68 | 69 | 70 | def
recv_response(conn): 71 | length = recv_exactly(conn, 4) 72 | body_size = struct.unpack('!L', length)[0] 73 | body = recv_exactly(conn, body_size) 74 | return pickle.loads(body) 75 | 76 | 77 | def recv_exactly(conn, num_bytes): 78 | buf = '' 79 | while len(buf) < num_bytes: 80 | data = conn.recv(num_bytes - len(buf)) 81 | if not data: 82 | raise Exception("Connection lost.") 83 | buf += data 84 | return buf 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /bin/kenshin-change-schema.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import sys 4 | import os 5 | import time 6 | import glob 7 | import struct 8 | 9 | import kenshin 10 | from kenshin.consts import NULL_VALUE 11 | from kenshin.agg import Agg 12 | from rurouni.storage import loadStorageSchemas 13 | 14 | 15 | # Three action types. 16 | NO_OPERATION, CHANGE_META, REBUILD = range(3) 17 | 18 | 19 | def get_schema(storage_schemas, schema_name): 20 | for schema in storage_schemas: 21 | if schema.name == schema_name: 22 | return schema 23 | 24 | 25 | def resize_data_file(schema, data_file): 26 | print data_file 27 | with open(data_file) as f: 28 | header = kenshin.header(f) 29 | retentions = schema.archives 30 | old_retentions = [(x['sec_per_point'], x['count']) 31 | for x in header['archive_list']] 32 | msg = [] 33 | action = NO_OPERATION 34 | 35 | # x files factor 36 | if schema.xFilesFactor != header['x_files_factor']: 37 | action = CHANGE_META 38 | msg.append("x_files_factor: %f -> %f" % 39 | (header['x_files_factor'], schema.xFilesFactor)) 40 | 41 | # agg method 42 | old_agg_name = Agg.get_agg_name(header['agg_id']) 43 | if schema.aggregationMethod != old_agg_name: 44 | action = CHANGE_META 45 | msg.append("agg_name: %s -> %s" % 46 | (old_agg_name, schema.aggregationMethod)) 47 | 48 | # retentions 49 | if retentions != old_retentions: 50 | action = REBUILD 51 | msg.append("retentions: %s -> %s" % (old_retentions, retentions)) 52 | 53 | if action == NO_OPERATION: 54 | print "No operation needed." 55 | return 56 | 57 | elif action == CHANGE_META: 58 | print 'Change Meta.' 59 | print '\n'.join(msg) 60 | change_meta(data_file, schema, header['max_retention']) 61 | return 62 | 63 | elif action == REBUILD: 64 | print 'Rebuild File.' 65 | print '\n'.join(msg) 66 | rebuild(data_file, schema, header, retentions) 67 | 68 | else: 69 | raise ValueError(action) 70 | 71 | 72 | def change_meta(data_file, schema, max_retention): 73 | with open(data_file, 'r+b') as f: 74 | format = '!2Lf' 75 | agg_id = Agg.get_agg_id(schema.aggregationMethod) 76 | xff = schema.xFilesFactor 77 | packed_data = struct.pack(format, agg_id, max_retention, xff) 78 | f.write(packed_data) 79 | 80 | 81 | def rebuild(data_file, schema, header, retentions): 82 | now = int(time.time()) 83 | tmpfile = data_file + '.tmp' 84 | if os.path.exists(tmpfile): 85 | print "Removing previous temporary database file: %s" % tmpfile 86 | os.unlink(tmpfile) 87 | 88 | print "Creating new kenshin database: %s" % tmpfile 89 | kenshin.create(tmpfile, 90 | header['tag_list'], 91 | retentions, 92 | schema.xFilesFactor, 93 | schema.aggregationMethod) 94 | 95 | size = os.stat(tmpfile).st_size 96 | old_size = os.stat(data_file).st_size 97 | 98 | print "Created: %s (%d bytes, was %d bytes)" % ( 99 | tmpfile, size, old_size) 100 | 101 | print "Migrating data to new kenshin database ..." 
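# Migration works archive by archive: fetch the window
# [now - retention + sec_per_point, now] from the old file, drop empty
# points, convert None values back to NULL_VALUE, and feed the points to
# kenshin.update() on the temporary file; the old file is kept as a ".bak"
# backup once the new file has been renamed into place.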
102 | for archive in header['archive_list']: 103 | from_time = now - archive['retention'] + archive['sec_per_point'] 104 | until_time = now 105 | _, timeinfo, values = kenshin.fetch(data_file, from_time, until_time) 106 | datapoints = zip(range(*timeinfo), values) 107 | datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]] 108 | for _, values in datapoints: 109 | for i, v in enumerate(values): 110 | if v is None: 111 | values[i] = NULL_VALUE 112 | kenshin.update(tmpfile, datapoints) 113 | backup = data_file + ".bak" 114 | 115 | print 'Renaming old database to: %s' % backup 116 | os.rename(data_file, backup) 117 | 118 | print "Renaming new database to: %s" % data_file 119 | try: 120 | os.rename(tmpfile, data_file) 121 | except Exception as e: 122 | print "Operation failed, restoring backup" 123 | os.rename(backup, data_file) 124 | raise e 125 | # Notice: by default, '.bak' files are not deleted. 126 | 127 | 128 | def main(): 129 | usage = ("e.g: kenshin-change-schema.py -d ../graphite-root/conf/ -n default -f '../graphite-root/storage/data/*/default/*.hs'\n" 130 | "Note: kenshin combined many metrics to one file, " 131 | " please check file's meta data before you change it. " 132 | " (use keshin-info.py to view file's meta data)") 133 | 134 | import argparse 135 | parser = argparse.ArgumentParser(description=usage, 136 | formatter_class=argparse.RawTextHelpFormatter) 137 | parser.add_argument( 138 | "-d", "--conf-dir", required=True, help="kenshin conf directory.") 139 | parser.add_argument( 140 | "-n", "--schema-name", required=True, help="schema name.") 141 | parser.add_argument( 142 | "-f", "--files", required=True, 143 | help="metric data file paterns. (e.g. /data/kenshin/storage/data/*/mfs/*.hs)") 144 | args = parser.parse_args() 145 | 146 | storage_conf_path = os.path.join(args.conf_dir, 'storage-schemas.conf') 147 | storage_schemas = loadStorageSchemas(storage_conf_path) 148 | schema = get_schema(storage_schemas, args.schema_name) 149 | if not schema: 150 | print 'not matched schema name: %s' % args.schema_name 151 | sys.exit(1) 152 | for f in sorted(glob.glob(args.files)): 153 | resize_data_file(schema, os.path.abspath(f)) 154 | 155 | 156 | if __name__ == '__main__': 157 | main() 158 | -------------------------------------------------------------------------------- /bin/kenshin-debug.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import argparse 5 | import struct 6 | import kenshin 7 | from datetime import datetime 8 | from kenshin.utils import get_metric 9 | 10 | 11 | def timestamp_to_datestr(ts): 12 | try: 13 | d = datetime.fromtimestamp(ts) 14 | return d.strftime('%Y-%m-%d %H:%M:%S') 15 | except: 16 | return 'invalid timestamp' 17 | 18 | 19 | def get_point(fh, offset, size, format): 20 | fh.seek(offset) 21 | data = fh.read(size) 22 | return struct.unpack(format, data) 23 | 24 | 25 | def run(filepath, archive_idx, point_idx, error): 26 | with open(filepath) as f: 27 | header = kenshin.header(f) 28 | archive = header['archive_list'][archive_idx] 29 | point_size = header['point_size'] 30 | point_format = header['point_format'] 31 | 32 | start_offset = archive['offset'] + point_idx * point_size 33 | if point_idx < 0: 34 | start_offset += archive['size'] 35 | 36 | point = get_point(f, start_offset, point_size, point_format) 37 | print 'count: %s' % archive['count'] 38 | 39 | if not error: 40 | metric = get_metric(filepath) 41 | date_str = timestamp_to_datestr(point[0]) 42 | if metric: 
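# A kenshin point is a timestamp followed by one value per metric in
# tag_list, so this metric's value sits at offset idx + 1 in the
# unpacked point.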
43 | idx = header['tag_list'].index(metric) 44 | return (point[0], point[idx + 1]), date_str 45 | 46 | else: 47 | return point, date_str 48 | else: 49 | sec_per_point = archive['sec_per_point'] 50 | ts = point[0] 51 | start_offset += point_size 52 | point_idx += 1 53 | while start_offset < archive['size'] + archive['offset']: 54 | point = get_point(f, start_offset, point_size, point_format) 55 | if point[0] != ts + sec_per_point: 56 | return point_idx 57 | start_offset += point_size 58 | point_idx += 1 59 | ts = point[0] 60 | return 'No error!' 61 | 62 | 63 | def main(): 64 | parser = argparse.ArgumentParser(description="debug kenshin file") 65 | parser.add_argument('filepath', help="metric file path") 66 | parser.add_argument('archive_idx', type=int, help="the archive index") 67 | parser.add_argument('point_idx', type=int, help="the point index") 68 | parser.add_argument('-e', '--error', action="store_true", help="run until meet an unexpected point (empty or error)") 69 | 70 | args = parser.parse_args() 71 | print run(args.filepath, args.archive_idx, args.point_idx, args.error) 72 | 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /bin/kenshin-delete.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import os 4 | import sys 5 | import glob 6 | import shutil 7 | from subprocess import check_output 8 | 9 | from kenshin import header, pack_header 10 | from kenshin.agg import Agg 11 | 12 | from rurouni.storage import getFilePathByInstanceDir, getMetricPathByInstanceDir 13 | 14 | 15 | METRIC_NAME, SCHEMA_NAME, FILE_ID, POS_IDX = range(4) 16 | 17 | 18 | def try_to_delete_empty_directory(path): 19 | dirname = os.path.dirname(path) 20 | try: 21 | os.rmdir(dirname) 22 | try_to_delete_empty_directory(dirname) 23 | except OSError: 24 | pass 25 | 26 | 27 | def delete_links(storage_dir, metric_file): 28 | with open(metric_file) as f: 29 | for line in f: 30 | line = line.strip() 31 | bucket, schema_name, fid, pos, metric = line.split(" ") 32 | bucket_link_dir = os.path.join(storage_dir, 'link', bucket) 33 | path = getMetricPathByInstanceDir(bucket_link_dir, metric) 34 | if os.path.exists(path): 35 | os.remove(path) 36 | try_to_delete_empty_directory(path) 37 | 38 | 39 | def delete_file(storage_dir, index, pos_metrics): 40 | """ 41 | Note: We do not delete the data file, just delete the tags in data file, 42 | so the space can reused by new metric. 
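Concretely, the matching tag is blanked out in the file header's tag_list
and the freed bytes are added to the header's reserved space.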
43 | """ 44 | bucket, schema_name, fid = index 45 | bucket_data_dir = os.path.join(storage_dir, 'data', bucket) 46 | filepath = getFilePathByInstanceDir(bucket_data_dir, schema_name, fid) 47 | 48 | with open(filepath, "r+b") as fh: 49 | header_info = header(fh) 50 | tag_list = header_info["tag_list"] 51 | reserved_size = header_info["reserved_size"] 52 | archive_list = [(a["sec_per_point"], a["count"]) 53 | for a in header_info["archive_list"]] 54 | agg_name = Agg.get_agg_name(header_info["agg_id"]) 55 | 56 | released_size = 0 57 | for pos_idx, tag in pos_metrics: 58 | if tag == tag_list[pos_idx]: 59 | tag_list[pos_idx] = "" 60 | released_size += len(tag) 61 | elif tag_list[pos_idx] != "": 62 | print >>sys.stderr, "tag not match: (%s, %d)" % (tag, pos_idx) 63 | 64 | if released_size != 0: 65 | inter_tag_list = tag_list + ["N" * (reserved_size + released_size)] 66 | packed_header, _ = pack_header(inter_tag_list, 67 | archive_list, 68 | header_info["x_files_factor"], 69 | agg_name) 70 | fh.write(packed_header) 71 | 72 | 73 | def delete(storage_dir, metric_file): 74 | with open(metric_file) as f: 75 | group = [] 76 | last_index = None 77 | for line in f: 78 | line = line.strip() 79 | bucket, schema_name, fid, pos, metric = line.split(" ") 80 | fid = int(fid) 81 | pos = int(pos) 82 | index = (bucket, schema_name, fid) 83 | if index == last_index: 84 | group.append((pos, metric)) 85 | else: 86 | if last_index is not None: 87 | delete_file(storage_dir, last_index, group) 88 | group = [(pos, metric)] 89 | last_index = index 90 | if last_index is not None: 91 | delete_file(storage_dir, last_index, group) 92 | 93 | # delete metric-test directory 94 | metric_test_dirs = glob.glob(os.path.join(storage_dir, '*', 'metric-test')) 95 | for d in metric_test_dirs: 96 | shutil.rmtree(d) 97 | 98 | 99 | def sort_metric_file(metric_file): 100 | sorted_metric_file = "%s.sorted" % metric_file 101 | check_output("sort %s > %s" % (metric_file, sorted_metric_file), shell=True) 102 | return sorted_metric_file 103 | 104 | 105 | def main(): 106 | import argparse 107 | parser = argparse.ArgumentParser() 108 | parser.add_argument( 109 | "-s", "--storage-dir", 110 | help="Kenshin storage directory." 111 | ) 112 | parser.add_argument( 113 | "-m", "--metric-file", 114 | help=("Metrics that need to be deleted, " 115 | "use kenshin-get-metrics.py to generate this file") 116 | ) 117 | parser.add_argument( 118 | "--only-link", 119 | action="store_true", 120 | help="Only delete link files." 
121 | ) 122 | args = parser.parse_args() 123 | 124 | sorted_metric_file = sort_metric_file(args.metric_file) 125 | delete_links(args.storage_dir, sorted_metric_file) 126 | if not args.only_link: 127 | delete(args.storage_dir, sorted_metric_file) 128 | 129 | 130 | if __name__ == '__main__': 131 | main() 132 | -------------------------------------------------------------------------------- /bin/kenshin-fetch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import sys 4 | import time 5 | import optparse 6 | import signal 7 | import kenshin 8 | from kenshin.utils import get_metric 9 | 10 | signal.signal(signal.SIGPIPE, signal.SIG_DFL) 11 | 12 | 13 | def main(): 14 | NOW = int(time.time()) 15 | YESTERDAY = NOW - 24 * 60 * 60 16 | 17 | usage = "%prog [options] path" 18 | option_parser = optparse.OptionParser(usage=usage) 19 | option_parser.add_option('--from', 20 | default=YESTERDAY, 21 | type=int, 22 | dest='_from', 23 | help="begin timestamp(default: 24 hours ago)") 24 | option_parser.add_option('--until', 25 | default=NOW, 26 | type=int, 27 | help="end timestamp") 28 | 29 | (options, args) = option_parser.parse_args() 30 | if len(args) != 1: 31 | option_parser.print_help() 32 | sys.exit(1) 33 | 34 | path = args[0] 35 | metric = get_metric(path) 36 | from_time = int(options._from) 37 | until_time = int(options.until) 38 | 39 | header, timeinfo, points = kenshin.fetch(path, from_time, until_time, NOW) 40 | start, end, step = timeinfo 41 | 42 | if metric: 43 | idx = header['tag_list'].index(metric) 44 | points = (p[idx] if p else None for p in points) 45 | 46 | t = start 47 | for p in points: 48 | print "%s\t%s" % (t, p) 49 | t += step 50 | 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /bin/kenshin-find-file.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import os 4 | import re 5 | import glob 6 | from collections import defaultdict 7 | 8 | default_black_list = [ 9 | '.*metric_test.*', 10 | '^rurouni\.', 11 | '^carbon\.', 12 | '^stats\.counters\..*\.count$', 13 | ] 14 | 15 | 16 | def main(): 17 | import argparse 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument( 20 | '-d', '--data-dir', required=True, 21 | help="data directory.") 22 | parser.add_argument( 23 | '-f', '--black-list-file', 24 | help="black list (regular expression for metric) file." 
25 | ) 26 | args = parser.parse_args() 27 | 28 | idx_files = glob.glob(os.path.join(args.data_dir, '*.idx')) 29 | black_list_pattern = gen_black_list_pattern(args.black_list_file) 30 | for idx_file in idx_files: 31 | dir_, filename = os.path.split(idx_file) 32 | instance = os.path.splitext(filename)[0] 33 | for p in yield_kenshin_files(dir_, instance, idx_file, black_list_pattern): 34 | print p 35 | 36 | 37 | def gen_black_list_pattern(black_list_file): 38 | rs = [] 39 | if not black_list_file: 40 | for x in default_black_list: 41 | rs.append(re.compile(x)) 42 | else: 43 | with open(black_list_file) as f: 44 | for line in f: 45 | line = line.strip() 46 | if line: 47 | rs.append(re.compile(line)) 48 | return rs 49 | 50 | 51 | def yield_kenshin_files(dir_, instance, idx_file, black_list_pattern): 52 | all_fids = defaultdict(set) 53 | del_fids = defaultdict(set) 54 | with open(idx_file) as f: 55 | for line in f: 56 | line = line.strip() 57 | if not line: 58 | continue 59 | try: 60 | metric, schema, fid, _ = line.split() 61 | fid = int(fid) 62 | except Exception: 63 | continue 64 | all_fids[schema].add(fid) 65 | for p in black_list_pattern: 66 | if p.match(metric): 67 | del_fids[schema].add(fid) 68 | break 69 | for schema in all_fids: 70 | valid_fids = all_fids[schema] - del_fids[schema] 71 | for i in sorted(valid_fids)[:-1]: 72 | path = os.path.join(dir_, instance, schema, '%s.hs' % i) 73 | yield path 74 | 75 | 76 | if __name__ == '__main__': 77 | main() 78 | -------------------------------------------------------------------------------- /bin/kenshin-get-metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import re 4 | import os 5 | import glob 6 | 7 | 8 | def match_metrics(index_dir, regexps): 9 | index_files = glob.glob(os.path.join(index_dir, '*.idx')) 10 | for index in index_files: 11 | bucket = os.path.splitext(os.path.basename(index))[0] 12 | with open(index) as f: 13 | for line in f: 14 | line = line.strip() 15 | try: 16 | metric, schema_name, fid, pos = line.split(' ') 17 | except ValueError: 18 | pass 19 | for p in regexps: 20 | if p.match(metric): 21 | yield ' '.join([bucket, schema_name, fid, pos, metric]) 22 | break 23 | 24 | 25 | def compile_regexp(regexp_file): 26 | with open(regexp_file) as f: 27 | for line in f: 28 | line = line.strip() 29 | if line and not line.startswith("#"): 30 | yield re.compile(line) 31 | 32 | 33 | def main(): 34 | import argparse 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument( 37 | '-d', '--dirs', 38 | required=True, 39 | help='directories that contain kenshin index files, seperated by comma.' 40 | ) 41 | parser.add_argument( 42 | '-f', '--regexp-file', 43 | required=True, 44 | help='file that contain regular expressions.' 
45 | ) 46 | args = parser.parse_args() 47 | 48 | regexps = list(compile_regexp(args.regexp_file)) 49 | 50 | for dir_ in args.dirs.split(","): 51 | for m in match_metrics(dir_, regexps): 52 | print m 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /bin/kenshin-info.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | 5 | from pprint import pprint 6 | import kenshin 7 | 8 | 9 | if __name__ == '__main__': 10 | import sys 11 | if len(sys.argv) < 2: 12 | print 'Usage: kenshin-info.py <kenshin_file>' 13 | sys.exit(1) 14 | path = sys.argv[1] 15 | with open(path) as f: 16 | pprint(kenshin.header(f)) 17 | -------------------------------------------------------------------------------- /bin/kenshin-rebuild-index.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import sys 5 | from rurouni.storage import rebuildIndex 6 | 7 | 8 | def main(): 9 | if len(sys.argv) < 3: 10 | print 'need bucket_data_dir and bucket_index_file' 11 | sys.exit(1) 12 | 13 | data_dir, index_file = sys.argv[1:] 14 | rebuildIndex(data_dir, index_file) 15 | 16 | 17 | if __name__ == '__main__': 18 | main() 19 | -------------------------------------------------------------------------------- /bin/kenshin-rebuild-link.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os 5 | import sys 6 | import glob 7 | import errno 8 | 9 | import kenshin 10 | from kenshin.utils import mkdir_p 11 | 12 | 13 | def main(): 14 | if len(sys.argv) < 3: 15 | print('Need data_dir and link_dir.\n' 16 | 'e.g.: kenshin-rebuild-link.py /kenshin/data/a /kenshin/link/a') 17 | sys.exit(1) 18 | 19 | data_dir, link_dir = sys.argv[1:] 20 | data_dir = os.path.abspath(data_dir) 21 | link_dir = os.path.abspath(link_dir) 22 | 23 | for schema_name in os.listdir(data_dir): 24 | hs_file_pat = os.path.join(data_dir, schema_name, '*.hs') 25 | for fp in glob.glob(hs_file_pat): 26 | with open(fp) as f: 27 | header = kenshin.header(f) 28 | metric_list = header['tag_list'] 29 | for metric in metric_list: 30 | if metric != '': 31 | try: 32 | create_link(metric, link_dir, fp) 33 | except OSError as exc: 34 | if exc.errno == errno.ENAMETOOLONG: 35 | pass 36 | else: 37 | raise 38 | 39 | 40 | def create_link(metric, link_dir, file_path): 41 | link_path = metric.replace('.', os.path.sep) 42 | link_path = os.path.join(link_dir, link_path + '.hs') 43 | dirname = os.path.dirname(link_path) 44 | mkdir_p(dirname) 45 | if os.path.exists(link_path): 46 | os.remove(link_path) 47 | os.symlink(file_path, link_path) 48 | 49 | 50 | if __name__ == '__main__': 51 | main() 52 | -------------------------------------------------------------------------------- /bin/kenshin-rebuild.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | import re 4 | import time 5 | from os.path import join 6 | from subprocess import check_output 7 | 8 | 9 | class Status(object): 10 | def __init__(self, status, pid, time): 11 | self.status = status 12 | self.pid = pid 13 | self.time = int(time) 14 | 15 | def __str__(self): 16 | return '<Status %s, pid %s, time %s>' % (self.status, self.pid, self.time) 17 | 18 | 19 | def get_service_status(service_name): 20 | """Return Status(status, pid, time). 21 | e.g.
Status('up', 1024, 12342), Status('down', None, 2) 22 | """ 23 | cmd = ['svstat', service_name] 24 | out = check_output(cmd) 25 | 26 | down_pattern = r'down (\d+) seconds, normally up' 27 | up_pattern = r'up \(pid (\d+)\) (\d+) seconds' 28 | 29 | if re.search(up_pattern, out): 30 | pid, t = re.search(up_pattern, out).groups() 31 | return Status('up', pid, t) 32 | elif re.search(down_pattern, out): 33 | (t,) = re.search(down_pattern, out).groups() 34 | return Status('down', None, t) 35 | else: 36 | raise Exception('Unkown service status, service=%s, status=%s', service_name, out) 37 | 38 | 39 | def run_cmd(cmd, user=None): 40 | if user: 41 | cmd = 'sudo -u %s %s' % (user, cmd) 42 | print cmd 43 | return check_output(cmd, shell=True) 44 | 45 | 46 | def main(): 47 | import argparse 48 | parser = argparse.ArgumentParser() 49 | parser.add_argument( 50 | '-s', '--storage-dir', 51 | help='Kenshine storage directory.' 52 | ) 53 | parser.add_argument( 54 | '-b', '--begin-bucket', 55 | type=int, 56 | help='Begin bucket number.' 57 | ) 58 | parser.add_argument( 59 | '-e', '--end-bucket', 60 | type=int, 61 | help='End bucket number.' 62 | ) 63 | parser.add_argument( 64 | '--skip-rebuild-link', 65 | action='store_true', 66 | help='Skip rebuild link.' 67 | ) 68 | args = parser.parse_args() 69 | 70 | storage_dir = args.storage_dir 71 | begin = args.begin_bucket 72 | end = args.end_bucket 73 | 74 | for i in range(begin, end + 1): 75 | if i != begin: 76 | time.sleep(10) 77 | 78 | bucket = str(i) 79 | data_dir = join(storage_dir, 'data', bucket) 80 | data_idx = join(storage_dir, 'data', bucket + '.idx') 81 | link_dir = join(storage_dir, 'link', bucket) 82 | service = '/service/rurouni-cache-%s' % bucket 83 | 84 | run_cmd('svc -d %s' % service) 85 | while get_service_status(service).status != 'down': 86 | print 'wating for service down' 87 | time.sleep(5) 88 | 89 | run_cmd('rm %s' % data_idx) 90 | run_cmd('kenshin-rebuild-index.py %s %s' % (data_dir, data_idx), 91 | 'graphite') 92 | 93 | if not args.skip_rebuild_link: 94 | run_cmd('rm -r %s/*' % link_dir) 95 | run_cmd('kenshin-rebuild-link.py %s %s' % (data_dir, link_dir), 96 | 'graphite') 97 | 98 | run_cmd('svc -u %s' % service) 99 | while get_service_status(service).status != 'up': 100 | print 'wating for service up' 101 | time.sleep(5) 102 | 103 | 104 | if __name__ == '__main__': 105 | main() 106 | -------------------------------------------------------------------------------- /bin/kenshin-rehash.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import os 4 | import sys 5 | import time 6 | import urllib 7 | import struct 8 | import StringIO 9 | from multiprocessing import Process, Queue 10 | 11 | from kenshin import storage 12 | from kenshin.agg import Agg 13 | from kenshin.storage import Storage 14 | from kenshin.consts import NULL_VALUE 15 | from rurouni.utils import get_instance_of_metric 16 | from kenshin.tools.whisper_tool import ( 17 | read_header as whisper_read_header, 18 | pointFormat as whisperPointFormat, 19 | ) 20 | 21 | 22 | def main(): 23 | import argparse 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument( 26 | '-t', '--src-type', required=True, 27 | choices=['whisper', 'kenshin'], 28 | help="src storage type" 29 | ) 30 | parser.add_argument( 31 | '-d', '--src-data-dir', required=True, 32 | help="src data directory (http address)." 
33 | ) 34 | parser.add_argument( 35 | '-n', '--src-instance-num', type=int, 36 | help=('src rurouni cache instance number (required when src_type ' 37 | 'is kenshin)') 38 | ) 39 | parser.add_argument( 40 | '-m', '--kenshin-file', required=True, 41 | help=('kenshin data files that we want to add the history, ' 42 | 'use kenshin-find-file.py to generate this file.') 43 | ) 44 | parser.add_argument( 45 | '-p', '--processes', default=10, type=int, 46 | help="number of processes." 47 | ) 48 | args = parser.parse_args() 49 | 50 | if args.src_type == 'kenshin' and args.src_instance_num is None: 51 | parser.error('src-instance-num is required') 52 | 53 | # start processes 54 | processes = [] 55 | queue = Queue() 56 | for w in xrange(args.processes): 57 | p = Process(target=worker, args=(queue,)) 58 | p.start() 59 | processes.append(p) 60 | 61 | # generate data 62 | with open(args.kenshin_file) as f: 63 | for line in f: 64 | kenshin_filepath = line.strip() 65 | if not kenshin_filepath: 66 | continue 67 | with open(kenshin_filepath) as f: 68 | header = Storage.header(f) 69 | metrics = header['tag_list'] 70 | if args.src_type == 'kenshin': 71 | metric_paths = [ 72 | metric_to_filepath(args.src_data_dir, m, args.src_instance_num) 73 | for m in metrics 74 | ] 75 | else: # whisper 76 | metric_paths = [ 77 | metric_to_whisper_filepath(args.src_data_dir, m) 78 | for m in metrics 79 | ] 80 | item = (args.src_type, header, metric_paths, metrics, kenshin_filepath) 81 | queue.put(item) 82 | 83 | # stop processes 84 | for _ in xrange(args.processes): 85 | queue.put("STOP") 86 | for p in processes: 87 | p.join() 88 | 89 | 90 | def worker(queue): 91 | for (src_type, meta, metric_paths, metrics, dst_file) in iter(queue.get, 'STOP'): 92 | try: 93 | tmp_file = dst_file + '.tmp' 94 | merge_metrics(src_type, meta, metric_paths, metrics, tmp_file) 95 | os.rename(tmp_file, dst_file) 96 | except Exception as e: 97 | print >>sys.stderr, '[merge error] %s: %s' % (dst_file, e) 98 | if os.path.exists(tmp_file): 99 | os.remove(tmp_file) 100 | return True 101 | 102 | 103 | def merge_metrics(src_type, meta, metric_paths, metric_names, output_file): 104 | ''' Merge metrics to a kenshin file. 105 | ''' 106 | # Get content(data points grouped by archive) of each metric. 
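# Each entry of metrics_archives_points is a list with one element per
# archive, holding that metric's (timestamp, value) pairs. The per-metric
# series are then merged timestamp by timestamp into multi-value kenshin
# points, gaps are padded with empty points, and the result is packed
# archive by archive after the kenshin header.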
107 | if src_type == 'kenshin': 108 | metrics_archives_points = [ 109 | get_metric_content(path, metric) 110 | for (path, metric) in zip(metric_paths, metric_names) 111 | ] 112 | else: # whipser 113 | metrics_archives_points = [ 114 | get_whisper_metric_content(path) 115 | for path in metric_paths 116 | ] 117 | 118 | # Merge metrics to a kenshin file 119 | with open(output_file, 'wb') as f: 120 | archives = meta['archive_list'] 121 | archive_info = [(archive['sec_per_point'], archive['count']) 122 | for archive in archives] 123 | inter_tag_list = metric_names + [''] # for reserved space 124 | 125 | # header 126 | packed_kenshin_header = Storage.pack_header( 127 | inter_tag_list, 128 | archive_info, 129 | meta['x_files_factor'], 130 | Agg.get_agg_name(meta['agg_id']), 131 | )[0] 132 | f.write(packed_kenshin_header) 133 | 134 | for i, archive in enumerate(archives): 135 | archive_points = [x[i] for x in metrics_archives_points] 136 | merged_points = merge_points(archive_points) 137 | points = fill_gap(merged_points, archive, len(meta['tag_list'])) 138 | packed_str = packed_kenshin_points(points) 139 | f.write(packed_str) 140 | 141 | 142 | def metric_to_filepath(data_dir, metric, instance_num): 143 | if metric.startswith('rurouni.'): 144 | instance = metric.split('.')[2] 145 | else: 146 | instance = str(get_instance_of_metric(metric, instance_num)) 147 | return os.path.sep.join([data_dir, instance] + metric.split('.')) + '.hs' 148 | 149 | 150 | def metric_to_whisper_filepath(data_dir, metric): 151 | return os.path.sep.join([data_dir] + metric.split('.')) + '.wsp' 152 | 153 | 154 | def merge_points(metrics_archive_points): 155 | ''' Merge metrics' archive points to kenshin points. 156 | 157 | >>> whisper_points = [ 158 | ... [[1421830133, 0], [1421830134, 1], [1421830135, 2]], 159 | ... [[1421830134, 4], [1421830135, 5]], 160 | ... [[1421830133, 6], [1421830134, 7], [1421830135, 8]] 161 | ... ] 162 | >>> merge_points(whisper_points) 163 | [(1421830133, [0, -4294967296.0, 6]), (1421830134, [1, 4, 7]), (1421830135, [2, 5, 8])] 164 | ''' 165 | length = len(metrics_archive_points) 166 | d = {} 167 | for i, points in enumerate(metrics_archive_points): 168 | for t, v in points: 169 | if not t: 170 | continue 171 | if t not in d: 172 | d[t] = [NULL_VALUE] * length 173 | d[t][i] = v 174 | return sorted(d.items()) 175 | 176 | 177 | def fill_gap(archive_points, archive, metric_num): 178 | EMPTY_POINT = (0, (0,) * metric_num) 179 | if not archive_points: 180 | return [EMPTY_POINT] * archive['count'] 181 | step = archive['sec_per_point'] 182 | rs = [archive_points[0]] 183 | prev_ts = archive_points[0][0] 184 | for ts, point in archive_points[1:]: 185 | if prev_ts + step == ts: 186 | rs.append((ts, point)) 187 | else: 188 | rs.extend([EMPTY_POINT] * ((ts-prev_ts) / step)) 189 | prev_ts = ts 190 | if len(rs) < archive['count']: 191 | rs.extend([EMPTY_POINT] * (archive['count'] - len(rs))) 192 | else: 193 | rs = rs[:archive['count']] 194 | return rs 195 | 196 | 197 | def packed_kenshin_points(points): 198 | point_format = storage.POINT_FORMAT % len(points[0][1]) 199 | str_format = point_format[0] + point_format[1:] * len(points) 200 | return struct.pack(str_format, *flatten(points)) 201 | 202 | 203 | def flatten(iterable): 204 | """ Recursively iterate lists and tuples. 
205 | 206 | >>> list(flatten([1, (2, 3, [4]), 5])) 207 | [1, 2, 3, 4, 5] 208 | """ 209 | for elm in iterable: 210 | if isinstance(elm, (list, tuple)): 211 | for relm in flatten(elm): 212 | yield relm 213 | else: 214 | yield elm 215 | 216 | 217 | def get_metric_content(metric_path, metric_name): 218 | ''' Return data points of each archive of the metric. 219 | ''' 220 | conn = urllib.urlopen(metric_path) 221 | if conn.code == 200: 222 | content = conn.read() 223 | else: 224 | raise Exception('HTTP Error Code %s for %s' % (conn.code, metric_path)) 225 | 226 | header = Storage.header(StringIO.StringIO(content)) 227 | metric_list = header['tag_list'] 228 | metric_cnt = len(metric_list) 229 | metric_idx = metric_list.index(metric_name) 230 | step = metric_cnt + 1 231 | point_format = header['point_format'] 232 | byte_order, point_type = point_format[0], point_format[1:] 233 | metric_content = [] 234 | now = int(time.time()) 235 | 236 | for archive in header['archive_list']: 237 | ts_min = now - archive['retention'] 238 | archive_points = [] 239 | series_format = byte_order + (point_type * archive['count']) 240 | packed_str = content[archive['offset']: archive['offset'] + archive['size']] 241 | unpacked_series = struct.unpack(series_format, packed_str) 242 | for i in xrange(0, len(unpacked_series), step): 243 | ts = unpacked_series[i] 244 | if ts > ts_min: 245 | # (timestamp, value) 246 | datapoint = (ts, unpacked_series[i+1+metric_idx]) 247 | archive_points.append(datapoint) 248 | metric_content.append(archive_points) 249 | 250 | return metric_content 251 | 252 | 253 | def get_whisper_metric_content(metric_path): 254 | conn = urllib.urlopen(metric_path) 255 | if conn.code == 200: 256 | content = conn.read() 257 | else: 258 | raise Exception('HTTP Error Code %s for %s' % (conn.code, metric_path)) 259 | 260 | header = whisper_read_header(StringIO.StringIO(content)) 261 | byte_order, point_type = whisperPointFormat[0], whisperPointFormat[1:] 262 | metric_content = [] 263 | now = int(time.time()) 264 | step = 2 265 | 266 | for archive in header['archives']: 267 | ts_min = now - archive['retention'] 268 | archive_points = [] 269 | series_format = byte_order + (point_type * archive['count']) 270 | packed_str = content[archive['offset']: archive['offset'] + archive['size']] 271 | unpacked_series = struct.unpack(series_format, packed_str) 272 | for i in xrange(0, len(unpacked_series), step): 273 | ts = unpacked_series[i] 274 | if ts > ts_min: 275 | datapoint = (ts, unpacked_series[i+1]) 276 | archive_points.append(datapoint) 277 | metric_content.append(archive_points) 278 | 279 | return metric_content 280 | 281 | 282 | if __name__ == '__main__': 283 | main() 284 | -------------------------------------------------------------------------------- /bin/kenshin-restart.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | ''' 4 | 目前 kenshin 不支持动态加载配置,在配置变更时需要重启,此脚本用于重启所有 kenshin 实例。 5 | 6 | $ sudo kenshin-restart.py 7 | ''' 8 | 9 | import re 10 | import time 11 | import glob 12 | from subprocess import check_output 13 | 14 | 15 | class Status(object): 16 | def __init__(self, status, pid, time): 17 | self.status = status 18 | self.pid = pid 19 | self.time = int(time) 20 | 21 | def __str__(self): 22 | return '' % (self.status, self.pid, self.time) 23 | 24 | 25 | def find_cache_services(start_num): 26 | def get_instance_num(service_path): 27 | return int(service_path.rsplit('-', 1)[1]) 28 | 29 | services = 
glob.glob('/service/rurouni-cache-*') 30 | services = [x for x in services if get_instance_num(x) >= start_num] 31 | return sorted(services, key=get_instance_num) 32 | 33 | 34 | def get_service_status(service_name): 35 | """Return Status(status, pid, time). 36 | e.g. Status('up', 1024, 12342), Status('down', None, 2) 37 | """ 38 | cmd = ['svstat', service_name] 39 | out = check_output(cmd) 40 | 41 | down_pattern = r'down (\d+) seconds, normally up' 42 | up_pattern = r'up \(pid (\d+)\) (\d+) seconds' 43 | 44 | if re.search(up_pattern, out): 45 | pid, t = re.search(up_pattern, out).groups() 46 | return Status('up', pid, t) 47 | elif re.search(down_pattern, out): 48 | (t,) = re.search(down_pattern, out).groups() 49 | return Status('down', None, t) 50 | else: 51 | raise Exception('Unkown service status, service=%s, status=%s', service_name, out) 52 | 53 | 54 | def svc(service_name, arg): 55 | cmd = ['svc', arg, service_name] 56 | return check_output(cmd) 57 | 58 | 59 | def svc_restart(service_name): 60 | return svc(service_name, '-t') 61 | 62 | 63 | def restart_service(service_name): 64 | old = get_service_status(service_name) 65 | assert old.status == 'up' 66 | svc_restart(service_name) 67 | i = 0 68 | while True: 69 | time.sleep(2) 70 | new = get_service_status(service_name) 71 | print i, new 72 | if new.status == 'up': 73 | # 重启成功需要满足下面两个条件: 74 | # 1. pid 已经发生变化 75 | # 2. 新的服务已经可用 76 | # 77 | # 关于第 2 点,由于 kenshin 没有对外的接口可以查到内部状态, 78 | # 所以目前是靠时间来估计服务状态。未来计划把 pickle 格式端口去掉, 79 | # 用 pickle 格式端口作为内部状态查询的接口,也可以通过该接口 80 | # 实现动态加载配置文件,到时这个脚本的就可以下岗了。 81 | # PS: 时间的单位是"秒" 82 | if new.pid != old.pid and new.time > 10: 83 | break 84 | i += 1 85 | 86 | def main(): 87 | import argparse 88 | parser = argparse.ArgumentParser() 89 | parser.add_argument("-t", "--time-interval", default=60, type=int, 90 | help="time interval between two restarting operations.") 91 | parser.add_argument("-s", "--start-num", default=0, type=int, 92 | help="start instance number") 93 | args = parser.parse_args() 94 | services = find_cache_services(args.start_num) 95 | for s in services: 96 | print 'restarting %s' % s 97 | print get_service_status(s) 98 | restart_service(s) 99 | print get_service_status(s) 100 | print 101 | time.sleep(args.time_interval) 102 | 103 | 104 | if __name__ == '__main__': 105 | main() 106 | -------------------------------------------------------------------------------- /bin/kenshin-send-zero-metric.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | """ 4 | 为了在迁移数据时减少不必要的指标(metric),目前只迁移一周内发生过的指标(即至少有一点)。 5 | 但是一些类似 5XX 这样的 web error 相关的指标,可能会有一段时间没有发生(例如,一周), 6 | 所以新的节点上没有这些指标,但是 SA 那边希望保留这些指标。那么该脚本的作用就是给 Graphite 7 | 发送这些指标,每个指标发送一个 val(默认为0). 
8 | 9 | # 指标的获取 10 | 11 | $ kenshin-get-metrics.py -d /data/kenshin/storage/data/ -f error_code.re | awk '{print $5}' 12 | $ cat error_code.re 13 | .*\.code\.\d+\.rate$ 14 | 15 | # 使用方法 16 | 17 | $ kenshin-send-zero-metric.py -a : -m error_metric.src -b error_metric.dst 18 | """ 19 | 20 | import sys 21 | import socket 22 | import time 23 | import argparse 24 | 25 | 26 | def run(sock, interval, metrics): 27 | now = int(time.time()) 28 | for m in metrics: 29 | line = '%s %s %d\n' % (m, 0, now) 30 | print line, 31 | sock.sendall(line) 32 | time.sleep(interval) 33 | 34 | 35 | def get_metrics(filename): 36 | metrics = [] 37 | with open(filename) as f: 38 | for line in f: 39 | line = line.strip() 40 | if line: 41 | metrics.append(line) 42 | return set(metrics) 43 | 44 | 45 | def parse_addr(addr): 46 | try: 47 | host, port = addr.split(":") 48 | return host, int(port) 49 | except ValueError: 50 | msg = "%r is not a valid addr" % addr 51 | raise argparse.ArgumentTypeError(msg) 52 | 53 | 54 | def main(): 55 | parser = argparse.ArgumentParser() 56 | parser.add_argument('-a', '--addr', required=True, type=parse_addr, 57 | help="addr of carbon relay, it is format host:port") 58 | parser.add_argument('-i', '--interval', default=0.01, type=float, 59 | help="time interval between two send.") 60 | parser.add_argument('-m', '--metric-file', required=True, 61 | help="file that contains metric name to send.") 62 | parser.add_argument('-b', '--black-list-file', default=None, 63 | help="file that contains black list of metrics.") 64 | 65 | args = parser.parse_args() 66 | host, port = args.addr 67 | 68 | sock = socket.socket() 69 | try: 70 | sock.connect((host, port)) 71 | except socket.error: 72 | raise SystemError("Couldn't connect to %s on port %d" % 73 | (host, port)) 74 | metrics = get_metrics(args.metric_file) 75 | if args.black_list_file: 76 | metrics -= get_metrics(args.black_list_file) 77 | try: 78 | run(sock, args.interval, metrics) 79 | except KeyboardInterrupt: 80 | sys.stderr.write("\nexiting on CTRL+c\n") 81 | sys.exit(0) 82 | 83 | 84 | if __name__ == '__main__': 85 | main() 86 | -------------------------------------------------------------------------------- /bin/rurouni-cache.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import os.path 5 | 6 | BIN_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | ROOT_DIR = os.path.dirname(BIN_DIR) 8 | 9 | from rurouni.utils import run_twistd_plugin 10 | from rurouni.exceptions import RurouniException 11 | 12 | try: 13 | run_twistd_plugin(__file__) 14 | except RurouniException as e: 15 | raise SystemError(e) 16 | -------------------------------------------------------------------------------- /conf/rurouni.conf.example: -------------------------------------------------------------------------------- 1 | [cache] 2 | # Configure rurouni-cache directories. 
3 | 4 | CONF_DIR = /data/kenshin/conf 5 | LOCAL_DATA_DIR = /data/kenshin/storage/data 6 | LOCAL_LINK_DIR = /data/kenshin/storage/link 7 | LOG_DIR = /data/kenshin/storage/log 8 | PID_DIR = /data/kenshin/storage/run 9 | 10 | LINE_RECEIVER_INTERFACE = 0.0.0.0 11 | PICKLE_RECEIVER_INTERFACE = 0.0.0.0 12 | CACHE_QUERY_INTERFACE = 0.0.0.0 13 | 14 | LOG_UPDATES = True 15 | MAX_CREATES_PER_MINUTE = 1000 16 | NUM_ALL_INSTANCE = 2 17 | 18 | DEFAULT_WAIT_TIME = 1 19 | 20 | 21 | [cache:0] 22 | LINE_RECEIVER_PORT = 2003 23 | PICKLE_RECEIVER_PORT = 2004 24 | CACHE_QUERY_PORT = 7002 25 | 26 | [cache:1] 27 | LINE_RECEIVER_PORT = 2013 28 | PICKLE_RECEIVER_PORT = 2014 29 | CACHE_QUERY_PORT = 7012 30 | -------------------------------------------------------------------------------- /conf/storage-schemas.conf.example: -------------------------------------------------------------------------------- 1 | # Schema definitions for kenshin files. Entries are scanned in order, 2 | # and first match wins. This file is scanned for changes every 60 seconds. 3 | # 4 | # Definition Syntax: 5 | # 6 | # [name] 7 | # pattern = regex 8 | # xFilesFactor = xff 9 | # aggregationMethod = agg 10 | # retentions = timePerPoint:timeToStore, timePerPoint:timeToStore, ... 11 | # cacheRetention = seconds 12 | # metricsPerFile = num 13 | # 14 | # Remember: To support accurate aggregation from higher to lower resolution 15 | # archives, the precision of a longer retention archive must be 16 | # cleanly divisible by precision of next lower retention archive. 17 | # 18 | # Valid: 60s:7d,300s:30d (300/60 = 5) 19 | # Invalid: 180s:7d,300s:30d (300/180 = 3.333) 20 | # 21 | 22 | [metric-test] 23 | pattern = .*metric_test.* 24 | xFilesFactor = 1 25 | aggregationMethod = average 26 | retentions = 1s:1h,6s:2h 27 | cacheRetention = 10s 28 | metricsPerFile = 8 29 | 30 | [rurouni-stats] 31 | pattern = ^rurouni\. 
32 | xFilesFactor = 20.0 33 | aggregationMethod = average 34 | retentions = 60s:2d,300s:7d,15m:25w,12h:5y 35 | cacheRetention = 600s 36 | metricsPerFile = 8 37 | 38 | [default] 39 | pattern = .* 40 | retentions = 10s:12h,60s:2d,300s:7d,15m:25w,12h:5y 41 | xFilesFactor = 20 42 | aggregationMethod = average 43 | metricsPerFile = 8 44 | cacheRetention = 620s 45 | -------------------------------------------------------------------------------- /examples/metric_stresser.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import time 3 | import random 4 | import socket 5 | import struct 6 | import cPickle as pickle 7 | from multiprocessing import Process 8 | 9 | 10 | def main(): 11 | import argparse 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("-a", "--address", type=str, help="host:port pair.") 14 | parser.add_argument("-f", "--format", type=str, choices=["line", "pickle"], help="Format of data.") 15 | parser.add_argument("-p", "--process", type=int, default=1, help="Number of processes.") 16 | parser.add_argument("-m", "--metric", type=int, default=1000, help="Number of metrics for one process.") 17 | parser.add_argument("-i", "--interval", type=int, default=10, help="Publish time interval.") 18 | parser.add_argument("-d", "--debug", action='store_true', help="Debug mode, send the metrics to terminal.") 19 | args = parser.parse_args() 20 | 21 | stresser(args) 22 | 23 | 24 | def stresser(args): 25 | host, port = args.address.split(":") 26 | port = int(port) 27 | metric_args = (host, port, args.format, args.metric, args.interval, args.debug) 28 | 29 | processes = [] 30 | for i in xrange(args.process): 31 | pname = 'process_%s' % i 32 | p = Process(target=send_metrics, args=(pname,) + metric_args) 33 | p.start() 34 | processes.append(p) 35 | 36 | try: 37 | for p in processes: 38 | p.join() 39 | except KeyboardInterrupt: 40 | for p in processes: 41 | p.terminate() 42 | print 'KeyboardInterrupt' 43 | 44 | 45 | def send_metrics(pname, host, port, format, num_metrics, interval, debug): 46 | time.sleep(random.random() * interval) 47 | sock = socket.socket() 48 | try: 49 | sock.connect((host, port)) 50 | except socket.error: 51 | if not debug: 52 | raise SystemError("Couldn't connect to %s on port %s" % 53 | (host, port)) 54 | metrics = list(gen_metrics(pname, num_metrics)) 55 | while True: 56 | points = gen_metric_points(metrics, format) 57 | if debug: 58 | print '\n'.join(map(str, points)) 59 | else: 60 | if format == 'line': 61 | msg = '\n'.join(points) + '\n' # all lines end in a newline 62 | sock.sendall(msg) 63 | else: 64 | # pickle 65 | package = pickle.dumps(points, 1) 66 | size = struct.pack('!L', len(package)) 67 | sock.sendall(size) 68 | sock.sendall(package) 69 | time.sleep(interval) 70 | 71 | 72 | def gen_metrics(id_, num_metrics): 73 | METRIC_PATTERN = 'metric_stresser.{0}.metric_id.%s'.format(id_) 74 | for i in xrange(num_metrics): 75 | yield METRIC_PATTERN % str(i) 76 | 77 | 78 | def gen_metric_points(metrics, format): 79 | base_val = random.random() 80 | now = int(time.time()) 81 | points = [] 82 | for i, m in enumerate(metrics): 83 | val = base_val + i 84 | if format == 'line': 85 | points.append("%s %s %s" % (m, val, now)) 86 | else: 87 | points.append((m, (now, val))) 88 | return points 89 | 90 | 91 | if __name__ == '__main__': 92 | main() 93 | -------------------------------------------------------------------------------- /examples/rurouni-client.py: 
-------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import re 4 | import sys 5 | import socket 6 | import time 7 | import subprocess 8 | 9 | RUROUNI_SERVER = '127.0.0.1' 10 | RUROUNI_PORT = 2003 11 | DELAY = 60 12 | 13 | idx = 0 14 | 15 | def get_loadavg(): 16 | cmd = 'uptime' 17 | output = subprocess.check_output(cmd, shell=True).strip() 18 | output = re.split("\s+", output) 19 | # return output[-3:] 20 | # 发送伪造数据,容易肉眼验证处理结果是否正确 21 | global idx 22 | idx += 1 23 | return idx, 100+idx, 200+idx 24 | 25 | 26 | def run(sock, delay): 27 | while True: 28 | now = int(time.time()) 29 | loadavg = get_loadavg() 30 | 31 | lines = [] 32 | idx2min = [1, 5, 15] 33 | for i, val in enumerate(loadavg): 34 | line = "test.system.loadavg.min_%s %s %d" % (idx2min[i], val, now) 35 | lines.append(line) 36 | msg = '\n'.join(lines) + '\n' # all lines must end in a newline 37 | print 'sending message' 38 | print '-' * 80 39 | print msg 40 | sock.sendall(msg) 41 | time.sleep(delay) 42 | 43 | 44 | def main(): 45 | if len(sys.argv) > 1: 46 | delay = int(sys.argv[1]) 47 | else: 48 | delay = DELAY 49 | 50 | sock = socket.socket() 51 | try: 52 | sock.connect((RUROUNI_SERVER, RUROUNI_PORT)) 53 | except socket.error: 54 | raise SystemError("Couldn't connect to %s on port %s" % 55 | (RUROUNI_SERVER, RUROUNI_PORT)) 56 | 57 | try: 58 | run(sock, delay) 59 | except KeyboardInterrupt: 60 | sys.stderr.write("\nexiting on CTRL+c\n") 61 | sys.exit(0) 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /examples/rurouni-pickle-client.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import re 4 | import sys 5 | import socket 6 | import time 7 | import subprocess 8 | import pickle 9 | import struct 10 | 11 | RUROUNI_SERVER = '127.0.0.1' 12 | RUROUNI_PORT = 2004 13 | DELAY = 60 14 | 15 | idx = 0 16 | 17 | 18 | def get_loadavg(): 19 | cmd = 'uptime' 20 | output = subprocess.check_output(cmd, shell=True).strip() 21 | output = re.split("\s+", output) 22 | # return output[-3:] 23 | # 发送伪造数据,容易肉眼验证处理结果是否正确 24 | global idx 25 | idx += 1 26 | return idx, 100+idx, 200+idx 27 | 28 | 29 | def run(sock, delay): 30 | while True: 31 | now = int(time.time()) 32 | loadavg = get_loadavg() 33 | 34 | lines = [] # for print info 35 | tuples = [] 36 | idx2min = [1, 5, 15] 37 | for i, val in enumerate(loadavg): 38 | line = "system.loadavg.min_%s.metric_test %s %d" % (idx2min[i], val, now) 39 | lines.append(line) 40 | tuples.append(('system.loadavg.min_%s.metric_test' % idx2min[i], (now, val))) 41 | msg = '\n'.join(lines) + '\n' # all lines must end in a newline 42 | print 'sending message' 43 | print '-' * 80 44 | print msg 45 | package = pickle.dumps(tuples, 1) 46 | size = struct.pack('!L', len(package)) 47 | sock.sendall(size) 48 | sock.sendall(package) 49 | time.sleep(delay) 50 | 51 | 52 | def main(): 53 | if len(sys.argv) > 1: 54 | delay = int(sys.argv[1]) 55 | else: 56 | delay = DELAY 57 | 58 | sock = socket.socket() 59 | try: 60 | sock.connect((RUROUNI_SERVER, RUROUNI_PORT)) 61 | except socket.error: 62 | raise SystemError("Couldn't connect to %s on port %s" % 63 | (RUROUNI_SERVER, RUROUNI_PORT)) 64 | 65 | try: 66 | run(sock, delay) 67 | except KeyboardInterrupt: 68 | sys.stderr.write("\nexiting on CTRL+c\n") 69 | sys.exit(0) 70 | 71 | 72 | if __name__ == '__main__': 73 | main() 74 | 
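Both example clients frame pickled datapoints the same way: a 4-byte big-endian length prefix ('!L') followed by the pickled list of (metric, (timestamp, value)) tuples. The sketch below is illustrative only and is not part of the repository; it shows how such a frame can be read back on the receiving side, while the real receiver is part of the rurouni daemon itself.

import struct
import cPickle as pickle


def recv_exact(sock, n):
    # keep calling recv() until exactly n bytes have been collected
    chunks = []
    remaining = n
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            raise EOFError("connection closed while reading frame")
        chunks.append(chunk)
        remaining -= len(chunk)
    return ''.join(chunks)


def read_pickle_frame(sock):
    # 4-byte length prefix, then the pickled [(metric, (timestamp, value)), ...] payload
    size = struct.unpack('!L', recv_exact(sock, 4))[0]
    return pickle.loads(recv_exact(sock, size))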
-------------------------------------------------------------------------------- /img/docs/cluster-before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/docs/cluster-before.png -------------------------------------------------------------------------------- /img/docs/cluster-now.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/docs/cluster-now.png -------------------------------------------------------------------------------- /img/docs/kenshin-structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/docs/kenshin-structure.png -------------------------------------------------------------------------------- /img/docs/query_perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/docs/query_perf.png -------------------------------------------------------------------------------- /img/kenshin-perf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/kenshin-perf.png -------------------------------------------------------------------------------- /img/kenshin.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/img/kenshin.gif -------------------------------------------------------------------------------- /kenshin/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from kenshin.storage import ( 4 | Storage, KenshinException, InvalidConfig, InvalidTime, 5 | RetentionParser) 6 | 7 | __version__ = "0.3.1" 8 | __commit__ = "9b67db3" 9 | __author__ = "zzl0" 10 | __email__ = "zhuzhaolong0@gmail.com" 11 | __date__ = "Sun Dec 18 16:09:53 2016 +0800" 12 | 13 | 14 | _storage = Storage() 15 | validate_archive_list = _storage.validate_archive_list 16 | create = _storage.create 17 | update = _storage.update 18 | fetch = _storage.fetch 19 | header = _storage.header 20 | pack_header = _storage.pack_header 21 | add_tag = _storage.add_tag 22 | 23 | parse_retention_def = RetentionParser.parse_retention_def 24 | -------------------------------------------------------------------------------- /kenshin/agg.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # 3 | # This module implements various aggregation method. 
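# Usage sketch (added illustration, not part of the original comment):
#
#     >>> Agg.get_agg_id('average')
#     0
#     >>> Agg.get_agg_func(0)([2, 4, 6])
#     4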
4 | # 5 | 6 | import operator 7 | 8 | 9 | class Agg(object): 10 | agg_funcs = [ 11 | ['average', lambda x: sum(x) / len(x)], 12 | ['sum', sum], 13 | ['last', operator.itemgetter(-1)], 14 | ['max', max], 15 | ['min', min], 16 | ] 17 | 18 | agg_type_list = [typ for typ, _ in agg_funcs] 19 | agg_func_dict = dict(agg_funcs) 20 | 21 | @classmethod 22 | def get_agg_id(cls, agg_name): 23 | return cls.agg_type_list.index(agg_name) 24 | 25 | @classmethod 26 | def get_agg_func(cls, agg_id): 27 | agg_type = cls.agg_type_list[agg_id] 28 | return cls.agg_func_dict[agg_type] 29 | 30 | @classmethod 31 | def get_agg_type_list(cls): 32 | return cls.agg_type_list 33 | 34 | @classmethod 35 | def get_agg_name(cls, agg_id): 36 | return cls.agg_type_list[agg_id] 37 | -------------------------------------------------------------------------------- /kenshin/consts.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | NULL_VALUE = -4294967296.0 5 | DEFAULT_TAG_LENGTH = 96 6 | CHUNK_SIZE = 16384 -------------------------------------------------------------------------------- /kenshin/storage.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """ 4 | Copyright 2015-Present Douban Inc. 5 | Copyright 2009-2014 The Graphite Development Team 6 | Copyright 2008 Orbitz WorldWide 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | """ 20 | 21 | # 22 | # This module is an implementation of storage API. Here is the 23 | # basic layout of fileformat. 24 | # 25 | # File = Header, Data 26 | # Header = Metadata, Tag+, ArchiveInfo+ 27 | # Metadata = agg_id, max_retention, x_files_factor, archive_count, tag_size, point_size 28 | # Tag = metric 29 | # ArchiveInfo = offset, seconds_per_point, point_count 30 | # Data = Archive+ 31 | # Archive = Point+ 32 | # Point = timestamp, value 33 | # 34 | 35 | import os 36 | import re 37 | import time 38 | import numpy as np 39 | import math 40 | import struct 41 | import operator 42 | import inspect 43 | 44 | from kenshin.agg import Agg 45 | from kenshin.utils import mkdir_p, roundup 46 | from kenshin.consts import DEFAULT_TAG_LENGTH, NULL_VALUE, CHUNK_SIZE 47 | 48 | 49 | LONG_FORMAT = "!L" 50 | LONG_SIZE = struct.calcsize(LONG_FORMAT) 51 | FLOAT_FORMAT = "!f" 52 | FLOAT_SIZE = struct.calcsize(FLOAT_FORMAT) 53 | VALUE_FORMAT = "!d" 54 | VALUE_SIZE = struct.calcsize(VALUE_FORMAT) 55 | POINT_FORMAT = "!L%dd" 56 | METADATA_FORMAT = "!2Lf3L" 57 | METADATA_SIZE = struct.calcsize(METADATA_FORMAT) 58 | ARCHIVEINFO_FORMAT = "!3L" 59 | ARCHIVEINFO_SIZE = struct.calcsize(ARCHIVEINFO_FORMAT) 60 | 61 | # reserved tag index for reserved space, 62 | # this is usefull when adding a tag to a file. 
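# (inter_tag_list[RESERVED_INDEX] is the trailing padding entry: header()
# reports its length as 'reserved_size', and add_tag() can grow a tag into
# that space without rewriting the archive data.)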
63 | RESERVED_INDEX = -1 64 | 65 | 66 | ### Exceptions 67 | 68 | class KenshinException(Exception): 69 | pass 70 | 71 | 72 | class InvalidTime(KenshinException): 73 | pass 74 | 75 | 76 | class InvalidConfig(KenshinException): 77 | pass 78 | 79 | 80 | ### debug tool 81 | 82 | debug = lambda *a, **kw: None 83 | 84 | 85 | def enable_debug(ignore_header=False): 86 | """ 87 | 监控读写操作. 88 | 89 | 由于 header 函数在一次写入中被调用了多次,而 header 数据较小,完全可以读取缓存数据, 90 | 因此 enable_debug 中使用 ignore_header 来忽略了 header 的读操作,从而方便 io 91 | 性能的测试. 92 | """ 93 | global open, debug 94 | 95 | if not ignore_header: 96 | def debug(msg): 97 | print "DEBUG :: %s" % msg 98 | 99 | class open(file): 100 | write_cnt = 0 101 | read_cnt = 0 102 | 103 | def __init__(self, *args, **kwargs): 104 | caller = self.get_caller() 105 | debug("=========== open in %s ===========" % caller) 106 | file.__init__(self, *args, **kwargs) 107 | 108 | def write(self, data): 109 | caller = self.get_caller() 110 | open.write_cnt += 1 111 | debug("Write %d bytes #%d in %s" % (len(data), 112 | self.write_cnt, caller)) 113 | return file.write(self, data) 114 | 115 | def read(self, bytes): 116 | caller = self.get_caller() 117 | if ignore_header and caller == "header": 118 | pass 119 | else: 120 | open.read_cnt += 1 121 | debug("Read %d bytes #%d in %s" % (bytes, self.read_cnt, caller)) 122 | return file.read(self, bytes) 123 | 124 | def get_caller(self): 125 | return inspect.stack()[2][3] 126 | 127 | 128 | ### retention parser 129 | 130 | class RetentionParser(object): 131 | TIME_UNIT = { 132 | 'seconds': 1, 133 | 'minutes': 60, 134 | 'hours': 3600, 135 | 'days': 86400, 136 | 'weeks': 86400 * 7, 137 | 'years': 86400 * 365, 138 | } 139 | # time pattern (e.g. 60s, 12h) 140 | pat = re.compile(r'^(\d+)([a-z]+)$') 141 | 142 | @classmethod 143 | def get_time_unit_name(cls, s): 144 | for k in cls.TIME_UNIT.keys(): 145 | if k.startswith(s): 146 | return k 147 | raise InvalidTime("Invalid time unit: '%s'" % s) 148 | 149 | @classmethod 150 | def get_seconds(cls, time_unit): 151 | return cls.TIME_UNIT[cls.get_time_unit_name(time_unit)] 152 | 153 | @classmethod 154 | def parse_time_str(cls, s): 155 | """ 156 | Parse time string to seconds. 157 | 158 | >>> RetentionParser.parse_time_str('12h') 159 | 43200 160 | """ 161 | if s.isdigit(): 162 | return int(s) 163 | 164 | m = cls.pat.match(s) 165 | if m: 166 | num, unit = m.groups() 167 | return int(num) * cls.get_seconds(unit) 168 | else: 169 | raise InvalidTime("Invalid rention specification '%s'" % s) 170 | 171 | @classmethod 172 | def parse_retention_def(cls, retention_def): 173 | precision, point_cnt = retention_def.strip().split(':') 174 | precision = cls.parse_time_str(precision) 175 | 176 | if point_cnt.isdigit(): 177 | point_cnt = int(point_cnt) 178 | else: 179 | point_cnt = cls.parse_time_str(point_cnt) / precision 180 | 181 | return precision, point_cnt 182 | 183 | 184 | ### Storage 185 | 186 | class Storage(object): 187 | 188 | def __init__(self, data_dir=''): 189 | self.data_dir = data_dir 190 | 191 | def create(self, metric_name, tag_list, archive_list, x_files_factor=None, 192 | agg_name=None): 193 | Storage.validate_archive_list(archive_list, x_files_factor) 194 | 195 | path = self.gen_path(self.data_dir, metric_name) 196 | if os.path.exists(path): 197 | raise IOError('file %s already exits.' % path) 198 | else: 199 | mkdir_p(os.path.dirname(path)) 200 | 201 | # inter_tag_list[RESERVED_INDEX] is reserved space 202 | # to avoid move data points. 
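        # Each empty tag slot contributes DEFAULT_TAG_LENGTH bytes of 'N'
        # padding, which is the room add_tag() later grows into.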
203 | empty_tag_cnt = sum(1 for t in tag_list if not t) 204 | inter_tag_list = tag_list + ['N' * DEFAULT_TAG_LENGTH * empty_tag_cnt] 205 | 206 | with open(path, 'wb') as f: 207 | packed_header, end_offset = self.pack_header( 208 | inter_tag_list, archive_list, x_files_factor, agg_name) 209 | f.write(packed_header) 210 | 211 | # init data 212 | remaining = end_offset - f.tell() 213 | zeroes = '\x00' * CHUNK_SIZE 214 | while remaining > CHUNK_SIZE: 215 | f.write(zeroes) 216 | remaining -= CHUNK_SIZE 217 | f.write(zeroes[:remaining]) 218 | 219 | @staticmethod 220 | def validate_archive_list(archive_list, xff): 221 | """ 222 | Validates an archive_list. 223 | 224 | An archive_list must: 225 | 1. Have at least one archive config. 226 | 2. No duplicates. 227 | 3. Higher precision archives' precision must evenly divide 228 | all lower precison archives' precision. 229 | 4. Lower precision archives must cover larger time intervals 230 | than higher precision archives. 231 | 5. Each archive must have at least enough points to the next 232 | archive. 233 | """ 234 | 235 | # 1 236 | if not archive_list: 237 | raise InvalidConfig("must specify at least one archive config") 238 | 239 | archive_list.sort(key=operator.itemgetter(0)) 240 | 241 | for i, archive in enumerate(archive_list): 242 | try: 243 | next_archive = archive_list[i+1] 244 | except: 245 | break 246 | # 2 247 | if not archive[0] < next_archive[0]: 248 | raise InvalidConfig("two same precision config: '%s' and '%s'" % 249 | (archive, next_archive)) 250 | # 3 251 | if next_archive[0] % archive[0] != 0: 252 | raise InvalidConfig("higher precision must evenly divide lower " 253 | "precision: %s and %s" % 254 | (archive[0], next_archive[0])) 255 | # 4 256 | retention = archive[0] * archive[1] 257 | next_retention = next_archive[0] * next_archive[1] 258 | if not next_retention > retention: 259 | raise InvalidConfig("lower precision archive must cover " 260 | "larger time intervals that higher " 261 | "precision archive: (%d, %s) and (%d, %s)" % 262 | (i, retention, i+1, next_retention)) 263 | # 5 264 | archive_point_cnt = archive[1] 265 | point_per_consolidation = next_archive[0] / archive[0] 266 | if not (archive_point_cnt / xff) >= point_per_consolidation: 267 | raise InvalidConfig("each archive must have at least enough " 268 | "points to consolidate to the next archive: " 269 | "(%d, %s) and (%d, %s) xff=%s" % 270 | (i, retention, i+1, next_retention, xff)) 271 | 272 | @staticmethod 273 | def gen_path(data_dir, metric_name): 274 | """ 275 | Generate file path of `metric_name`. 
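        For example (added doctest-style illustration; assumes POSIX separators):

        >>> Storage.gen_path('/data', 'sys.cpu.user')
        '/data/sys/cpu/user.hs'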
276 | 277 | eg, metric_name is `sys.cpu.user`, the absolute file path will be 278 | `self.data_dir/sys/cpu/user.hs` 279 | """ 280 | if metric_name[0] == '/': 281 | return metric_name 282 | parts = metric_name.split('.') 283 | parts[-1] = parts[-1] + '.hs' 284 | file_path = os.path.sep.join(parts) 285 | return os.path.join(data_dir, file_path) 286 | 287 | @staticmethod 288 | def pack_header(inter_tag_list, archive_list, x_files_factor, agg_name): 289 | # tag 290 | tag = str('\t'.join(inter_tag_list)) 291 | 292 | # metadata 293 | agg_id = Agg.get_agg_id(agg_name) 294 | max_retention = reduce(operator.mul, archive_list[-1], 1) 295 | xff = x_files_factor 296 | archive_cnt = len(archive_list) 297 | tag_size = len(tag) 298 | point_size = struct.calcsize(POINT_FORMAT % (len(inter_tag_list) - 1)) 299 | metadata = struct.pack(METADATA_FORMAT, agg_id, max_retention, 300 | xff, archive_cnt, tag_size, point_size) 301 | 302 | # archive_info 303 | header = [metadata, tag] 304 | offset = METADATA_SIZE + len(tag) + ARCHIVEINFO_SIZE * len(archive_list) 305 | 306 | for sec, cnt in archive_list: 307 | archive_info = struct.pack(ARCHIVEINFO_FORMAT, offset, sec, cnt) 308 | header.append(archive_info) 309 | offset += point_size * cnt 310 | return ''.join(header), offset 311 | 312 | @staticmethod 313 | def header(fh): 314 | origin_offset = fh.tell() 315 | if origin_offset != 0: 316 | fh.seek(0) 317 | packed_metadata = fh.read(METADATA_SIZE) 318 | agg_id, max_retention, xff, archive_cnt, tag_size, point_size = struct.unpack( 319 | METADATA_FORMAT, packed_metadata) 320 | inter_tag_list = fh.read(tag_size).split('\t') 321 | 322 | archives = [] 323 | for i in xrange(archive_cnt): 324 | packed_archive_info = fh.read(ARCHIVEINFO_SIZE) 325 | offset, sec, cnt = struct.unpack( 326 | ARCHIVEINFO_FORMAT, packed_archive_info) 327 | archive_info = { 328 | 'offset': offset, 329 | 'sec_per_point': sec, 330 | 'count': cnt, 331 | 'size': point_size * cnt, 332 | 'retention': sec * cnt, 333 | } 334 | archives.append(archive_info) 335 | 336 | fh.seek(origin_offset) 337 | tag_list = inter_tag_list[:RESERVED_INDEX] 338 | info = { 339 | 'agg_id': agg_id, 340 | 'max_retention': max_retention, 341 | 'x_files_factor': xff, 342 | 'tag_list': tag_list, 343 | 'reserved_size': len(inter_tag_list[RESERVED_INDEX]), 344 | 'point_size': point_size, 345 | 'point_format': POINT_FORMAT % len(tag_list), 346 | 'archive_list': archives, 347 | } 348 | return info 349 | 350 | @staticmethod 351 | def add_tag(tag, path, pos_idx): 352 | with open(path, 'r+b') as fh: 353 | header_info = Storage.header(fh) 354 | tag_list = header_info['tag_list'] 355 | reserved_size = header_info['reserved_size'] 356 | 357 | archive_list = [(a['sec_per_point'], a['count']) 358 | for a in header_info['archive_list']] 359 | agg_name = Agg.get_agg_name(header_info['agg_id']) 360 | 361 | if len(tag) <= len(tag_list[pos_idx]) + reserved_size: 362 | diff = len(tag_list[pos_idx]) + reserved_size - len(tag) 363 | tag_list[pos_idx] = tag 364 | inter_tag_list = tag_list + ['N' * diff] 365 | packed_header, _ = Storage.pack_header( 366 | inter_tag_list, archive_list, header_info['x_files_factor'], agg_name) 367 | fh.write(packed_header) 368 | else: 369 | tag_list[pos_idx] = tag 370 | inter_tag_list = tag_list + [''] 371 | packed_header, _ = Storage.pack_header( 372 | inter_tag_list, archive_list, header_info['x_files_factor'], agg_name) 373 | tmpfile = path + '.tmp' 374 | with open(tmpfile, 'wb') as fh_tmp: 375 | fh_tmp.write(packed_header) 376 | 
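                    # copy the archive data unchanged after the rewritten
                    # header, then swap the temporary file into place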
fh.seek(header_info['archive_list'][0]['offset']) 377 | while True: 378 | bytes = fh.read(CHUNK_SIZE) 379 | if not bytes: 380 | break 381 | fh_tmp.write(bytes) 382 | os.rename(tmpfile, path) 383 | 384 | def update(self, path, points, now=None, mtime=None): 385 | # order points by timestamp, newest first 386 | points.sort(key=operator.itemgetter(0), reverse=True) 387 | mtime = mtime or int(os.stat(path).st_mtime) 388 | with open(path, 'r+b') as f: 389 | header = self.header(f) 390 | if now is None: 391 | now = int(time.time()) 392 | archive_list = header['archive_list'] 393 | i = 0 394 | curr_archive = archive_list[i] 395 | curr_points = [] 396 | 397 | for point in points: 398 | age = now - point[0] 399 | 400 | while age > curr_archive['retention']: 401 | # we can't fit any more points in archive i 402 | if curr_points: 403 | timestamp_range = (min(mtime, curr_points[-1][0]), 404 | curr_points[0][0]) 405 | self._update_archive(f, header, curr_archive, 406 | curr_points, i, timestamp_range) 407 | curr_points = [] 408 | try: 409 | curr_archive = archive_list[i+1] 410 | i += 1 411 | except IndexError: 412 | curr_archive = None 413 | break 414 | 415 | if not curr_archive: 416 | # drop remaining points that don't fit in the database 417 | break 418 | 419 | curr_points.append(point) 420 | 421 | if curr_archive and curr_points: 422 | timestamp_range = (min(mtime, curr_points[-1][0]), 423 | curr_points[0][0]) 424 | self._update_archive(f, header, curr_archive, curr_points, i, 425 | timestamp_range) 426 | 427 | def _update_archive(self, fh, header, archive, points, archive_idx, timestamp_range): 428 | step = archive['sec_per_point'] 429 | aligned_points = sorted((p[0] - (p[0] % step), p[1]) 430 | for p in points if p) 431 | if not aligned_points: 432 | return 433 | 434 | # create a packed string for each contiguous sequence of points 435 | point_format = header['point_format'] 436 | packed_strings = [] 437 | curr_strings = [] 438 | previous_ts = None 439 | len_aligned_points = len(aligned_points) 440 | for i in xrange(0, len_aligned_points): 441 | # take last val of duplicates 442 | if (i+1 < len_aligned_points and 443 | aligned_points[i][0] == aligned_points[i+1][0]): 444 | continue 445 | (ts, val) = aligned_points[i] 446 | packed_str = struct.pack(point_format, ts, *val) 447 | if (not previous_ts) or (ts == previous_ts + step): 448 | curr_strings.append(packed_str) 449 | else: 450 | start_ts = previous_ts - (step * (len(curr_strings) - 1)) 451 | packed_strings.append((start_ts, ''.join(curr_strings))) 452 | curr_strings = [packed_str] 453 | previous_ts = ts 454 | 455 | if curr_strings: 456 | start_ts = previous_ts - (step * (len(curr_strings) - 1)) 457 | packed_strings.append((start_ts, ''.join(curr_strings))) 458 | 459 | # read base point and determine where our writes will start 460 | base_point = self._read_base_point(fh, archive, header) 461 | base_ts = base_point[0] 462 | 463 | first_ts = aligned_points[0][0] 464 | if base_ts == 0: 465 | # this file's first update, so set it to first timestamp 466 | base_ts = first_ts 467 | 468 | # write all of our packed strings in locations 469 | # determined by base_ts 470 | archive_end = archive['offset'] + archive['size'] 471 | for (ts, packed_str) in packed_strings: 472 | offset = self._timestamp2offset(ts, base_ts, header, archive) 473 | bytes_beyond = (offset + len(packed_str)) - archive_end 474 | fh.seek(offset) 475 | if bytes_beyond > 0: 476 | fh.write(packed_str[:-bytes_beyond]) 477 | fh.seek(archive['offset']) 478 | 
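                # wrap around: the tail of the packed string continues at the
                # start of the archive (each archive is a ring buffer)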
fh.write(packed_str[-bytes_beyond:]) 479 | else: 480 | fh.write(packed_str) 481 | 482 | # now we propagate the updates to lower-precision archives 483 | archive_list = header['archive_list'] 484 | next_archive_idx = archive_idx + 1 485 | if next_archive_idx < len(archive_list): 486 | # update timestamp_range 487 | time_start, time_end = timestamp_range 488 | time_end = max(time_end, aligned_points[-1][0]) 489 | time_start = min(time_start, aligned_points[0][0]) 490 | timestamp_range = (time_start, time_end) 491 | self._propagate(fh, header, archive, archive_list[next_archive_idx], 492 | timestamp_range, next_archive_idx) 493 | 494 | def _read_base_point(self, fh, archive, header): 495 | fh.seek(archive['offset']) 496 | base_point = fh.read(header['point_size']) 497 | return struct.unpack(header['point_format'], base_point) 498 | 499 | def _timestamp2offset(self, ts, base_ts, header, archive): 500 | time_distance = ts - base_ts 501 | point_distince = time_distance / archive['sec_per_point'] 502 | byte_distince = point_distince * header['point_size'] 503 | return archive['offset'] + (byte_distince % archive['size']) 504 | 505 | @staticmethod 506 | def get_propagate_timeunit(low_sec_per_point, high_sec_per_point, xff): 507 | num_point = low_sec_per_point / high_sec_per_point 508 | return int(math.ceil(num_point * xff)) * high_sec_per_point 509 | 510 | def _propagate(self, fh, header, higher, lower, timestamp_range, lower_idx): 511 | """ 512 | propagte update to low precision archives. 513 | """ 514 | from_time, until_time = timestamp_range 515 | timeunit = Storage.get_propagate_timeunit(lower['sec_per_point'], 516 | higher['sec_per_point'], 517 | header['x_files_factor']) 518 | from_time_boundary = from_time / timeunit 519 | until_time_boundary = until_time / timeunit 520 | if (from_time_boundary == until_time_boundary) and (from_time % timeunit) != 0: 521 | return False 522 | 523 | if lower['sec_per_point'] <= timeunit: 524 | lower_interval_end = until_time_boundary * timeunit 525 | lower_interval_start = min(lower_interval_end-timeunit, from_time_boundary*timeunit) 526 | else: 527 | lower_interval_end = roundup(until_time, lower['sec_per_point']) 528 | lower_interval_start = from_time - from_time % lower['sec_per_point'] 529 | 530 | fh.seek(higher['offset']) 531 | packed_base_interval = fh.read(LONG_SIZE) 532 | higher_base_interval = struct.unpack(LONG_FORMAT, packed_base_interval)[0] 533 | 534 | if higher_base_interval == 0: 535 | higher_first_offset = higher['offset'] 536 | else: 537 | higher_first_offset = self._timestamp2offset(lower_interval_start, 538 | higher_base_interval, 539 | header, 540 | higher) 541 | 542 | higher_point_num = (lower_interval_end - lower_interval_start) / higher['sec_per_point'] 543 | higher_size = higher_point_num * header['point_size'] 544 | relative_first_offset = higher_first_offset - higher['offset'] 545 | relative_last_offset = (relative_first_offset + higher_size) % higher['size'] 546 | higher_last_offset = relative_last_offset + higher['offset'] 547 | 548 | # get unpacked series str 549 | # TODO: abstract this to a function 550 | fh.seek(higher_first_offset) 551 | if higher_first_offset < higher_last_offset: 552 | series_str = fh.read(higher_last_offset - higher_first_offset) 553 | else: 554 | higher_end = higher['offset'] + higher['size'] 555 | series_str = fh.read(higher_end - higher_first_offset) 556 | fh.seek(higher['offset']) 557 | series_str += fh.read(higher_last_offset - higher['offset']) 558 | 559 | # now we unpack the series data we just read 
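        # (e.g. with 8 metrics per file the point format is '!L8d', so a
        # 3-point series unpacks with '!' + 'L8d' * 3)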
560 | point_format = header['point_format'] 561 | byte_order, point_type = point_format[0], point_format[1:] 562 | point_num = len(series_str) / header['point_size'] 563 | # assert point_num == higher_point_num 564 | series_format = byte_order + (point_type * point_num) 565 | unpacked_series = struct.unpack(series_format, series_str) 566 | 567 | # and finally we construct a list of values 568 | point_cnt = (lower_interval_end - lower_interval_start) / lower['sec_per_point'] 569 | tag_cnt = len(header['tag_list']) 570 | agg_cnt = lower['sec_per_point'] / higher['sec_per_point'] 571 | step = (tag_cnt + 1) * agg_cnt 572 | lower_points = [None] * point_cnt 573 | 574 | unpacked_series = unpacked_series[::-1] 575 | ts = lower_interval_end 576 | for i in xrange(0, len(unpacked_series), step): 577 | higher_points = unpacked_series[i: i+step] 578 | ts -= higher['sec_per_point'] * agg_cnt 579 | agg_value = self._get_agg_value(higher_points, tag_cnt, header['agg_id'], 580 | lower_interval_start, lower_interval_end) 581 | lower_points[i/step] = (ts, agg_value) 582 | 583 | lower_points = [x for x in lower_points if x and x[0]] # filter zero item 584 | timestamp_range = (lower_interval_start, max(lower_interval_end, until_time)) 585 | self._update_archive(fh, header, lower, lower_points, lower_idx, 586 | timestamp_range) 587 | 588 | def _get_agg_value(self, higher_points, tag_cnt, agg_id, ts_start, ts_end): 589 | higher_points = higher_points[::-1] 590 | agg_func = Agg.get_agg_func(agg_id) 591 | step = tag_cnt + 1 592 | 593 | # points format: 594 | # t1 v11 v12, 595 | # t2 v21 v22, 596 | # t3 v31 v32, 597 | points = [higher_points[i: i+step] 598 | for i in xrange(0, len(higher_points), step)] 599 | valid_points = self.filter_points_by_time(points, ts_start, ts_end) 600 | if not valid_points: 601 | val = [NULL_VALUE] * tag_cnt 602 | else: 603 | points = np.array(valid_points) 604 | points = points.transpose() 605 | val = [agg_func(self.filter_values(x)) for x in points[1:]] 606 | return val 607 | 608 | @staticmethod 609 | def filter_points_by_time(points, ts_start, ts_end): 610 | return [p for p in points if ts_start <= p[0] < ts_end] 611 | 612 | @staticmethod 613 | def filter_values(points): 614 | rs = [p for p in points if p != NULL_VALUE] 615 | return rs if rs else [NULL_VALUE] 616 | 617 | def fetch(self, path, from_time, until_time=None, now=None): 618 | with open(path, 'rb') as f: 619 | header = self.header(f) 620 | 621 | # validate timestamp 622 | if now is None: 623 | now = int(time.time()) 624 | if until_time is None: 625 | until_time = now 626 | if from_time >= until_time: 627 | raise InvalidTime("from_time '%s' is after unitl_time '%s'" % 628 | (from_time, until_time)) 629 | 630 | oldest_time = now - header['max_retention'] 631 | if from_time > now: 632 | return None 633 | if until_time < oldest_time: 634 | return None 635 | 636 | until_time = min(now, until_time) 637 | from_time = max(oldest_time, from_time) 638 | 639 | diff = now - from_time 640 | for archive in header['archive_list']: 641 | if archive['retention'] >= diff: 642 | break 643 | 644 | return self._archive_fetch(f, header, archive, from_time, until_time) 645 | 646 | def _archive_fetch(self, fh, header, archive, from_time, until_time): 647 | from_time = roundup(from_time, archive['sec_per_point']) 648 | until_time = roundup(until_time, archive['sec_per_point']) 649 | tag_cnt = len(header['tag_list']) 650 | null_point = (None,) * tag_cnt 651 | 652 | base_point = self._read_base_point(fh, archive, header) 653 | base_ts = 
base_point[0] 654 | 655 | if base_ts == 0: 656 | step = archive['sec_per_point'] 657 | cnt = (until_time - from_time) / step 658 | time_info = (from_time, until_time, step) 659 | val_list = [null_point] * cnt 660 | return (header, time_info, val_list) 661 | 662 | from_offset = self._timestamp2offset(from_time, base_ts, header, archive) 663 | until_offset = self._timestamp2offset(until_time, base_ts, header, archive) 664 | 665 | fh.seek(from_offset) 666 | if from_offset < until_offset: 667 | series_str = fh.read(until_offset - from_offset) 668 | else: 669 | archive_end = archive['offset'] + archive['size'] 670 | series_str = fh.read(archive_end - from_offset) 671 | fh.seek(archive['offset']) 672 | series_str += fh.read(until_offset - archive['offset']) 673 | 674 | ## unpack series string 675 | point_format = header['point_format'] 676 | byte_order, point_type = point_format[0], point_format[1:] 677 | cnt = len(series_str) / header['point_size'] 678 | series_format = byte_order + point_type * cnt 679 | unpacked_series = struct.unpack(series_format, series_str) 680 | 681 | ## construct value list 682 | # pre-allocate entire list or speed 683 | val_list = [null_point] * cnt 684 | step = tag_cnt + 1 685 | sec_per_point = archive['sec_per_point'] 686 | for i in xrange(0, len(unpacked_series), step): 687 | point_ts = unpacked_series[i] 688 | if from_time <= point_ts < until_time: 689 | val = unpacked_series[i+1: i+step] 690 | idx = (point_ts - from_time) / sec_per_point 691 | val_list[idx] = self._conver_null_value(val) 692 | 693 | time_info = (from_time, until_time, sec_per_point) 694 | return header, time_info, val_list 695 | 696 | @staticmethod 697 | def _conver_null_value(point_val): 698 | val = [None if x == NULL_VALUE else x 699 | for x in point_val] 700 | return tuple(val) 701 | -------------------------------------------------------------------------------- /kenshin/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/kenshin/tools/__init__.py -------------------------------------------------------------------------------- /kenshin/tools/hash.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from rurouni.fnv1a import get_int32_hash 3 | 4 | class Hash: 5 | def __init__(self, nodes): 6 | self.nodes = nodes 7 | 8 | def add_node(self, node): 9 | self.nodes.append(node) 10 | 11 | def remove_code(self, node): 12 | self.nodes.remove(node) 13 | 14 | def get_node(self, key): 15 | idx = get_int32_hash(key) % len(self.nodes) 16 | return self.nodes[idx] 17 | 18 | def get_nodes(self, key): 19 | idx = get_int32_hash(key) % len(self.nodes) 20 | return self.nodes[idx:] + self.nodes[:idx] 21 | -------------------------------------------------------------------------------- /kenshin/tools/whisper_tool.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | import re 4 | import struct 5 | 6 | import kenshin 7 | from rurouni.conf import OrderedConfigParser 8 | 9 | 10 | longFormat = "!L" 11 | longSize = struct.calcsize(longFormat) 12 | floatFormat = "!f" 13 | floatSize = struct.calcsize(floatFormat) 14 | valueFormat = "!d" 15 | valueSize = struct.calcsize(valueFormat) 16 | pointFormat = "!Ld" 17 | pointSize = struct.calcsize(pointFormat) 18 | metadataFormat = "!2LfL" 19 | metadataSize = struct.calcsize(metadataFormat) 20 | archiveInfoFormat = "!3L" 21 
| archiveInfoSize = struct.calcsize(archiveInfoFormat) 22 | 23 | agg_type_dict = dict({ 24 | 1: 'average', 25 | 2: 'sum', 26 | 3: 'last', 27 | 4: 'max', 28 | 5: 'min' 29 | }) 30 | 31 | 32 | def get_agg_name(agg_id): 33 | return agg_type_dict[agg_id] 34 | 35 | 36 | def remote_url(filepath): 37 | return filepath.startswith('http://') 38 | 39 | 40 | def read_header(fh): 41 | packed_meta = fh.read(metadataSize) 42 | agg_type, max_ret, xff, archive_cnt = struct.unpack( 43 | metadataFormat, packed_meta) 44 | 45 | archives = [] 46 | for i in xrange(archive_cnt): 47 | packed_archive_info = fh.read(archiveInfoSize) 48 | off, sec, cnt = struct.unpack(archiveInfoFormat, packed_archive_info) 49 | archive_info = { 50 | 'offset': off, 51 | 'sec_per_point': sec, 52 | 'count': cnt, 53 | 'size': pointSize * cnt, 54 | 'retention': sec * cnt, 55 | } 56 | archives.append(archive_info) 57 | 58 | info = { 59 | 'xff': xff, 60 | 'archives': archives, 61 | 'agg_type': agg_type, 62 | } 63 | fh.close() 64 | return info 65 | 66 | 67 | ### schema (copy from carbon with some small change) 68 | 69 | class Schema: 70 | def match(self, metric): 71 | raise NotImplementedError() 72 | 73 | 74 | class DefaultSchema(Schema): 75 | def __init__(self, name, archives): 76 | self.name = name 77 | self.archives = archives 78 | 79 | def match(self, metric): 80 | return True 81 | 82 | 83 | class PatternSchema(Schema): 84 | def __init__(self, name, pattern, archives): 85 | self.name = name 86 | self.pattern = pattern 87 | self.regex = re.compile(pattern) 88 | self.archives = archives 89 | 90 | def match(self, metric): 91 | return self.regex.search(metric) 92 | 93 | class Archive: 94 | def __init__(self, secondsPerPoint, points): 95 | self.secondsPerPoint = int(secondsPerPoint) 96 | self.points = int(points) 97 | 98 | def __str__(self): 99 | return "Archive = (Seconds per point: %d, Datapoints to save: %d)" % ( 100 | self.secondsPerPoint, self.points) 101 | 102 | def getTuple(self): 103 | return (self.secondsPerPoint, self.points) 104 | 105 | @staticmethod 106 | def fromString(retentionDef): 107 | rs = kenshin.parse_retention_def(retentionDef) 108 | return Archive(*rs) 109 | 110 | 111 | def loadStorageSchemas(storage_schemas_conf): 112 | schemaList = [] 113 | config = OrderedConfigParser() 114 | config.read(storage_schemas_conf) 115 | 116 | for section in config.sections(): 117 | options = dict(config.items(section)) 118 | pattern = options.get('pattern') 119 | 120 | retentions = options['retentions'].split(',') 121 | archives = [Archive.fromString(s).getTuple() for s in retentions] 122 | 123 | mySchema = PatternSchema(section, pattern, archives) 124 | schemaList.append(mySchema) 125 | 126 | schemaList.append(defaultSchema) 127 | return schemaList 128 | 129 | 130 | def loadAggregationSchemas(storage_aggregation_conf): 131 | schemaList = [] 132 | config = OrderedConfigParser() 133 | config.read(storage_aggregation_conf) 134 | 135 | for section in config.sections(): 136 | options = dict(config.items(section)) 137 | pattern = options.get('pattern') 138 | aggregationMethod = options.get('aggregationmethod') 139 | archives = aggregationMethod 140 | mySchema = PatternSchema(section, pattern, archives) 141 | schemaList.append(mySchema) 142 | 143 | schemaList.append(defaultAggregation) 144 | return schemaList 145 | 146 | defaultArchive = Archive(60, 60 * 24 * 7) # default retention for unclassified data (7 days of minutely data) 147 | defaultSchema = DefaultSchema('default', [defaultArchive]) 148 | defaultAggregation = DefaultSchema('default', 
(None, None)) 149 | 150 | 151 | class NewSchema(Schema): 152 | def __init__(self, name, archives, aggregationMethod): 153 | self.name = name 154 | self.archives = archives 155 | self.aggregationMethod = aggregationMethod 156 | 157 | 158 | def gen_whisper_schema_func(whisper_conf_dir): 159 | storage_schemas_conf = os.path.join(whisper_conf_dir, 'storage-schemas.conf') 160 | storage_aggregation_conf = os.path.join(whisper_conf_dir, 'storage-aggregation.conf') 161 | storage_schemas = loadStorageSchemas(storage_schemas_conf) 162 | storage_aggs = loadAggregationSchemas(storage_aggregation_conf) 163 | 164 | def get_schema(schemas, metric): 165 | for schema in schemas: 166 | if schema.match(metric): 167 | return schema 168 | 169 | def _(metric): 170 | schema = get_schema(storage_schemas, metric) 171 | agg = get_schema(storage_aggs, metric) 172 | return NewSchema(schema.name, schema.archives, agg.archives) 173 | return _ 174 | -------------------------------------------------------------------------------- /kenshin/utils.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os 4 | import errno 5 | 6 | 7 | def get_metric(path): 8 | import re 9 | abspath = os.path.abspath(path) 10 | realpath = os.path.realpath(path) 11 | metric = None 12 | if abspath != realpath: 13 | try: 14 | metric = re.split('/link/[a-z0-9]+/', abspath)[1] 15 | metric = metric[:-3] # remove .hs 16 | metric = metric.replace('/', '.') 17 | except IndexError: 18 | pass 19 | return metric 20 | 21 | 22 | def mkdir_p(path): 23 | try: 24 | os.makedirs(path) 25 | except OSError as exc: 26 | if exc.errno == errno.EEXIST and os.path.isdir(path): 27 | pass 28 | else: 29 | raise 30 | 31 | 32 | def roundup(x, base): 33 | """ 34 | Roundup to nearest multiple of `base`. 
35 | 36 | >>> roundup(21, 10) 37 | 30 38 | >>> roundup(20, 10) 39 | 20 40 | >>> roundup(19, 10) 41 | 20 42 | """ 43 | t = x % base 44 | return (x - t + base) if t else x 45 | 46 | 47 | if __name__ == '__main__': 48 | import doctest 49 | doctest.testmod() 50 | -------------------------------------------------------------------------------- /misc/init_setup_demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PWD=`pwd` 4 | 5 | # init config 6 | 7 | cp conf/storage-schemas.conf.example conf/storage-schemas.conf 8 | cp conf/rurouni.conf.example conf/rurouni.conf 9 | sed -i".bak" 's?/data/kenshin?'$PWD'?g' conf/rurouni.conf 10 | 11 | # init storage directory 12 | 13 | mkdir -p $PWD/storage/data 14 | mkdir -p $PWD/storage/link 15 | mkdir -p $PWD/storage/log 16 | mkdir -p $PWD/storage/run 17 | touch $PWD/storage/index 18 | -------------------------------------------------------------------------------- /misc/update_version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # ln -sf ../../misc/update_version.sh .git/hooks/pre-commit 3 | VERSIONING_SCRIPT="`pwd`/misc/versioning.py" 4 | VERSIONING_FILE="`pwd`/kenshin/__init__.py" 5 | TMPFILE=$VERSIONING_FILE".tmp" 6 | cat $VERSIONING_FILE | python $VERSIONING_SCRIPT --clean | python $VERSIONING_SCRIPT > $TMPFILE 7 | mv $TMPFILE $VERSIONING_FILE 8 | git add $VERSIONING_FILE -------------------------------------------------------------------------------- /misc/versioning.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # from: https://gist.githubusercontent.com/pkrusche/7369262/raw/5bf2dc8afb88d3fdde7be6d16ee4290db6735f37/versioning.py 3 | 4 | """ Git Versioning Script 5 | 6 | Will transform stdin to expand some keywords with git version/author/date information. 7 | 8 | Specify --clean to remove this information before commit. 9 | 10 | Setup: 11 | 12 | 1. Copy versioning.py into your git repository 13 | 14 | 2. Run: 15 | 16 | git config filter.versioning.smudge 'python versioning.py' 17 | git config filter.versioning.clean 'python versioning.py --clean' 18 | echo 'version.py filter=versioning' >> .gitattributes 19 | git add versioning.py 20 | 21 | 22 | 3. add a version.py file with this contents: 23 | 24 | __commit__ = "" 25 | __author__ = "" 26 | __email__ = "" 27 | __date__ = "" 28 | 29 | """ 30 | 31 | import sys 32 | import subprocess 33 | import re 34 | 35 | 36 | def main(): 37 | clean = False 38 | if len(sys.argv) > 1: 39 | if sys.argv[1] == '--clean': 40 | clean = True 41 | 42 | # initialise empty here. Otherwise: forkbomb through the git calls. 
43 | subst_list = { 44 | "commit": "", 45 | "date": "", 46 | "author": "", 47 | "email": "" 48 | } 49 | 50 | for line in sys.stdin: 51 | if not clean: 52 | subst_list = { 53 | "commit": subprocess.check_output(['git', 'describe', '--always']), 54 | "date": subprocess.check_output(['git', 'log', '--pretty=format:"%ad"', '-1']), 55 | "author": subprocess.check_output(['git', 'log', '--pretty=format:"%an"', '-1']), 56 | "email": subprocess.check_output(['git', 'log', '--pretty=format:"%ae"', '-1']) 57 | } 58 | for k, v in subst_list.iteritems(): 59 | v = re.sub(r'[\n\r\t"\']', "", v) 60 | rexp = "__%s__\s*=[\s'\"]+" % k 61 | line = re.sub(rexp, "__%s__ = \"%s\"\n" % (k, v), line) 62 | sys.stdout.write(line) 63 | else: 64 | for k in subst_list: 65 | rexp = "__%s__\s*=.*" % k 66 | line = re.sub(rexp, "__%s__ = \"\"" % k, line) 67 | sys.stdout.write(line) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | nose==1.3.7 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Cython 2 | numpy==1.8.0 3 | zope.interface==4.1.1 4 | Twisted==13.1 5 | -------------------------------------------------------------------------------- /rurouni/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/rurouni/__init__.py -------------------------------------------------------------------------------- /rurouni/cache.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """ 4 | Copyright 2015-Present Douban Inc. 5 | Copyright 2009 Chris Davis 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | """ 16 | 17 | import os 18 | import time 19 | from threading import Lock 20 | 21 | import kenshin 22 | from kenshin.consts import NULL_VALUE 23 | from rurouni import log 24 | from rurouni.conf import settings 25 | from rurouni.storage import ( 26 | getFilePath, createLink, StorageSchemas, rebuildIndex, rebuildLink 27 | ) 28 | from rurouni.utils import TokenBucket, get_instance_of_metric 29 | from rurouni.exceptions import TokenBucketFull, UnexpectedMetric 30 | 31 | 32 | class MetricCache(object): 33 | """ 34 | (schema, file_idx, pos_idx) 35 | """ 36 | def __init__(self): 37 | self.lock = Lock() 38 | self.metric_idxs = {} 39 | self.schema_caches = {} 40 | self.metrics_fh = None 41 | self.storage_schemas = None 42 | self.token_bucket = None 43 | 44 | def __del__(self): 45 | if self.metrics_fh is not None: 46 | self.metrics_fh.close() 47 | 48 | def init(self): 49 | with self.lock: 50 | index_file = settings.INDEX_FILE 51 | instance_data_dir = os.path.join( 52 | settings.LOCAL_DATA_DIR, settings.instance) 53 | instance_link_dir = os.path.join( 54 | settings.LOCAL_LINK_DIR, settings.instance) 55 | 56 | if os.path.exists(instance_data_dir): 57 | if not os.path.exists(index_file): 58 | rebuildIndex(instance_data_dir, index_file) 59 | if not os.path.exists(instance_link_dir): 60 | rebuildLink(instance_data_dir, instance_link_dir) 61 | 62 | # init token bucket 63 | capacity = settings.MAX_CREATES_PER_MINUTE 64 | fill_rate = float(capacity) / 60 65 | self.token_bucket = TokenBucket(capacity, fill_rate) 66 | 67 | self._initCache(index_file) 68 | 69 | def _initCache(self, index_file): 70 | # avoid repeated call 71 | if self.metrics_fh is not None: 72 | return 73 | 74 | self._initStorageSchemas() 75 | if os.path.exists(index_file): 76 | MAX_ALLOW_ERR_LINE = 1 77 | err_line_cnt = 0 78 | with open(index_file) as f: 79 | for line in f: 80 | line = line.strip('\n') 81 | try: 82 | metric, schema_name, file_idx, file_pos = line.split(" ") 83 | file_idx = int(file_idx) 84 | file_pos = int(file_pos) 85 | except Exception as e: 86 | if err_line_cnt < MAX_ALLOW_ERR_LINE: 87 | err_line_cnt += 1 88 | continue 89 | else: 90 | raise Exception('Index file has many error: %s' % e) 91 | 92 | schema = self.storage_schemas.getSchemaByName(schema_name) 93 | schema_cache = self.getSchemaCache(schema) 94 | schema_cache.add(schema, file_idx, file_pos) 95 | self.metric_idxs[metric] = (schema.name, file_idx, file_pos) 96 | 97 | self.metrics_fh = open(index_file, 'a') 98 | 99 | def _initStorageSchemas(self): 100 | if self.storage_schemas is None: 101 | conf_file = os.path.join(settings.CONF_DIR, 'storage-schemas.conf') 102 | self.storage_schemas = StorageSchemas(conf_file) 103 | 104 | def put(self, metric, datapoint): 105 | try: 106 | (schema_name, file_idx, pos_idx) = self.getMetricIdx(metric) 107 | except (TokenBucketFull, UnexpectedMetric): 108 | return 109 | file_cache = self.schema_caches[schema_name][file_idx] 110 | file_cache.put(pos_idx, datapoint) 111 | 112 | def getMetricIdx(self, metric): 113 | with self.lock: 114 | if metric in self.metric_idxs: 115 | return self.metric_idxs[metric] 116 | else: 117 | from rurouni.state import instrumentation 118 | 119 | if not self.token_bucket.consume(1): 120 | instrumentation.incr('droppedCreates') 121 | raise TokenBucketFull() 122 | 123 | instance = get_instance_of_metric(metric, settings['NUM_ALL_INSTANCE']) 124 | if (instance != int(settings['instance']) and 125 | not metric.startswith(settings.RUROUNI_METRIC)): 126 | log.cache("UnexpectedMetric: %s" % metric) 127 | 
instrumentation.incr('droppedCreates') 128 | raise UnexpectedMetric() 129 | 130 | instrumentation.incr('creates') 131 | 132 | schema = self.storage_schemas.getSchemaByMetric(metric) 133 | schema_cache = self.getSchemaCache(schema) 134 | file_idx = schema_cache.getFileCacheIdx(schema) 135 | pos_idx = schema_cache[file_idx].getPosIdx() 136 | 137 | # create file 138 | file_path = getFilePath(schema.name, file_idx) 139 | if not os.path.exists(file_path): 140 | tags = [''] * schema.metrics_max_num 141 | kenshin.create(file_path, tags, schema.archives, schema.xFilesFactor, 142 | schema.aggregationMethod) 143 | # update file metadata 144 | kenshin.add_tag(metric, file_path, pos_idx) 145 | # create link 146 | createLink(metric, file_path) 147 | # create index 148 | self.metrics_fh.write("%s %s %s %s\n" % (metric, schema.name, file_idx, pos_idx)) 149 | 150 | self.metric_idxs[metric] = (schema.name, file_idx, pos_idx) 151 | return self.metric_idxs[metric] 152 | 153 | def getSchemaCache(self, schema): 154 | try: 155 | return self.schema_caches[schema.name] 156 | except: 157 | schema_cache = SchemaCache() 158 | self.schema_caches[schema.name] = schema_cache 159 | return schema_cache 160 | 161 | def get(self, metric): 162 | if metric not in self.metric_idxs: 163 | return [] 164 | (schema_name, file_idx, pos_idx) = self.metric_idxs[metric] 165 | file_cache = self.schema_caches[schema_name][file_idx] 166 | now = int(time.time()) 167 | data = file_cache.get(end_ts=now) 168 | return [(ts, val[pos_idx]) for ts, val in data 169 | if val[pos_idx] != NULL_VALUE] 170 | 171 | def pop(self, schema_name, file_idx, end_ts=None, clear=True): 172 | file_cache = self.schema_caches[schema_name][file_idx] 173 | datapoints = file_cache.get(end_ts=end_ts, clear=clear) 174 | return datapoints 175 | 176 | def writableFileCaches(self): 177 | now = int(time.time()) 178 | with self.lock: 179 | return[(schema_name, file_idx) 180 | for (schema_name, schema_cache) in self.schema_caches.items() 181 | for file_idx in range(schema_cache.size()) 182 | if schema_cache[file_idx].canWrite(now)] 183 | 184 | def getAllFileCaches(self): 185 | return [(schema_name, file_idx) 186 | for (schema_name, schema_cache) in self.schema_caches.iteritems() 187 | for file_idx in range(schema_cache.size())] 188 | 189 | 190 | class SchemaCache(object): 191 | def __init__(self): 192 | self.file_caches = [] 193 | self.curr_idx = 0 194 | 195 | def __getitem__(self, idx): 196 | return self.file_caches[idx] 197 | 198 | def size(self): 199 | return len(self.file_caches) 200 | 201 | def getFileCacheIdx(self, schema): 202 | while self.curr_idx < len(self.file_caches): 203 | if not self.file_caches[self.curr_idx].metricFull(): 204 | return self.curr_idx 205 | else: 206 | self.curr_idx += 1 207 | # there is no file cache avaiable, we create a new one 208 | cache = FileCache(schema) 209 | self.file_caches.append(cache) 210 | return self.curr_idx 211 | 212 | def add(self, schema, file_idx, file_pos): 213 | if len(self.file_caches) <= file_idx: 214 | for _ in range(len(self.file_caches), file_idx + 1): 215 | self.file_caches.append(FileCache(schema)) 216 | self.file_caches[file_idx].add(file_pos) 217 | 218 | 219 | class FileCache(object): 220 | def __init__(self, schema): 221 | self.lock = Lock() 222 | self.metrics_max_num = schema.metrics_max_num 223 | self.bitmap = 0 224 | self.avaiable_pos_idx = 0 225 | self.resolution = schema.archives[0][0] 226 | self.retention = schema.cache_retention 227 | 228 | # +1 to avoid self.points_num == 0 229 | self.points_num = 
self.retention / self.resolution + 1 230 | self.cache_size = int(self.points_num * schema.cache_ratio) 231 | self.points = [NULL_VALUE] * self.metrics_max_num * self.cache_size 232 | self.base_idxs = [i * self.cache_size for i in xrange(self.metrics_max_num)] 233 | 234 | self.start_ts = None 235 | self.max_ts = 0 236 | self.start_offset = 0 237 | 238 | def add(self, file_pos): 239 | with self.lock: 240 | self.bitmap |= (1 << file_pos) 241 | 242 | def getPosIdx(self): 243 | with self.lock: 244 | while True: 245 | if self.bitmap & (1 << self.avaiable_pos_idx): 246 | self.avaiable_pos_idx += 1 247 | else: 248 | self.bitmap |= (1 << self.avaiable_pos_idx) 249 | self.avaiable_pos_idx += 1 250 | return self.avaiable_pos_idx - 1 251 | 252 | def metricFull(self): 253 | with self.lock: 254 | return self.bitmap + 1 == (1 << self.metrics_max_num) 255 | 256 | def metricEmpty(self): 257 | return not self.start_ts 258 | 259 | def canWrite(self, now): 260 | with self.lock: 261 | return self.start_ts and ((now - self.start_ts - self.retention) >= 262 | settings.DEFAULT_WAIT_TIME) 263 | 264 | def put(self, pos_idx, datapoint): 265 | with self.lock: 266 | try: 267 | base_idx = self.base_idxs[pos_idx] 268 | ts, val = datapoint 269 | 270 | self.max_ts = max(self.max_ts, ts) 271 | if self.start_ts is None: 272 | self.start_ts = ts - ts % self.resolution 273 | idx = base_idx 274 | else: 275 | offset = (ts - self.start_ts) / self.resolution 276 | idx = base_idx + (self.start_offset + offset) % self.cache_size 277 | 278 | self.points[idx] = val 279 | except Exception as e: 280 | log.err('put error in FileCache: %s' % e) 281 | 282 | def get_offset(self, ts): 283 | interval = (ts - self.start_ts) / self.resolution 284 | if interval >= self.cache_size: 285 | interval = self.cache_size - 1 286 | return (self.start_offset + interval) % self.cache_size 287 | 288 | def get(self, end_ts=None, clear=False): 289 | with self.lock: 290 | if self.metricEmpty(): 291 | return [] 292 | begin_offset = self.start_offset 293 | if end_ts: 294 | end_offset = self.get_offset(end_ts) 295 | else: 296 | end_offset = (begin_offset + self.points_num) % self.cache_size 297 | 298 | rs = [None] * self.metrics_max_num 299 | if begin_offset < end_offset: 300 | length = end_offset - begin_offset 301 | for i, base_idx in enumerate(self.base_idxs): 302 | begin_idx = base_idx + begin_offset 303 | end_idx = base_idx + end_offset 304 | val = self.points[begin_idx: end_idx] 305 | rs[i] = val 306 | if clear: 307 | self.clearPoint(begin_idx, end_idx) 308 | else: 309 | # wrap around 310 | length = self.cache_size - begin_offset + end_offset 311 | for i, base_idx in enumerate(self.base_idxs): 312 | begin_idx = base_idx + begin_offset 313 | end_idx = base_idx + end_offset 314 | val = self.points[begin_idx: base_idx+self.cache_size] 315 | val += self.points[base_idx: begin_idx] 316 | rs[i] = val 317 | if clear: 318 | self.clearPoint(begin_idx, base_idx+self.cache_size) 319 | self.clearPoint(base_idx, end_idx) 320 | 321 | # timestamps 322 | timestamps = [self.start_ts + i * self.resolution 323 | for i in range(length)] 324 | 325 | if clear: 326 | next_ts = timestamps[-1] + self.resolution 327 | if self.max_ts < next_ts: 328 | self.start_ts = None 329 | self.start_offset = 0 330 | else: 331 | self.start_ts = next_ts 332 | self.start_offset = end_offset 333 | 334 | return zip(timestamps, zip(*rs)) 335 | 336 | def clearPoint(self, begin_idx, end_idx): 337 | for i in range(begin_idx, end_idx): 338 | self.points[i] = NULL_VALUE 339 | 340 | 341 | MetricCache = 
MetricCache() 342 | -------------------------------------------------------------------------------- /rurouni/conf.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | import sys 4 | import errno 5 | from os.path import join, normpath, expanduser, dirname, exists, isdir 6 | from ConfigParser import ConfigParser 7 | from optparse import OptionParser 8 | from twisted.python import usage 9 | 10 | from rurouni.exceptions import RurouniException, ConfigException 11 | from rurouni import log 12 | 13 | 14 | defaults = dict( 15 | CACHE_QUERY_PORT = '7002', 16 | CACHE_QUERY_INTERFACE = '0.0.0.0', 17 | 18 | LINE_RECEIVER_PORT = '2003', 19 | LINE_RECEIVER_INTERFACE = '0.0.0.0', 20 | 21 | PICKLE_RECEIVER_PORT = '2004', 22 | PICKLE_RECEIVER_INTERFACE = '0.0.0.0', 23 | 24 | DEFAULT_WAIT_TIME = 10, 25 | RUROUNI_METRIC_INTERVAL = 60, 26 | RUROUNI_METRIC = 'rurouni', 27 | 28 | LOG_UPDATES = True, 29 | CONF_DIR = None, 30 | LOCAL_DATA_DIR = None, 31 | LOCAL_LINK_DIR = None, 32 | PID_DIR = None, 33 | 34 | MAX_CREATES_PER_MINUTE = float('inf'), 35 | NUM_ALL_INSTANCE = 1, 36 | ) 37 | 38 | 39 | class Settings(dict): 40 | __getattr__ = dict.__getitem__ 41 | 42 | def __init__(self): 43 | dict.__init__(self) 44 | self.update(defaults) 45 | 46 | def readFrom(self, path, section): 47 | parser = ConfigParser() 48 | if not parser.read(path): 49 | raise RurouniException("Failed to read config: %s" % path) 50 | 51 | if not parser.has_section(section): 52 | return 53 | 54 | for key, val in parser.items(section): 55 | key = key.upper() 56 | val_typ = type(defaults[key]) if key in defaults else str 57 | 58 | if val_typ is list: 59 | val = [v.strip() for v in val.split(',')] 60 | elif val_typ is bool: 61 | val = parser.getboolean(section, key) 62 | else: 63 | # attempt to figure out numeric types automatically 64 | try: 65 | val = int(val) 66 | except: 67 | try: 68 | val = float(val) 69 | except: 70 | pass 71 | self[key] = val 72 | 73 | 74 | settings = Settings() 75 | 76 | 77 | class OrderedConfigParser(ConfigParser): 78 | """ 79 | Ordered Config Parser. 80 | 81 | http://stackoverflow.com/questions/1134071/keep-configparser-output-files-sorted. 82 | 83 | Acturally, from python 2.7 the ConfigParser default dict is `OrderedDict`, 84 | So we just rewrite the read method to check config file. 85 | """ 86 | def read(self, path): 87 | if not os.access(path, os.R_OK): 88 | raise RurouniException( 89 | "Missing config file or wrong perm on %s" % path) 90 | return ConfigParser.read(self, path) 91 | 92 | 93 | class RurouniOptions(usage.Options): 94 | 95 | optFlags = [ 96 | ["debug", "", "run in debug mode."], 97 | ] 98 | 99 | optParameters = [ 100 | ['config', 'c', None, 'use the given config file.'], 101 | ['instance', '', 'a', 'manage a specific rurouni instance.'], 102 | ['logdir', '', None, 'write logs to the given directory.'], 103 | ] 104 | 105 | def postOptions(self): 106 | global settings 107 | pidfile = self.parent['pidfile'] 108 | if pidfile.endswith('twistd.pid'): 109 | pidfile = None 110 | self['pidfile'] = pidfile 111 | 112 | # Enforce a default umask of '022' if none was set. 
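        # (Editor's note) 022 is an octal literal: clearing the group/other
        # write bits means files created by the daemon default to mode 0644
        # and directories to 0755.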
113 | if not self.parent.has_key('umask') or self.parent['umask'] is None: 114 | self.parent['umask'] = 022 115 | 116 | program = self.parent.subCommand 117 | settings['program'] = program 118 | program_settings = read_config(program, self) 119 | settings.update(program_settings) 120 | 121 | # normalize and expand path 122 | variables = ['STORAGE_DIR', 'LOCAL_DATA_DIR', 'LOCAL_LINK_DIR', 123 | 'PID_DIR', 'LOG_DIR', 'pidfile', 'INDEX_FILE'] 124 | for var in variables: 125 | settings[var] = normpath(expanduser(settings[var])) 126 | 127 | storage_schemas = join(settings['CONF_DIR'], 'storage-schemas.conf') 128 | if not exists(storage_schemas): 129 | print 'Error missing config %s' % storage_schemas 130 | sys.exit(1) 131 | 132 | self.parent['pidfile'] = settings['pidfile'] 133 | 134 | if not 'action' in self: 135 | self['action'] = 'start' 136 | self.handleAction() 137 | 138 | if self['debug']: 139 | log.setDebugEnabled(True) 140 | else: 141 | if self.parent.get('syslog', None): 142 | log.logToSyslog(self.parent['prefix']) 143 | elif not self.parent['nodaemon']: 144 | if not isdir(settings.LOG_DIR): 145 | os.makedirs(settings.LOG_DIR) 146 | log.logToDir(settings.LOG_DIR) 147 | 148 | @staticmethod 149 | def _normpath(path): 150 | return normpath(expanduser(path)) 151 | 152 | def parseArgs(self, *action): 153 | if len(action) == 1: 154 | self["action"] = action[0] 155 | 156 | def handleAction(self): 157 | action = self['action'] 158 | pidfile = self.parent["pidfile"] 159 | program = settings['program'] 160 | instance = self['instance'] 161 | 162 | if action == 'stop': 163 | if not exists(pidfile): 164 | print 'pidfile %s does not exist' % pidfile 165 | raise SystemExit(0) 166 | with open(pidfile) as f: 167 | pid = int(f.read().strip()) 168 | print 'sending kill signal to pid %d' % pid 169 | try: 170 | os.kill(pid, 15) 171 | except OSError as e: 172 | if e.errno == errno.ESRCH: 173 | print 'no process with pid %d running' % pid 174 | else: 175 | raise 176 | raise SystemExit(0) 177 | 178 | elif action == 'start': 179 | if exists(pidfile): 180 | with open(pidfile) as f: 181 | pid = int(f.read().strip()) 182 | if _process_alive(pid): 183 | print ('%s (instance %s) is already running with pid %d' % 184 | (program, instance, pid)) 185 | raise SystemExit(1) 186 | else: 187 | print 'removing stale pidfile %s' % pidfile 188 | try: 189 | os.unlink(pidfile) 190 | except: 191 | print 'could not remove pidfile %s' % pidfile 192 | else: 193 | if not os.path.exists(settings['PID_DIR']): 194 | try: 195 | os.makedirs(settings['PID_DIR']) 196 | except OSError as e: 197 | if e.errno == errno.EEXIST and os.path.isdir(settings['PID_DIR']): 198 | pass 199 | 200 | elif action == 'status': 201 | if not exists(pidfile): 202 | print '%s (instance %s) is not running' % (program, instance) 203 | raise SystemExit(0) 204 | with open(pidfile) as f: 205 | pid = int(f.read().strip()) 206 | 207 | if _process_alive(pid): 208 | print ('%s (instance %s) is running with pid %d' % 209 | (program, instance, pid)) 210 | raise SystemExit(0) 211 | else: 212 | print "%s (instance %s) is not running" % (program, instance) 213 | raise SystemExit(1) 214 | 215 | 216 | def get_parser(usage="%prog [options] "): 217 | "Create a parser for command line options." 
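    # (Editor's sketch) These options mirror the twistd flags that
    # run_twistd_plugin() forwards, plus a trailing positional action
    # ('start', 'stop' or 'status') handled by RurouniOptions. A typical
    # invocation, with an illustrative GRAPHITE_ROOT, looks like:
    #
    #   GRAPHITE_ROOT=/opt/graphite bin/rurouni-cache.py \
    #       --config /opt/graphite/conf/rurouni.conf --instance 0 start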
218 | parser = OptionParser(usage=usage) 219 | parser.add_option( 220 | "--debug", action="store_true", 221 | help="Run in the foreground, log to stdout") 222 | parser.add_option( 223 | "--nodaemon", action="store_true", 224 | help='Run in the foreground') 225 | parser.add_option( 226 | "--pidfile", default=None, 227 | help='Write pid to the given file') 228 | parser.add_option( 229 | "--umask", default=None, 230 | help="Use the given umask when creating files") 231 | parser.add_option( 232 | '--config', default=None, 233 | help="Use the given config file") 234 | parser.add_option( 235 | "--instance", default="a", 236 | help="Manage a specific rurouni instance") 237 | return parser 238 | 239 | 240 | def read_config(program, options): 241 | """ 242 | Read settings for 'program' from configuration file specified by 243 | 'options["config"]', with missing values provide by 'defaults'. 244 | """ 245 | settings = Settings() 246 | 247 | # os environ variables 248 | graphite_root = os.environ.get('GRAPHITE_ROOT') 249 | if graphite_root is None: 250 | raise ConfigException('GRAPHITE_ROOT needs to be provided.') 251 | settings['STORAGE_DIR'] = os.environ.get( 252 | 'STORAGE_DIR', join(graphite_root, 'storage')) 253 | settings['CONF_DIR'] = os.environ.get( 254 | 'CONF_DIR', join(graphite_root, 'conf')) 255 | 256 | # set default config variables 257 | settings['LOCAL_DATA_DIR'] = join(settings['STORAGE_DIR'], 'data') 258 | settings['LOCAL_LINK_DIR'] = join(settings['STORAGE_DIR'], 'link') 259 | settings['PID_DIR'] = join(settings['STORAGE_DIR'], 'run') 260 | settings['LOG_DIR'] = join(settings['STORAGE_DIR'], 'log', program) 261 | 262 | if options['config'] is None: 263 | options['config'] = join(settings['CONF_DIR'], 'rurouni.conf') 264 | else: 265 | settings['CONF_DIR'] = dirname(normpath(options['config'])) 266 | 267 | # read configuration options from program-specific section. 
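    # For 'rurouni-cache' the base section is 'cache' and the per-instance
    # section is 'cache:<instance>'.  A sketch of the layout this code
    # expects (option values below are illustrative, not defaults):
    #
    #   [cache]
    #   NUM_ALL_INSTANCE = 2
    #   MAX_CREATES_PER_MINUTE = 600
    #
    #   [cache:0]
    #   LINE_RECEIVER_PORT = 2003
    #   CACHE_QUERY_PORT = 7002
    #
    #   [cache:1]
    #   LINE_RECEIVER_PORT = 2103
    #   CACHE_QUERY_PORT = 7102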
268 | section = program[len('rurouni-'):] 269 | config = options['config'] 270 | if not exists(config): 271 | raise ConfigException('Error: missing required config %s' % config) 272 | 273 | instance = options['instance'] 274 | if not instance.isdigit(): 275 | raise ConfigException('Error: instance must be digit %s' % instance) 276 | settings['instance'] = instance 277 | 278 | # read configuration file 279 | settings.readFrom(config, section) 280 | settings.readFrom(config, '%s:%s' % (section, instance)) 281 | 282 | # check cache instance number 283 | parser = ConfigParser() 284 | parser.read(config) 285 | prefix = 'cache:' 286 | instances = {int(s[len(prefix):]) for s in parser.sections() 287 | if s.startswith(prefix)} 288 | if settings['NUM_ALL_INSTANCE'] != len(instances) or \ 289 | settings['NUM_ALL_INSTANCE'] != max(instances) + 1: 290 | raise ConfigException( 291 | 'Error: cache instance not match NUM_ALL_INSTANCE') 292 | 293 | settings['pidfile'] = ( 294 | options['pidfile'] or 295 | join(settings['PID_DIR'], '%s-%s.pid' % (program, instance)) 296 | ) 297 | settings['LOG_DIR'] = ( 298 | options['logdir'] or 299 | join(settings['LOG_DIR'], '%s-%s' % (program, instance)) 300 | ) 301 | 302 | settings['INDEX_FILE'] = join(settings['LOCAL_DATA_DIR'], 303 | '%s.idx' % instance) 304 | return settings 305 | 306 | 307 | def _process_alive(pid): 308 | if exists('/proc'): 309 | return exists('/proc/%d' % pid) 310 | else: 311 | try: 312 | os.kill(int(pid), 0) 313 | return True 314 | except OSError as e: 315 | return e.errno == errno.EPERM 316 | -------------------------------------------------------------------------------- /rurouni/exceptions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | class RurouniException(Exception): 5 | pass 6 | 7 | class ConfigException(RurouniException): 8 | pass 9 | 10 | class TokenBucketFull(RurouniException): 11 | pass 12 | 13 | class UnexpectedMetric(RurouniException): 14 | pass -------------------------------------------------------------------------------- /rurouni/fnv1a.pyx: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from libc.stdint cimport uint8_t, uint32_t, int32_t 4 | 5 | 6 | cdef int32_t FNV32a(void* key, int size) nogil: 7 | cdef uint8_t* p = key 8 | cdef uint32_t h = 2166136261UL 9 | 10 | for i in range(size): 11 | h ^= p[i] 12 | h *= 16777619; 13 | 14 | return h 15 | 16 | 17 | def get_int32_hash(bytes b not None): 18 | """Return signed 32-bit fnv1a hash. 19 | 20 | NOTE: It's a historical reason that we (Douban) use signed 32-bit 21 | hash, you can change it if you want. 
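    Example (the expected value matches tests/test_fnv1a.py, where the
    unsigned FNV-1a hash of 'foobar' is 0xbf9cf968):

        >>> get_int32_hash(b'foobar')
        -1080231576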
22 | """ 23 | return FNV32a(b, len(b)) 24 | -------------------------------------------------------------------------------- /rurouni/log.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import time 4 | from zope.interface import implements 5 | from sys import stdout, stderr 6 | 7 | from twisted.python.log import startLoggingWithObserver, textFromEventDict, msg, err, ILogObserver 8 | from twisted.python.syslog import SyslogObserver 9 | from twisted.python.logfile import DailyLogFile 10 | 11 | 12 | 13 | class RurouniLogObserver(object): 14 | implements(ILogObserver) 15 | 16 | def __call__(self, event): 17 | return self.observer(event) 18 | 19 | def logToDir(self, logdir): 20 | self.logdir = logdir 21 | self.console_logfile = DailyLogFile('console.log', logdir) 22 | self.custom_logs = {} 23 | self.observer = self.logdirObserver 24 | 25 | def logToSyslog(self, prefix): 26 | observer = SyslogObserver(prefix).emit 27 | def log(event): 28 | event['system'] = event.get('type', 'console') 29 | observer(event) 30 | self.observer = log 31 | 32 | def stdoutObserver(self, event): 33 | stdout.write(formatEvent(event, includeType=True) + '\n') 34 | stdout.flush() 35 | 36 | def logdirObserver(self, event): 37 | msg = formatEvent(event) 38 | log_type = event.get('type') 39 | 40 | if log_type is not None and log_type not in self.custom_logs: 41 | self.custom_logs[log_type] = DailyLogFile(log_type + '.log', self.logdir) 42 | 43 | logfile = self.custom_logs.get(log_type, self.console_logfile) 44 | logfile.write(msg + '\n') 45 | logfile.flush() 46 | 47 | observer = stdoutObserver # default to stdout 48 | 49 | 50 | def formatEvent(event, includeType=False): 51 | event['isError'] = 'failure' in event 52 | msg = textFromEventDict(event) 53 | 54 | if includeType: 55 | type_tag = '[%s] ' % event.get('type', 'console') 56 | else: 57 | type_tag = '' 58 | 59 | timestamp = time.strftime("%d/%m/%Y %H:%M:%S") 60 | return "%s\t%s\t%s" % (timestamp, type_tag, msg) 61 | 62 | 63 | rurouniLogObserver = RurouniLogObserver() 64 | logToDir = rurouniLogObserver.logToDir 65 | logToSyslog = rurouniLogObserver.logToSyslog 66 | logToStdout = lambda: startLoggingWithObserver(rurouniLogObserver) 67 | 68 | 69 | def cache(message, **context): 70 | context['type'] = 'cache' 71 | msg(message, **context) 72 | 73 | def clients(message, **context): 74 | context['type'] = 'clients' 75 | msg(message, **context) 76 | 77 | def creates(message, **context): 78 | context['type'] = 'creates' 79 | msg(message, **context) 80 | 81 | def updates(message, **context): 82 | context['type'] = 'updates' 83 | msg(message, **context) 84 | 85 | def listener(message, **context): 86 | context['type'] = 'listener' 87 | msg(message, **context) 88 | 89 | def relay(message, **context): 90 | context['type'] = 'relay' 91 | msg(message, **context) 92 | 93 | def aggregator(message, **context): 94 | context['type'] = 'aggregator' 95 | msg(message, **context) 96 | 97 | def query(message, **context): 98 | context['type'] = 'query' 99 | msg(message, **context) 100 | 101 | def debug(message, **context): 102 | if debugEnabled: 103 | msg(message, **context) 104 | 105 | debugEnabled = False 106 | def setDebugEnabled(enabled): 107 | global debugEnabled 108 | debugEnabled = enabled 109 | -------------------------------------------------------------------------------- /rurouni/protocols.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import cPickle as pickle 
3 | 4 | from twisted.protocols.basic import LineOnlyReceiver, Int32StringReceiver 5 | from twisted.internet.error import ConnectionDone 6 | 7 | from rurouni.state import events 8 | from rurouni import log 9 | from rurouni.cache import MetricCache 10 | 11 | 12 | ### metric receiver 13 | 14 | class MetricReceiver: 15 | """ Base class for all metric receive protocols. 16 | """ 17 | def connectionMade(self): 18 | self.peerName = self.getPeerName() 19 | 20 | def getPeerName(self): 21 | if hasattr(self.transport, 'getPeer'): 22 | peer = self.transport.getPeer() 23 | return '%s:%d' % (peer.host, peer.port) 24 | else: 25 | return 'peer' 26 | 27 | def metricReceived(self, metric, datapoint): 28 | events.metricReceived(metric, datapoint) 29 | 30 | 31 | class MetricLineReceiver(MetricReceiver, LineOnlyReceiver): 32 | delimiter = '\n' 33 | 34 | def lineReceived(self, line): 35 | try: 36 | metric, value, timestamp = line.strip().split() 37 | datapoint = (int(timestamp), float(value)) 38 | except: 39 | log.msg('invalid line (%s) received from client %s' % 40 | (line, self.peerName)) 41 | return 42 | self.metricReceived(metric, datapoint) 43 | 44 | 45 | class MetricPickleReceiver(MetricReceiver, Int32StringReceiver): 46 | MAX_LENGTH = 2<<20 # 2M 47 | 48 | def connectionMade(self): 49 | MetricReceiver.connectionMade(self) 50 | 51 | def stringReceived(self, data): 52 | try: 53 | datapoints = pickle.loads(data) 54 | except: 55 | log.listener("invalid pickle received from %s, ignoring" 56 | % self.peerName) 57 | for metric, (timestamp, value) in datapoints: 58 | try: 59 | datapoint = int(timestamp), float(value) 60 | except Exception as e: 61 | continue 62 | self.metricReceived(metric, datapoint) 63 | 64 | 65 | class CacheManagementHandler(Int32StringReceiver): 66 | MAX_LENGTH = 3<<20 # 3M 67 | 68 | def connectionMade(self): 69 | peer = self.transport.getPeer() 70 | self.peerAddr = "%s:%s" % (peer.host, peer.port) 71 | log.query("%s connected" % self.peerAddr) 72 | 73 | def connectionLost(self, reason): 74 | if reason.check(ConnectionDone): 75 | log.query("%s disconnected" % self.peerAddr) 76 | else: 77 | log.query("%s connection lost: %s" % (self.peerAddr, reason.value)) 78 | 79 | def stringReceived(self, rawRequest): 80 | request = pickle.loads(rawRequest) 81 | datapoints = MetricCache.get(request['metric']) 82 | rs = dict(datapoints=datapoints) 83 | response = pickle.dumps(rs, protocol=-1) 84 | self.sendString(response) 85 | -------------------------------------------------------------------------------- /rurouni/service.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from twisted.application import internet, service 3 | from twisted.application.internet import TCPServer 4 | from twisted.plugin import IPlugin 5 | from twisted.internet.protocol import ServerFactory 6 | from twisted.python.log import ILogObserver 7 | from twisted.python.components import Componentized 8 | 9 | from rurouni import protocols 10 | from rurouni import state 11 | from rurouni.conf import settings 12 | from rurouni.log import rurouniLogObserver 13 | 14 | 15 | ### root serveice 16 | 17 | class RurouniRootService(service.MultiService): 18 | """ Root Service that properly configure twistd logging. 
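    When twistd attaches this service to its application object (a
    Componentized), setServiceParent() registers rurouniLogObserver as the
    ILogObserver component, so twisted log events are routed through
    rurouni.log.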
19 | """ 20 | 21 | def setServiceParent(self, parent): 22 | service.MultiService.setServiceParent(self, parent) 23 | if isinstance(parent, Componentized): 24 | parent.setComponent(ILogObserver, rurouniLogObserver) 25 | 26 | 27 | def createBaseService(options): 28 | root_service = RurouniRootService() 29 | root_service.setName('rurouni') 30 | 31 | receive_services = ( 32 | (settings.LINE_RECEIVER_INTERFACE, 33 | settings.LINE_RECEIVER_PORT, 34 | protocols.MetricLineReceiver 35 | ), 36 | (settings.PICKLE_RECEIVER_INTERFACE, 37 | settings.PICKLE_RECEIVER_PORT, 38 | protocols.MetricPickleReceiver 39 | ), 40 | ) 41 | for interface, port, protocol in receive_services: 42 | if port: 43 | factory = ServerFactory() 44 | factory.protocol = protocol 45 | service = TCPServer(int(port), factory, interface=interface) 46 | service.setServiceParent(root_service) 47 | 48 | from rurouni.state.instrumentation import InstrumentationService 49 | service = InstrumentationService() 50 | service.setServiceParent(root_service) 51 | 52 | return root_service 53 | 54 | 55 | def createCacheService(options): 56 | from rurouni.cache import MetricCache 57 | from rurouni.protocols import CacheManagementHandler 58 | 59 | MetricCache.init() 60 | state.events.metricReceived.addHandler(MetricCache.put) 61 | root_service = createBaseService(options) 62 | 63 | factory = ServerFactory() 64 | factory.protocol = CacheManagementHandler 65 | service = TCPServer(int(settings.CACHE_QUERY_PORT), factory, 66 | interface=settings.CACHE_QUERY_INTERFACE) 67 | service.setServiceParent(root_service) 68 | 69 | from rurouni.writer import WriterService 70 | service = WriterService() 71 | service.setServiceParent(root_service) 72 | 73 | return root_service 74 | -------------------------------------------------------------------------------- /rurouni/state/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module exists for the purpose of tracking global state. 
3 | """ 4 | cacheTooFull = False 5 | -------------------------------------------------------------------------------- /rurouni/state/events.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from twisted.python.failure import Failure 3 | 4 | from rurouni import state, log 5 | from rurouni.state import instrumentation 6 | 7 | 8 | class Event(object): 9 | 10 | def __init__(self, name, default_handler=None): 11 | self.name = name 12 | self.handlers = [default_handler] if default_handler else [] 13 | 14 | def addHandler(self, handler): 15 | if handler not in self.handlers: 16 | self.handlers.append(handler) 17 | 18 | def removeHandler(self, handler): 19 | if handler in self.handlers: 20 | self.handlers.remove(handler) 21 | 22 | def __call__(self, *args, **kwargs): 23 | for h in self.handlers: 24 | try: 25 | h(*args, **kwargs) 26 | except Exception as e: 27 | log.err(None, 28 | "Exception %s in %s event handler: args=%s, kwargs=%s" 29 | % (e, self.name, args, kwargs)) 30 | 31 | 32 | metricReceived = Event('metricReceived', 33 | lambda *a, **ka: instrumentation.incr('metricReceived')) 34 | 35 | cacheFull = Event('cacheFull') 36 | cacheFull.addHandler(lambda *a, **ka: instrumentation.incr('cacheOverflow')) 37 | cacheFull.addHandler(lambda *a, **ka: setattr(state, 'cacheTooFull', True)) 38 | -------------------------------------------------------------------------------- /rurouni/state/instrumentation.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | import time 4 | import socket 5 | from resource import getrusage, RUSAGE_SELF 6 | 7 | from twisted.application.service import Service 8 | from twisted.internet.task import LoopingCall 9 | 10 | from rurouni.conf import settings 11 | from rurouni import log 12 | 13 | 14 | # consts 15 | HOSTNAME = socket.gethostname().replace('.', '_') 16 | PAGESIZE = os.sysconf('SC_PAGESIZE') 17 | 18 | # globals 19 | stats = {} 20 | prior_stats = {} 21 | 22 | def _get_usage_info(): 23 | rusage = getrusage(RUSAGE_SELF) 24 | curr_usage = rusage.ru_utime + rusage.ru_stime 25 | curr_time = time.time() 26 | return curr_usage, curr_time 27 | 28 | last_usage, last_usage_time = _get_usage_info() 29 | 30 | 31 | def incr(stat, amount=1): 32 | stats.setdefault(stat, 0) 33 | stats[stat] += amount 34 | 35 | 36 | def max(stat, new_val): 37 | try: 38 | if stats[stat] < new_val: 39 | stats[stat] = new_val 40 | except KeyError: 41 | stats[stat] = new_val 42 | 43 | 44 | def append(stat, val): 45 | stats.setdefault(stat, []) 46 | stats[stat].append(val) 47 | 48 | 49 | def get_cpu_usage(): 50 | global last_usage, last_usage_time 51 | curr_usage, curr_time = _get_usage_info() 52 | 53 | usage_diff = curr_usage - last_usage 54 | time_diff = curr_time - last_usage_time 55 | cpu_usage_percent = (usage_diff / time_diff) * 100. 
56 | 57 | last_usage, last_usage_time = curr_usage, curr_time 58 | return cpu_usage_percent 59 | 60 | 61 | def get_mem_usage(): 62 | rss_pages = int(open('/proc/self/statm').read().split()[1]) 63 | return rss_pages * PAGESIZE 64 | 65 | 66 | def record_metrics(): 67 | _stats = stats.copy() 68 | stats.clear() 69 | 70 | # rurouni cache 71 | record = cache_record 72 | update_times = _stats.get('updateTimes', []) 73 | committed_points = _stats.get('committedPoints', 0) 74 | creates = _stats.get('creates', 0) 75 | dropped_creates = _stats.get('droppedCreates', 0) 76 | errors = _stats.get('errors', 0) 77 | cache_queries = _stats.get('cacheQueries', 0) 78 | cache_overflow = _stats.get('cacheOverflow', 0) 79 | 80 | if update_times: 81 | avg_update_time = sum(update_times) / len(update_times) 82 | record('avgUpdateTime', avg_update_time) 83 | 84 | if committed_points: 85 | points_per_update = float(committed_points) / len(update_times) 86 | record('pointsPerUpdate', points_per_update) 87 | 88 | record('updateOperations', len(update_times)) 89 | record('committedPoints', committed_points) 90 | record('creates', creates) 91 | record('droppedCreates', dropped_creates) 92 | record('errors', errors) 93 | record('cacheQueries', cache_queries) 94 | record('cacheOverflow', cache_overflow) 95 | 96 | record('metricReceived', _stats.get('metricReceived', 0)) 97 | record('cpuUsage', get_cpu_usage()) 98 | # this only workds on linux 99 | try: 100 | record('memUsage', get_mem_usage()) 101 | except: 102 | pass 103 | 104 | 105 | def cache_record(metric_type, val): 106 | prefix = settings.RUROUNI_METRIC 107 | metric_tmpl = prefix + '.%s.%s.%s' 108 | if settings.instance is None: 109 | metric = metric_tmpl % (HOSTNAME, 'a', metric_type) 110 | else: 111 | metric = metric_tmpl % (HOSTNAME, settings.instance, metric_type) 112 | datapoint = int(time.time()), val 113 | cache.MetricCache.put(metric, datapoint) 114 | 115 | 116 | class InstrumentationService(Service): 117 | def __init__(self): 118 | self.record_task = LoopingCall(record_metrics) 119 | self.metric_interval = settings.RUROUNI_METRIC_INTERVAL 120 | 121 | def startService(self): 122 | if self.metric_interval > 0: 123 | self.record_task.start(self.metric_interval, False) 124 | Service.startService(self) 125 | 126 | def stopService(self): 127 | if self.metric_interval > 0: 128 | self.record_task.stop() 129 | Service.stopService(self) 130 | 131 | 132 | # avoid import circularities 133 | from rurouni import cache 134 | -------------------------------------------------------------------------------- /rurouni/storage.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os 4 | import re 5 | import glob 6 | import errno 7 | from os.path import join, sep, splitext, basename, dirname 8 | 9 | import kenshin 10 | from kenshin.utils import mkdir_p 11 | from rurouni import log 12 | from rurouni.conf import settings, OrderedConfigParser 13 | 14 | 15 | def getFilePath(schema_name, file_idx): 16 | return join(settings.LOCAL_DATA_DIR, settings['instance'], 17 | schema_name, '%d.hs' % file_idx) 18 | 19 | 20 | def getMetricPath(metric): 21 | path = metric.replace('.', sep) 22 | return join(settings.LOCAL_LINK_DIR, settings['instance'], path + '.hs') 23 | 24 | 25 | def createLink(metric, file_path): 26 | metric_path = getMetricPath(metric) 27 | try: 28 | _createLinkHelper(metric_path, file_path) 29 | except OSError as exc: 30 | if exc.errno == errno.ENAMETOOLONG: 31 | pass 32 | else: 33 | raise 34 | 35 | 36 | def 
_createLinkHelper(link_path, file_path): 37 | """ 38 | Create symlink link_path -> file_path. 39 | """ 40 | dir_ = dirname(link_path) 41 | mkdir_p(dir_) 42 | if os.path.lexists(link_path): 43 | os.rename(link_path, link_path + '.bak') 44 | os.symlink(file_path, link_path) 45 | 46 | 47 | def getFilePathByInstanceDir(instance_data_dir, schema_name, file_idx): 48 | return join(instance_data_dir, schema_name, "%d.hs" % file_idx) 49 | 50 | 51 | def getMetricPathByInstanceDir(instance_link_dir, metric): 52 | path = metric.replace(".", sep) 53 | return join(instance_link_dir, path + ".hs") 54 | 55 | 56 | def rebuildIndex(instance_data_dir, instance_index_file): 57 | """ 58 | Rebuild index file from data file, if a data file has no valid metric, 59 | we will remove it. 60 | """ 61 | out = open(instance_index_file, 'w') 62 | for schema_name in os.listdir(instance_data_dir): 63 | hs_file_pat = os.path.join(instance_data_dir, schema_name, '*.hs') 64 | for fp in glob.glob(hs_file_pat): 65 | with open(fp) as f: 66 | empty_flag = True 67 | header = kenshin.header(f) 68 | metric_list = header['tag_list'] 69 | file_id = splitext(basename(fp))[0] 70 | for i, metric in enumerate(metric_list): 71 | if metric != '': 72 | empty_flag = False 73 | out.write('%s %s %s %s\n' % 74 | (metric, schema_name, file_id, i)) 75 | if empty_flag: 76 | os.remove(fp) 77 | out.close() 78 | 79 | 80 | def rebuildLink(instance_data_dir, instance_link_dir): 81 | for schema_name in os.listdir(instance_data_dir): 82 | hs_file_pat = os.path.join(instance_data_dir, schema_name, '*.hs') 83 | for fp in glob.glob(hs_file_pat): 84 | with open(fp) as f: 85 | header = kenshin.header(f) 86 | metric_list = header['tag_list'] 87 | for metric in metric_list: 88 | if metric != '': 89 | link_path = getMetricPathByInstanceDir(instance_link_dir, metric) 90 | try: 91 | _createLinkHelper(link_path, fp) 92 | except OSError as exc: 93 | if exc.errno == errno.ENAMETOOLONG: 94 | pass 95 | else: 96 | raise 97 | 98 | 99 | class Archive: 100 | def __init__(self, secPerPoint, points): 101 | self.secPerPoint = secPerPoint 102 | self.points = points 103 | 104 | def __str__(self): 105 | return 'Archive(%s, %s)' % (self.secPerPoint, self.points) 106 | 107 | def getTuple(self): 108 | return self.secPerPoint, self.points 109 | 110 | @staticmethod 111 | def fromString(retentionDef): 112 | rs = kenshin.parse_retention_def(retentionDef) 113 | return Archive(*rs) 114 | 115 | 116 | class Schema(object): 117 | def match(self, metric): 118 | raise NotImplementedError() 119 | 120 | 121 | class DefaultSchema(Schema): 122 | def __init__(self, name, xFilesFactor, aggregationMethod, archives, 123 | cache_retention, metrics_max_num, cache_ratio): 124 | self.name = name 125 | self.xFilesFactor = xFilesFactor 126 | self.aggregationMethod = aggregationMethod 127 | self.archives = archives 128 | self.cache_retention = cache_retention 129 | self.metrics_max_num = metrics_max_num 130 | self.cache_ratio = cache_ratio 131 | 132 | def match(self, metric): 133 | return True 134 | 135 | 136 | class PatternSchema(Schema): 137 | def __init__(self, name, pattern, xFilesFactor, aggregationMethod, archives, 138 | cache_retention, metrics_max_num, cache_ratio): 139 | self.name = name 140 | self.pattern = re.compile(pattern) 141 | self.xFilesFactor = xFilesFactor 142 | self.aggregationMethod = aggregationMethod 143 | self.archives = archives 144 | self.cache_retention = cache_retention 145 | self.metrics_max_num = metrics_max_num 146 | self.cache_ratio = cache_ratio 147 | 148 | def match(self, 
metric): 149 | return self.pattern.match(metric) 150 | 151 | 152 | def loadStorageSchemas(conf_file): 153 | schema_list = [] 154 | config = OrderedConfigParser() 155 | config.read(conf_file) 156 | 157 | for section in config.sections(): 158 | options = dict(config.items(section)) 159 | 160 | pattern = options.get('pattern') 161 | xff = float(options.get('xfilesfactor')) 162 | agg = options.get('aggregationmethod') 163 | retentions = options.get('retentions').split(',') 164 | archives = [Archive.fromString(s).getTuple() for s in retentions] 165 | cache_retention = kenshin.RetentionParser.parse_time_str( 166 | options.get('cacheretention')) 167 | metrics_max_num = options.get('metricsperfile') 168 | cache_ratio = 1.2 169 | 170 | try: 171 | kenshin.validate_archive_list(archives, xff) 172 | except kenshin.InvalidConfig: 173 | log.err("Invalid schema found in %s." % section) 174 | 175 | schema = PatternSchema(section, pattern, float(xff), agg, archives, 176 | int(cache_retention), int(metrics_max_num), 177 | float(cache_ratio)) 178 | schema_list.append(schema) 179 | schema_list.append(defaultSchema) 180 | return schema_list 181 | 182 | 183 | # default schema 184 | 185 | defaultSchema = DefaultSchema( 186 | 'default', 187 | 1.0, 188 | 'avg', 189 | ((60, 60 * 24 * 7)), # default retention (7 days of minutely data) 190 | 600, 191 | 40, 192 | 1.2 193 | ) 194 | 195 | 196 | class StorageSchemas(object): 197 | def __init__(self, conf_file): 198 | self.schemas = loadStorageSchemas(conf_file) 199 | 200 | def getSchemaByMetric(self, metric): 201 | for schema in self.schemas: 202 | if schema.match(metric): 203 | return schema 204 | return defaultSchema 205 | 206 | def getSchemaByName(self, schema_name): 207 | for schema in self.schemas: 208 | if schema.name == schema_name: 209 | return schema 210 | return None 211 | 212 | 213 | if __name__ == '__main__': 214 | import sys 215 | loadStorageSchemas(sys.argv[1]) 216 | -------------------------------------------------------------------------------- /rurouni/utils.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | from time import time 4 | from os.path import dirname, basename, abspath, splitext 5 | from rurouni.fnv1a import get_int32_hash 6 | 7 | 8 | def run_twistd_plugin(filename): 9 | from twisted.scripts.twistd import runApp 10 | from twisted.scripts.twistd import ServerOptions 11 | from rurouni.conf import get_parser 12 | 13 | bin_dir = dirname(abspath(filename)) 14 | root_dir = dirname(bin_dir) 15 | os.environ.setdefault('GRAPHITE_ROOT', root_dir) 16 | 17 | program = splitext(basename(filename))[0] 18 | parser = get_parser() 19 | (options, args) = parser.parse_args() 20 | 21 | if not args: 22 | parser.print_usage() 23 | return 24 | 25 | twistd_options = [] 26 | try: 27 | from twisted.internet import epollreactor 28 | twistd_options.append('--reactor=epoll') 29 | except: 30 | pass 31 | 32 | if options.debug or options.nodaemon: 33 | twistd_options.append('--nodaemon') 34 | if options.pidfile: 35 | twistd_options.extend(['--pidfile', options.pidfile]) 36 | if options.umask: 37 | twistd_options.extend(['--umask', options.umask]) 38 | 39 | twistd_options.append(program) 40 | 41 | if options.debug: 42 | twistd_options.append('--debug') 43 | for name, value in vars(options).items(): 44 | if (value is not None and 45 | name not in ('debug', 'nodaemon', 'pidfile', 'umask')): 46 | twistd_options.extend(["--%s" % name.replace("_", '-'), 47 | value]) 48 | 49 | twistd_options.extend(args) 50 | 
config = ServerOptions() 51 | config.parseOptions(twistd_options) 52 | runApp(config) 53 | 54 | 55 | class TokenBucket(object): 56 | ''' Token Bucket algorithm for rate-limiting. 57 | URL: https://en.wikipedia.org/wiki/Token_bucket 58 | 59 | >>> bucket = TokenBucket(60, 1) 60 | >>> print bucket.consume(6) 61 | True 62 | >>> print bucket.consume(54) 63 | True 64 | >>> print bucket.consume(1) 65 | False 66 | >>> import time 67 | >>> time.sleep(1) 68 | >>> print bucket.consume(1) 69 | True 70 | ''' 71 | def __init__(self, capacity, fill_rate): 72 | ''' 73 | @capacity: total number of tokens in the bucket. 74 | @fill_rate: the rate in tokens/second that the bucket will be refilled. 75 | ''' 76 | self.capacity = float(capacity) 77 | self._tokens = float(capacity) 78 | self.fill_rate = float(fill_rate) 79 | self.timestamp = time() 80 | 81 | def consume(self, tokens): 82 | ''' Consume tokens from the bucket. 83 | 84 | Return True if there were sufficient tokens otherwise False. 85 | ''' 86 | if tokens <= self.tokens: 87 | self._tokens -= tokens 88 | return True 89 | else: 90 | return False 91 | 92 | @property 93 | def tokens(self): 94 | ''' Return the current number of tokens in the bucket. ''' 95 | if self._tokens < self.capacity: 96 | now = time() 97 | delta = self.fill_rate * (now - self.timestamp) 98 | self._tokens = min(self.capacity, self._tokens + delta) 99 | self.timestamp = now 100 | return self._tokens 101 | 102 | def __repr__(self): 103 | return '<%s %.2f %.2f>' % ( 104 | self.__class__.__name__, self.capacity, self.fill_rate) 105 | 106 | 107 | def get_instance_of_metric(metric, num_all_instance): 108 | return get_int32_hash(metric) % num_all_instance 109 | 110 | 111 | if __name__ == '__main__': 112 | import doctest 113 | doctest.testmod() 114 | -------------------------------------------------------------------------------- /rurouni/writer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import time 3 | 4 | from twisted.application.service import Service 5 | from twisted.internet import reactor 6 | 7 | import kenshin 8 | from rurouni.cache import MetricCache 9 | from rurouni import log 10 | from rurouni.conf import settings 11 | from rurouni.state import instrumentation 12 | from rurouni.storage import getFilePath 13 | 14 | 15 | class WriterService(Service): 16 | 17 | def __init__(self): 18 | pass 19 | 20 | def startService(self): 21 | reactor.callInThread(writeForever) 22 | Service.startService(self) 23 | 24 | def stopService(self): 25 | try: 26 | file_cache_idxs = MetricCache.getAllFileCaches() 27 | writeCachedDataPointsWhenStop(file_cache_idxs) 28 | except Exception as e: 29 | log.err('write error when stopping service: %s' % e) 30 | Service.stopService(self) 31 | 32 | 33 | def writeForever(): 34 | while reactor.running: 35 | write = False 36 | try: 37 | file_cache_idxs = MetricCache.writableFileCaches() 38 | if file_cache_idxs: 39 | write = writeCachedDataPoints(file_cache_idxs) 40 | except Exception as e: 41 | log.err('write error: %s' % e) 42 | # The writer thread only sleeps when there is no write 43 | # or an error occurs 44 | if not write: 45 | time.sleep(1) 46 | 47 | 48 | def writeCachedDataPoints(file_cache_idxs): 49 | pop_func = MetricCache.pop 50 | for schema_name, file_idx in file_cache_idxs: 51 | datapoints = pop_func(schema_name, file_idx) 52 | file_path = getFilePath(schema_name, file_idx) 53 | 54 | try: 55 | t1 = time.time() 56 | kenshin.update(file_path, datapoints) 57 | update_time = time.time() - t1 58 | 
except Exception as e: 59 | log.err('Error writing to %s: %s' % (file_path, e)) 60 | instrumentation.incr('errors') 61 | else: 62 | point_cnt = len(datapoints) 63 | instrumentation.incr('committedPoints', point_cnt) 64 | instrumentation.append('updateTimes', update_time) 65 | 66 | if settings.LOG_UPDATES: 67 | log.updates("wrote %d datapoints for %s in %.5f secs" % 68 | (point_cnt, schema_name, update_time)) 69 | 70 | return True 71 | 72 | 73 | def writeCachedDataPointsWhenStop(file_cache_idxs): 74 | pop_func = MetricCache.pop 75 | for schema_name, file_idx in file_cache_idxs: 76 | datapoints = pop_func(schema_name, file_idx, int(time.time()), False) 77 | if datapoints: 78 | file_path = getFilePath(schema_name, file_idx) 79 | try: 80 | kenshin.update(file_path, datapoints) 81 | except Exception as e: 82 | log.err('Error writing to %s: %s' % (file_path, e)) 83 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from setuptools import setup, Extension 4 | 5 | import re 6 | import os 7 | import sys 8 | import codecs 9 | from glob import glob 10 | 11 | here = os.path.abspath(os.path.dirname(__file__)) 12 | 13 | if sys.version_info < (2,7): 14 | sys.exit('Sorry, Python < 2.7 is not supported') 15 | 16 | 17 | def read(*parts): 18 | # intentionally *not* adding an encoding option to open 19 | return codecs.open(os.path.join(here, *parts), 'r').read() 20 | 21 | 22 | def find_version(*file_paths): 23 | version_file = read(*file_paths) 24 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", 25 | version_file, re.M) 26 | if version_match: 27 | return version_match.group(1) 28 | raise RuntimeError("Unable to find version string.") 29 | 30 | 31 | long_description = read('README.md') 32 | 33 | setup( 34 | name='kenshin', 35 | version=find_version('kenshin', '__init__.py'), 36 | description='A scalable time series database.', 37 | long_description=long_description, 38 | author='Zhaolong Zhu', 39 | url='https://github.com/douban/Kenshin', 40 | download_url='https://github.com/douban/Kenshin.git', 41 | author_email='zhuzhaolong0@gmail.com', 42 | install_requires=[], 43 | tests_require=['nose'], 44 | packages=['kenshin', 'kenshin.tools', 'rurouni', 'rurouni.state', 'twisted.plugins'], 45 | scripts=glob('bin/*'), 46 | zip_safe=False, 47 | platforms='any', 48 | setup_requires=['Cython'], 49 | ext_modules=[ 50 | Extension( 51 | name='%s.%s' % ('rurouni', name), 52 | sources=['%s/%s.pyx' % ('rurouni', name)], 53 | extra_compile_args=['-O3', '-funroll-loops', '-Wall'], 54 | ) for name in ['fnv1a'] 55 | ], 56 | ) 57 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/douban/Kenshin/bb5dfa05f5d10b4bdd1e0403c9e7d3c7e4399fcb/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_agg.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import unittest 4 | 5 | from kenshin.agg import Agg 6 | 7 | 8 | class TestAgg(unittest.TestCase): 9 | 10 | def setUp(self): 11 | self.vals = map(float, range(10)) 12 | 13 | def _get_agg_func_by_name(self, name): 14 | return Agg.get_agg_func(Agg.get_agg_id(name)) 15 | 16 | def test_get_agg_id(self): 17 | for i, agg in 
enumerate(Agg.get_agg_type_list()): 18 | id_ = Agg.get_agg_id(agg) 19 | self.assertEqual(id_, i) 20 | 21 | def test_agg_avg(self): 22 | func = self._get_agg_func_by_name('average') 23 | self.assertEqual(func(self.vals), 4.5) 24 | 25 | def test_agg_sum(self): 26 | func = self._get_agg_func_by_name('sum') 27 | self.assertEqual(func(self.vals), 45.0) 28 | 29 | def test_agg_last(self): 30 | func = self._get_agg_func_by_name('last') 31 | self.assertEqual(func(self.vals), 9.0) 32 | 33 | def test_agg_max(self): 34 | func = self._get_agg_func_by_name('max') 35 | self.assertEqual(func(self.vals), 9.0) 36 | 37 | def test_agg_min(self): 38 | func = self._get_agg_func_by_name('min') 39 | self.assertEqual(func(self.vals), 0.0) 40 | -------------------------------------------------------------------------------- /tests/test_fnv1a.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | import unittest 5 | from rurouni.fnv1a import get_int32_hash 6 | 7 | 8 | class TestFnv1a(unittest.TestCase): 9 | 10 | def _cmp_hash(self, int32_h, uint32_h): 11 | if uint32_h >= 0x80000000: 12 | uint32_h -= 0x100000000 13 | self.assertEqual(int32_h, uint32_h) 14 | 15 | def test_fnv1a_hash(self): 16 | test_cases = [ 17 | ("", 0x811c9dc5), 18 | ("a", 0xe40c292c), 19 | ("foobar", 0xbf9cf968), 20 | ("hello", 0x4f9f2cab), 21 | (b"\xff\x00\x00\x01", 0xc48fb86d), 22 | ] 23 | 24 | for s, uint32_h in test_cases: 25 | int32_h = get_int32_hash(s) 26 | self._cmp_hash(int32_h, uint32_h) 27 | -------------------------------------------------------------------------------- /tests/test_io_performance.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | import shutil 4 | import unittest 5 | 6 | import kenshin.storage 7 | from kenshin.storage import Storage, enable_debug, RetentionParser 8 | from kenshin.utils import mkdir_p 9 | 10 | 11 | class TestStorageIO(unittest.TestCase): 12 | data_dir = '/tmp/kenshin' 13 | 14 | def setUp(self): 15 | if os.path.exists(self.data_dir): 16 | shutil.rmtree(self.data_dir) 17 | 18 | mkdir_p(self.data_dir) 19 | self.storage = Storage(data_dir=self.data_dir) 20 | self.basic_setup = self._basic_setup() 21 | self.storage.create(*self.basic_setup) 22 | 23 | metric_name = self.basic_setup[0] 24 | self.path = self.storage.gen_path(self.data_dir, metric_name) 25 | 26 | def tearDown(self): 27 | shutil.rmtree(self.data_dir) 28 | 29 | def _basic_setup(self): 30 | metric_name = 'sys.cpu.user' 31 | self.file_cnt = 40 32 | 33 | tag_list = ['host=webserver%s,cpu=%s' % (i, i) 34 | for i in range(self.file_cnt)] 35 | archive_list = "1s:1h,60s:2d,300s:7d,15m:25w,12h:5y".split(',') 36 | archive_list = [RetentionParser.parse_retention_def(x) 37 | for x in archive_list] 38 | 39 | x_files_factor = 20 40 | agg_name = 'min' 41 | return [metric_name, tag_list, archive_list, x_files_factor, agg_name] 42 | 43 | def _gen_val(self, i): 44 | res = [] 45 | for j in range(self.file_cnt): 46 | res.append(i + 10*j) 47 | return tuple(res) 48 | 49 | def test_io(self): 50 | """ 51 | test io perfermance. 
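        In words: assuming roughly 1000 I/O operations per second, one day
        gives 1000 * 3600 * 24 operations; shared across 3 million metrics
        packed 40 per file (75000 files), that is a budget of about 1152
        I/O operations per file per day, which is checked below.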
52 | 53 | (1000 io/s * 3600 s * 24) / (3*10**6 metric) / (40 metric/file) = 1152 io/file 54 | 由于 header 函数在一次写入中被调用了多次,而 header 数据较小,完全可以读取缓存数据, 55 | 因此 enable_debug 中忽略了 header 的读操作。 56 | """ 57 | enable_debug(ignore_header=True) 58 | 59 | now_ts = 1411628779 60 | ten_min = 10 * RetentionParser.TIME_UNIT['minutes'] 61 | one_day = RetentionParser.TIME_UNIT['days'] 62 | from_ts = now_ts - one_day 63 | 64 | for i in range(one_day / ten_min): 65 | points = [(from_ts + i * ten_min + j, self._gen_val(i * ten_min + j)) 66 | for j in range(ten_min)] 67 | self.storage.update(self.path, points, from_ts + (i+1) * ten_min) 68 | 69 | open_ = kenshin.storage.open 70 | io = open_.read_cnt + open_.write_cnt 71 | io_limit = 1152 72 | self.assertLessEqual(io, io_limit) 73 | -------------------------------------------------------------------------------- /tests/test_storage.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import os 3 | import shutil 4 | import struct 5 | import unittest 6 | 7 | from kenshin.storage import Storage 8 | from kenshin.agg import Agg 9 | from kenshin.utils import mkdir_p, roundup 10 | from kenshin.consts import NULL_VALUE 11 | 12 | 13 | class TestStorageBase(unittest.TestCase): 14 | data_dir = '/tmp/kenshin' 15 | 16 | def setUp(self): 17 | if os.path.exists(self.data_dir): 18 | shutil.rmtree(self.data_dir) 19 | 20 | mkdir_p(self.data_dir) 21 | self.storage = Storage(data_dir=self.data_dir) 22 | self.basic_setup = self._basic_setup() 23 | self.storage.create(*self.basic_setup) 24 | 25 | metric_name = self.basic_setup[0] 26 | self.path = self.storage.gen_path(self.data_dir, metric_name) 27 | tag_list = self.basic_setup[1] 28 | self.null_point = (None,) * len(tag_list) 29 | 30 | def tearDown(self): 31 | shutil.rmtree(self.data_dir) 32 | 33 | @staticmethod 34 | def _gen_val(i, num=2): 35 | return [10 * j + i for j in range(num)] 36 | 37 | class TestStorage(TestStorageBase): 38 | 39 | def _basic_setup(self): 40 | metric_name = 'sys.cpu.user' 41 | 42 | tag_list = [ 43 | 'host=webserver01,cpu=0', 44 | 'host=webserver01,cpu=1', 45 | ] 46 | archive_list = [ 47 | (1, 6), 48 | (3, 6), 49 | ] 50 | x_files_factor = 1.0 51 | agg_name = 'min' 52 | return [metric_name, tag_list, archive_list, x_files_factor, agg_name] 53 | 54 | def test_gen_path(self): 55 | metric_name = 'a.b.c' 56 | data_dir = '/x/y' 57 | path = self.storage.gen_path(data_dir, metric_name) 58 | self.assertEqual(path, '/x/y/a/b/c.hs') 59 | 60 | def test_header(self): 61 | metric_name, tag_list, archive_list, x_files_factor, agg_name = self.basic_setup 62 | with open(self.path, 'rb') as f: 63 | header = self.storage.header(f) 64 | 65 | self.assertEqual(tag_list, header['tag_list']) 66 | self.assertEqual(x_files_factor, header['x_files_factor']) 67 | self.assertEqual(Agg.get_agg_id(agg_name), header['agg_id']) 68 | 69 | _archive_list = [(x['sec_per_point'], x['count']) 70 | for x in header['archive_list']] 71 | self.assertEqual(archive_list, _archive_list) 72 | 73 | def test_basic_update_fetch(self): 74 | now_ts = 1411628779 75 | num_points = 5 76 | points = [(now_ts - i, self._gen_val(i)) for i in range(1, num_points+1)] 77 | self.storage.update(self.path, points, now_ts) 78 | 79 | from_ts = now_ts - num_points 80 | series = self.storage.fetch(self.path, from_ts, now=now_ts) 81 | 82 | time_info = (from_ts, now_ts, 1) 83 | vals = [tuple(map(float, v)) for _, v in sorted(points)] 84 | expected = (time_info, vals) 85 | self.assertEqual(series[1:], expected) 86 | 87 | def 
test_update_propagate(self): 88 | now_ts = 1411628779 89 | num_points = 6 90 | points = [(now_ts - i, self._gen_val(i)) for i in range(1, num_points+1)] 91 | self.storage.update(self.path, points, now_ts) 92 | 93 | from_ts = now_ts - num_points - 1 94 | series = self.storage.fetch(self.path, from_ts, now=now_ts) 95 | time_info = (from_ts, roundup(now_ts, 3), 3) 96 | expected = time_info, [(5.0, 15.0), (2.0, 12.0), self.null_point] 97 | self.assertEqual(series[1:], expected) 98 | 99 | def test_null_point(self): 100 | now_ts = 1411628779 101 | num_points = 6 102 | points = [(now_ts - i, self._gen_val(i)) for i in range(1, num_points+1)] 103 | # change the last two points to null value 104 | points[4] = (now_ts - 5, (NULL_VALUE, NULL_VALUE)) 105 | points[5] = (now_ts - 6, (NULL_VALUE, NULL_VALUE)) 106 | 107 | self.storage.update(self.path, points, now_ts) 108 | 109 | from_ts = now_ts - num_points - 1 110 | series = self.storage.fetch(self.path, from_ts, now=now_ts) 111 | time_info = (from_ts, roundup(now_ts, 3), 3) 112 | expected = time_info, [self.null_point, (2.0, 12.0), self.null_point] 113 | self.assertEqual(series[1:], expected) 114 | 115 | def test_update_old_points(self): 116 | now_ts = 1411628779 117 | num_points = 12 118 | points = [(now_ts - i, self._gen_val(i)) for i in range(7, num_points+1)] 119 | self.storage.update(self.path, points, now_ts) 120 | 121 | from_ts = now_ts - num_points - 1 122 | series = self.storage.fetch(self.path, from_ts, now=now_ts) 123 | time_info = (from_ts, roundup(now_ts, 3), 3) 124 | expected = time_info, [(12.0, 22.0), (10.0, 20.0), (7.0, 17.0), self.null_point, self.null_point] 125 | self.assertEqual(series[1:], expected) 126 | 127 | def test_fetch_empty_metric(self): 128 | now_ts = 1411628779 129 | from_ts = 1411628775 130 | series = self.storage.fetch(self.path, from_ts, now=now_ts) 131 | time_info = (from_ts, now_ts, 1) 132 | expected = time_info, [self.null_point] * (now_ts - from_ts) 133 | self.assertEqual(series[1:], expected) 134 | 135 | def print_file_content(self): 136 | with open(self.path) as f: 137 | header = self.storage.header(f) 138 | archive_list = header['archive_list'] 139 | for i, archive in enumerate(archive_list): 140 | print "--------- archive %d ------------" % i 141 | print archive 142 | f.seek(archive['offset']) 143 | series_str = f.read(archive['size']) 144 | point_format = header['point_format'] 145 | series_format = point_format[0] + point_format[1:] * archive['count'] 146 | unpacked_series = struct.unpack(series_format, series_str) 147 | print unpacked_series 148 | 149 | 150 | class TestLostPoint(TestStorageBase): 151 | 152 | def _basic_setup(self): 153 | metric_name = 'sys.cpu.user' 154 | 155 | tag_list = [ 156 | 'host=webserver01,cpu=0', 157 | 'host=webserver01,cpu=1', 158 | ] 159 | archive_list = [ 160 | (1, 60), 161 | (3, 60), 162 | ] 163 | x_files_factor = 5 164 | agg_name = 'min' 165 | return [metric_name, tag_list, archive_list, x_files_factor, agg_name] 166 | 167 | def test_update_propagate(self): 168 | now_ts = 1411628779 169 | point_seeds_list = [range(30, 45), range(15)] 170 | mtime = None 171 | for i, point_seeds in enumerate(point_seeds_list): 172 | if i != 0: 173 | mtime = now_ts - max(point_seeds_list[i - 1]) 174 | points = [(now_ts - i, self._gen_val(i)) for i in point_seeds] 175 | self.storage.update(self.path, points, now_ts, mtime) 176 | 177 | from_ts = now_ts - 60 - 1 178 | series = self.storage.fetch(self.path, from_ts, now=now_ts) 179 | time_info = (from_ts, roundup(now_ts, 3), 3) 180 | null = 
self.null_point 181 | values = [null, null, null, null, null, (44.0, 54.0), (41.0, 51.0), 182 | (38.0, 48.0), (35.0, 45.0), (32.0, 42.0), (30.0, 40.0), 183 | null, null, null, null, (14.0, 24.0), (11.0, 21.0), (8.0, 18.0), 184 | (5.0, 15.0), null, null] 185 | expected = time_info, values 186 | self.assertEqual(series[1:], expected) 187 | 188 | def test_update_propagate_with_special_start_time(self): 189 | now_ts = 1411628779 190 | # start time is 1411628760 191 | point_seeds_list = [range(10, 20), range(1, 7)] 192 | mtime = None 193 | for i, point_seeds in enumerate(point_seeds_list): 194 | if i != 0: 195 | mtime = now_ts - max(point_seeds_list[i - 1]) 196 | points = [(now_ts - i, self._gen_val(i)) for i in point_seeds] 197 | self.storage.update(self.path, points, now_ts, mtime) 198 | from_ts = 1411628760 199 | until_ts = from_ts + 15 200 | series = self.storage.fetch(self.path, from_ts, until_ts, 201 | now=from_ts + 60 + 1) 202 | time_info = (from_ts, roundup(until_ts, 3), 3) 203 | values = [(17.0, 27.0), (14.0, 24.0), (11.0, 21.0), (10.0, 20.0), (5.0, 15.0)] 204 | expected = (time_info, values) 205 | self.assertEqual(series[1:], expected) 206 | 207 | def test_basic_update(self): 208 | now_ts = 1411628779 209 | point_seeds = [1, 2, 4, 5] 210 | points = [(now_ts - i, self._gen_val(i)) for i in point_seeds] 211 | self.storage.update(self.path, points, now_ts) 212 | 213 | from_ts = now_ts - 5 214 | series = self.storage.fetch(self.path, from_ts, now=now_ts) 215 | time_info = (from_ts, now_ts, 1) 216 | vals = [(5.0, 15.0), (4.0, 14.0), self.null_point, (2.0, 12.0), (1.0, 11.0)] 217 | expected = time_info, vals 218 | self.assertEqual(series[1:], expected) 219 | 220 | 221 | class TestMultiArchive(TestStorageBase): 222 | 223 | def _basic_setup(self): 224 | metric_name = 'sys.cpu.user' 225 | 226 | tag_list = [ 227 | 'host=webserver01,cpu=0', 228 | 'host=webserver01,cpu=1', 229 | 'host=webserver01,cpu=2', 230 | ] 231 | archive_list = [ 232 | (1, 60), 233 | (3, 60), 234 | (6, 60), 235 | ] 236 | x_files_factor = 5 237 | agg_name = 'min' 238 | return [metric_name, tag_list, archive_list, x_files_factor, agg_name] 239 | 240 | def test_time_range(self): 241 | now_ts = 1411628779 242 | # downsample time of chive2: 1411628760 (floor(1411628779. 
/ (6*5))) 243 | point_seeds_list = [range(19, 30), range(5, 2)] 244 | mtime = None 245 | for i, point_seeds in enumerate(point_seeds_list): 246 | if i != 0: 247 | mtime = now_ts - max(point_seeds_list[i - 1]) 248 | points = [(now_ts - i, self._gen_val(i, num=3)) for i in point_seeds] 249 | self.storage.update(self.path, points, now_ts, mtime) 250 | from_ts = 1411628760 - 2 * 6 251 | until_ts = 1411628760 252 | series = self.storage.fetch(self.path, from_ts, until_ts, 253 | now=from_ts + 180 + 1) 254 | time_info = (from_ts, roundup(until_ts, 6), 6) 255 | values = [(26.0, 36.0, 46.0), (20.0, 30.0, 40.0)] 256 | expected = (time_info, values) 257 | self.assertEqual(series[1:], expected) 258 | -------------------------------------------------------------------------------- /twisted/plugins/rurouni_cache_plugin.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from zope.interface import implements 3 | 4 | from twisted.application.service import IServiceMaker 5 | from twisted.plugin import IPlugin 6 | 7 | from rurouni import service 8 | from rurouni import conf 9 | 10 | 11 | class RurouniServiceMaker(object): 12 | implements(IServiceMaker, IPlugin) 13 | 14 | tapname = 'rurouni-cache' 15 | description = 'Collect stats for graphite' 16 | options = conf.RurouniOptions 17 | 18 | def makeService(self, options): 19 | return service.createCacheService(options) 20 | 21 | 22 | serviceMaker = RurouniServiceMaker() 23 | --------------------------------------------------------------------------------