├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── bin ├── ceres-maintenance ├── ceres-node-create ├── ceres-node-read ├── ceres-node-write ├── ceres-tree-create ├── ceres-tree-find ├── convert-wsp-to-ceres └── slicecat ├── ceres.py ├── docs ├── Makefile ├── ceres.rst ├── conf.py ├── index.rst └── requirements.txt ├── plugins └── maintenance │ ├── defrag.py │ ├── merge.py │ ├── metadata.py │ ├── orphans.py │ ├── ratelimit.py │ ├── rollup.py │ ├── rollup_ng.py │ └── template.py ├── setup.py ├── test_requirements.txt └── tests ├── __init__.py └── test_ceres.py /.gitignore: -------------------------------------------------------------------------------- 1 | MANIFEST 2 | build 3 | docs/_build 4 | dist 5 | *.log 6 | *.pyc 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # http://travis-ci.org/#!/graphite-project/ceres 2 | sudo: false 3 | language: python 4 | python: 5 | - 2.7 6 | - 3.3 7 | - 3.4 8 | - 3.5 9 | - 3.6 10 | - "pypy" 11 | before_install: 12 | - pip install --upgrade pip 13 | install: 14 | - pip install -r test_requirements.txt 15 | - python setup.py install 16 | script: 17 | - nosetests 18 | - flake8 --max-line-length=100 --ignore=E111,E114,E121 ceres.py tests/*.py bin/* 19 | - flake8 --max-line-length=100 --ignore=E111,E114,E121,F821 plugins/maintenance/* 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ceres
2 |
3 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/e714322dad124c279d42b217a763bf6e)](https://www.codacy.com/app/graphite-project/ceres?utm_source=github.com&utm_medium=referral&utm_content=graphite-project/ceres&utm_campaign=badger)
4 | [![Build Status](https://secure.travis-ci.org/graphite-project/ceres.png)](http://travis-ci.org/graphite-project/ceres)
5 | [![FOSSA Status](https://app.fossa.io/api/projects/git%2Bhttps%3A%2F%2Fgithub.com%2Fgraphite-project%2Fceres.svg?type=shield)](https://app.fossa.io/projects/git%2Bhttps%3A%2F%2Fgithub.com%2Fgraphite-project%2Fceres?ref=badge_shield)
6 |
7 | Ceres is *not actively maintained*.
8 |
9 | Ceres is a component of [Graphite][] and one of the time-series storage options available for use.
10 | Ceres provides a file format in which incoming metrics received from the network are persisted.
11 | See also [Whisper][].
12 |
13 | [Graphite]: https://github.com/graphite-project
14 | [Graphite Web]: https://github.com/graphite-project/graphite-web
15 | [Carbon]: https://github.com/graphite-project/carbon
16 | [Whisper]: https://github.com/graphite-project/whisper
17 | [Ceres]: https://github.com/graphite-project/ceres
18 |
19 | ## Overview
20 |
21 | Ceres is a time-series database format intended to replace [Whisper][] as the default storage
22 | format for [Graphite][]. In contrast with Whisper, Ceres is not a fixed-size database and is
23 | designed to better support sparse data of arbitrary fixed-size resolutions. This allows Graphite
24 | to distribute individual time-series across multiple servers or mounts.
25 |
26 | Expected features such as roll-up aggregation and data expiration are not provided by Ceres itself,
27 | but instead are implemented as maintenance plugins in [Carbon][].
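
## Quick usage

A minimal sketch of the Python API defined in `ceres.py` (included below); the tree root and metric name here are illustrative, and the target directory is assumed to be fresh and writable:

```python
from __future__ import print_function

import time
from ceres import CeresTree

# Initialize a tree; this creates the .ceres-tree metadata directory under the root
tree = CeresTree.createTree('/tmp/ceres-example')

# Create a node at 60-second resolution and store one datapoint
tree.createNode('example.cpuUsage', timeStep=60)
now = int(time.time())
tree.store('example.cpuUsage', [(now, 42.0)])

# Read the last 15 minutes back as a TimeSeriesData object
for timestamp, value in tree.fetch('example.cpuUsage', now - 900, now):
  print(timestamp, value)
```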
28 | -------------------------------------------------------------------------------- /bin/ceres-maintenance: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import os 5 | import time 6 | import traceback 7 | import posixfile 8 | from os.path import basename, splitext, exists, join, isfile, expanduser, abspath 9 | from optparse import OptionParser 10 | from ceres import getTree, CeresNode 11 | from multiprocessing import Pool 12 | 13 | # Make carbon imports available for some functionality 14 | root_dir = os.environ['GRAPHITE_ROOT'] = os.environ.get('GRAPHITE_ROOT', '/opt/graphite/') 15 | lib_dir = join(root_dir, 'lib') 16 | sys.path.append(lib_dir) 17 | 18 | try: 19 | from carbon.conf import settings, read_config 20 | HAVE_CARBON = True 21 | except ImportError: 22 | HAVE_CARBON = False 23 | 24 | 25 | EVENTS = ( 26 | 'maintenance_start', 27 | 'maintenance_complete', 28 | 'node_found', 29 | 'directory_found', 30 | 'directory_empty', 31 | ) 32 | 33 | 34 | class Plugin: 35 | context = { 36 | 'params': {} 37 | } 38 | 39 | def __init__(self, path): 40 | self.path = path 41 | self.name = basename(splitext(path)[0]) 42 | self.namespace = {} 43 | self.namespace.update(Plugin.context) 44 | self.event_handlers = {} 45 | 46 | def load(self): 47 | exec(compile(open(self.path, "rb").read(), self.path, 'exec'), self.namespace) 48 | for event in EVENTS: 49 | if event in self.namespace: 50 | self.event_handlers[event] = self.namespace[event] 51 | 52 | def handle_event(self, event, *args, **kwargs): 53 | handler = self.event_handlers.get(event) 54 | if handler: 55 | handler(*args, **kwargs) 56 | 57 | 58 | class PluginFinder: 59 | def __init__(self, plugin_dir): 60 | self.plugin_dir = abspath(expanduser(plugin_dir)) 61 | 62 | def find_plugins(self, plugin_refs): 63 | for ref in plugin_refs: 64 | if ref.startswith('~'): 65 | ref = expanduser(ref) 66 | 67 | if isfile(ref): 68 | yield Plugin(ref) 69 | 70 | else: 71 | filename = "%s.py" % ref 72 | plugin_path = join(self.plugin_dir, filename) 73 | 74 | if isfile(plugin_path): 75 | yield Plugin(plugin_path) 76 | else: 77 | raise PluginNotFound("The plugin '%s' could not be found in %s" % (ref, self.plugin_dir)) 78 | 79 | 80 | class PluginNotFound(Exception): 81 | pass 82 | 83 | 84 | class EventDispatcher: 85 | def __init__(self): 86 | self.handlers = {} 87 | 88 | def add_handler(self, event, handler): 89 | if event not in self.handlers: 90 | self.handlers[event] = [] 91 | self.handlers[event].append(handler) 92 | 93 | def dispatch(self, event, *args, **kwargs): 94 | for handler in self.handlers.get(event, []): 95 | try: 96 | handler(*args, **kwargs) 97 | except: 98 | log("--- Error in %s event-handler ---" % event) 99 | log(traceback.format_exc()) 100 | log('-' * 80) 101 | 102 | __call__ = dispatch 103 | 104 | 105 | def daemonize(): 106 | if os.fork() > 0: 107 | sys.exit(0) 108 | os.setsid() 109 | if os.fork() > 0: 110 | sys.exit(0) 111 | si = open('/dev/null', 'r') 112 | so = open('/dev/null', 'a+') 113 | se = open('/dev/null', 'a+', 0) 114 | os.dup2(si.fileno(), sys.stdin.fileno()) 115 | os.dup2(so.fileno(), sys.stdout.fileno()) 116 | os.dup2(se.fileno(), sys.stderr.fileno()) 117 | 118 | 119 | # Utility functions (exist in the plugin namespace) 120 | logfile = open('/dev/null', 'w') 121 | 122 | 123 | def log(message): 124 | logfile.write("[%s] %s\n" % (time.ctime(), message.strip())) 125 | logfile.flush() 126 | 127 | 128 | class MissingRequiredParam(Exception): 129 | def 
__init__(self, param):
130 |     Exception.__init__(self)
131 |     self.param = param
132 |
133 |
134 | class PluginFail(Exception):
135 |   pass
136 |
137 |
138 | if __name__ == '__main__':
139 |   default_plugindir = join(root_dir, 'plugins', 'maintenance')
140 |   parser = OptionParser(usage='''%prog [options] plugin [plugin2 ...] [key=val ...]''')
141 |   parser.add_option('--daemon', action='store_true')
142 |   parser.add_option('--verbose', action='store_true', help="Increase verbosity")
143 |   parser.add_option('--log', help="Write to the given log file instead of stdout")
144 |   parser.add_option('--lock', help="Lock file for the maintenance process")
145 |   parser.add_option('--workers', help="Number of workers to run (default: 4)", type=int, default=4)
146 |   parser.add_option('--root', default='/opt/graphite/storage/ceres/',
147 |                     help="Specify where to perform maintenance "
148 |                          "(default: /opt/graphite/storage/ceres/)")
149 |   parser.add_option('--plugindir', default=default_plugindir,
150 |                     help="Specify path to the plugin directory (default: %s)" % default_plugindir)
151 |
152 |   options, args = parser.parse_args()
153 |
154 |   if options.lock:
155 |     lock_timeout = 60
156 |     got_lock = 0
157 |     while lock_timeout:
158 |       try:
159 |         lock = posixfile.open(options.lock, 'w')
160 |         lock.lock('w')
161 |         got_lock = 1
162 |         break
163 |       except IOError as e:
164 |         if e.errno == 11:  # EAGAIN: the lock is held elsewhere, retry
165 |           lock_timeout = lock_timeout - 1
166 |           time.sleep(1)
167 |         else:
168 |           print("can't get lock, reason: %s" % e.strerror)
169 |           sys.exit(1)
170 |       except Exception:
171 |         print("failed to get lock for some unknown reason")
172 |         sys.exit(1)
173 |
174 |     if not got_lock:
175 |       print("Failed to get lock for 60s")
176 |       sys.exit(1)
177 |
178 |   # Magic plugin vars
179 |   Plugin.context['log'] = log
180 |   Plugin.context['MissingRequiredParam'] = MissingRequiredParam
181 |   Plugin.context['PluginFail'] = PluginFail
182 |   Plugin.context['params']['maxSlicesPerNode'] = 10
183 |   Plugin.context['params']['maxSliceGap'] = 80
184 |   Plugin.context['params']['mode'] = 'operate'
185 |
186 |   # Read carbon settings
187 |   if HAVE_CARBON:
188 |     configopts = dict(config=None, instance='ceres-maintenance', pidfile=None, logdir=None)
189 |     program_settings = read_config('ceres-maintenance', configopts)
190 |     settings.update(program_settings)
191 |     Plugin.context['settings'] = settings
192 |   else:
193 |     Plugin.context['settings'] = {}
194 |
195 |   # User-defined plugin vars
196 |   plugin_args = []
197 |   for arg in args:
198 |     if '=' in arg:
199 |       key, value = arg.split('=', 1)
200 |       Plugin.context['params'][key] = value
201 |     else:
202 |       plugin_args.append(arg)
203 |
204 |   if len(plugin_args) < 1:
205 |     print("At least one plugin is required.")
206 |     parser.print_usage()
207 |     sys.exit(1)
208 |
209 |   tree = getTree(options.root)
210 |   if tree is None:
211 |     print("%s is not inside a CeresTree" % options.root)
212 |     sys.exit(1)
213 |
214 |   # Load the plugins and setup event handlers
215 |   finder = PluginFinder(options.plugindir)
216 |   try:
217 |     plugins = list(finder.find_plugins(plugin_args))  # materialize the generator so PluginNotFound is raised here
218 |   except PluginNotFound as e:
219 |     print(e)
220 |     sys.exit(1)
221 |
222 |   dispatcher = EventDispatcher()
223 |
224 |   def dispatch(event, *args):
225 |     if options.verbose:
226 |       log("%s :: %s" % (event, args))
227 |     dispatcher(event, *args)
228 |
229 |   for plugin in plugins:
230 |     try:
231 |       plugin.load()
232 |     except MissingRequiredParam as e:
233 |       print("Failed to load %s plugin: required param '%s' "
234 |             "must be specified" % (plugin.name, e.param))
235 |       sys.exit(1)
236 |     except PluginFail as e:
237 |       print("Failed to load %s plugin: %s" % (plugin.name, e))
238 |       sys.exit(1)
239 |
240 |     for event, handler in plugin.event_handlers.items():
241 |       dispatcher.add_handler(event, handler)
242 |
243 |   # Daemonize & logify
244 |   if options.daemon:
245 |     daemonize()
246 |
247 |   if options.log:
248 |     logfile = open(options.log, 'a')
249 |
250 |   if not (options.daemon or options.log):
251 |     logfile = sys.stdout
252 |
253 |   # Begin walking the tree
254 |   dispatch('maintenance_start', tree)
255 |
256 |   proc_pool = Pool(processes=int(options.workers))
257 |
258 |   for current_dir, subdirs, files in os.walk(options.root):
259 |     subdirs.sort()
260 |     for subdir in subdirs:
261 |       if subdir == '.ceres-tree':
262 |         continue
263 |
264 |       path = join(current_dir, subdir)
265 |
266 |       if os.listdir(path):
267 |
268 |         if exists(join(path, '.ceres-node')):
269 |           nodePath = tree.getNodePath(path)
270 |           node = CeresNode(tree, nodePath, path)
271 |           proc_pool.apply_async(dispatch, ('node_found', node,))
272 |         else:
273 |           dispatch('directory_found', path)
274 |           if not os.listdir(path):  # check again in case a handler empties it
275 |             dispatch('directory_empty', path)
276 |
277 |       else:
278 |         dispatch('directory_empty', path)
279 |
280 |   proc_pool.close()
281 |   proc_pool.join()
282 |   dispatch('maintenance_complete', tree)
283 |
--------------------------------------------------------------------------------
/bin/ceres-node-create:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 |
4 | import sys
5 | from optparse import OptionParser
6 | from ceres import CeresTree, getTree
7 |
8 |
9 | parser = OptionParser(usage='''%prog [options] <path>
10 | If --tree is specified, <path> is taken as a node path
11 | Otherwise <path> is taken as a filesystem path
12 | ''')
13 | parser.add_option('--tree', default=None)
14 | parser.add_option('--step', default=60, type='int', help="Default time step")
15 |
16 | options, args = parser.parse_args()
17 |
18 | if not args:
19 |   parser.print_usage()
20 |   sys.exit(1)
21 |
22 |
23 | if options.tree:
24 |   nodePath = args[0]
25 |   tree = CeresTree(options.tree)
26 |
27 | else:
28 |   fsPath = args[0]
29 |   tree = getTree(fsPath)
30 |
31 |   if not tree:
32 |     print("error: %s is not in a ceres tree" % fsPath)
33 |     sys.exit(1)
34 |
35 |   nodePath = tree.getNodePath(fsPath)
36 |
37 | tree.createNode(nodePath, timeStep=options.step)
--------------------------------------------------------------------------------
/bin/ceres-node-read:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 |
4 | import sys
5 | import time
6 | from optparse import OptionParser
7 | from ceres import CeresTree, getTree
8 |
9 |
10 | parser = OptionParser(usage='''%prog [options] <path>
11 | If --tree is specified, <path> is taken as a node path
12 | Otherwise <path> is taken as a filesystem path
13 | ''')
14 | parser.add_option('--fromtime', default=int(time.time() - 900), type='int')
15 | parser.add_option('--untiltime', default=int(time.time()), type='int')
16 | parser.add_option('--tree', default=None)
17 | parser.add_option('--batch', action='store_true', help="Use numeric timestamps")
18 |
19 | options, args = parser.parse_args()
20 |
21 | if not args:
22 |   parser.print_usage()
23 |   sys.exit(1)
24 |
25 |
26 | if options.tree:
27 |   nodePath = args[0]
28 |   tree = CeresTree(options.tree)
29 |
30 | else:
31 |   fsPath = args[0]
32 |   tree = getTree(fsPath)
33 |
34 |   if not tree:
35 |     print("error: %s is not in a ceres tree" % fsPath)
36 |     sys.exit(1)
37 |
38 |   nodePath = tree.getNodePath(fsPath)
39 |
40 | results = tree.fetch(nodePath, options.fromtime, options.untiltime)
41 |
42 | for (timestamp, value) in results:
43 |   if options.batch:
44 |     print("%d\t%s" % (timestamp, value))
45 |   else:
46 |     print("%s\t%s" % (time.ctime(timestamp), value))
47 |
--------------------------------------------------------------------------------
/bin/ceres-node-write:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 |
4 | import sys
5 | import time
6 | from optparse import OptionParser
7 | from ceres import CeresTree, getTree
8 |
9 |
10 | parser = OptionParser(usage='''%prog [options] <path> [datapoint]+
11 | If --tree is specified, <path> is taken as a node path
12 | Otherwise <path> is taken as a filesystem path
13 |
14 | Each datapoint is of the form <timestamp>:<value> where <timestamp> may
15 | be a UNIX epoch time or the character 'N' to indicate 'now'.
16 | ''')
17 | parser.add_option('--tree', default=None)
18 |
19 | options, args = parser.parse_args()
20 |
21 | if not args:
22 |   parser.print_usage()
23 |   sys.exit(1)
24 |
25 |
26 | if options.tree:
27 |   nodePath = args[0]
28 |   tree = CeresTree(options.tree)
29 |
30 | else:
31 |   fsPath = args[0]
32 |   tree = getTree(fsPath)
33 |
34 |   if not tree:
35 |     print("error: %s is not in a ceres tree" % fsPath)
36 |     sys.exit(1)
37 |
38 |   nodePath = tree.getNodePath(fsPath)
39 |
40 |
41 | datapoints = []
42 | now = time.time()
43 | for datapoint in args[1:]:
44 |   timestamp, value = datapoint.split(':', 1)
45 |
46 |   if timestamp == 'N':
47 |     timestamp = now
48 |   else:
49 |     timestamp = float(timestamp)
50 |
51 |   value = float(value)
52 |   datapoints.append((timestamp, value))
53 |
54 | datapoints.sort()
55 |
56 | if not datapoints:
57 |   print("error: no datapoints specified")
58 |   parser.print_usage()
59 |   sys.exit(1)
60 |
61 |
62 | node = tree.getNode(nodePath)
63 | node.write(datapoints)
--------------------------------------------------------------------------------
/bin/ceres-tree-create:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 |
4 | import sys
5 | from optparse import OptionParser
6 | from ceres import CeresTree
7 |
8 |
9 | parser = OptionParser(usage='''%prog [options] <root-dir> [property=value]*''')
10 | parser.add_option('--verbose', action='store_true')
11 |
12 | options, args = parser.parse_args()
13 |
14 | if not args:
15 |   print("You must specify a root directory for the tree")
16 |   parser.print_usage()
17 |   sys.exit(1)
18 |
19 |
20 | root_dir = args[0]
21 | props = {}
22 |
23 | for arg in args[1:]:
24 |   prop, value = arg.split('=', 1)
25 |
26 |   try:  # convert numeric types, preferring int over float
27 |     value = int(value)
28 |   except ValueError:
29 |     try:
30 |       value = float(value)
31 |     except ValueError:
32 |       pass
33 |
34 |   props[prop] = value
35 |
36 | if options.verbose:
37 |   print("Creating tree at %s with props=%s" % (root_dir, props))
38 |
39 | tree = CeresTree.createTree(root_dir, **props)
--------------------------------------------------------------------------------
/bin/ceres-tree-find:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 |
4 | import sys
5 | from optparse import OptionParser
6 | from ceres import CeresTree
7 |
8 |
9 | parser = OptionParser(usage='''%prog [options] <root-dir> <pattern>''')
10 | parser.add_option('--fromtime', default=None, type='int')
11 | parser.add_option('--untiltime', default=None, type='int')
12 | parser.add_option('--fspath', action='store_true')
13 |
14 | options, args = parser.parse_args()
15 |
16 | if len(args) < 2:
17 |   parser.print_usage()
18 |   sys.exit(1)
19 |
20 |
21 | root_dir = args[0]
22 | pattern = args[1]
23 | tree = CeresTree(root_dir)
24 |
25 | for node in tree.find(pattern, fromTime=options.fromtime, untilTime=options.untiltime):
26 |   if options.fspath:
27 |     print(node.fsPath)
28 |   else:
29 |     print(node.nodePath)
30 |
--------------------------------------------------------------------------------
/bin/convert-wsp-to-ceres:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 |
4 | import sys
5 | import os
6 | import time
7 | from os.path import dirname, basename, isdir, join
8 | from optparse import OptionParser
9 | import whisper
10 | import ceres
11 |
12 |
13 | parser = OptionParser(usage='''%prog [options] <wsp-file>''')
14 | parser.add_option('--verbose', action='store_true')
15 | parser.add_option('--delete', action='store_true')
16 |
17 | options, args = parser.parse_args()
18 |
19 | if not args:
20 |   print("You must specify a wsp file")
21 |   parser.print_usage()
22 |   sys.exit(1)
23 |
24 |
25 | wsp_file = args[0]
26 | wsp_dir = dirname(wsp_file)
27 | metric_name = basename(wsp_file)[:-4]  # strip .wsp
28 | ceres_node_dir = join(wsp_dir, metric_name)
29 |
30 | if isdir(ceres_node_dir):
31 |   print("error: ceres node directory already exists (%s)" % ceres_node_dir)
32 |   sys.exit(1)
33 |
34 | tree = ceres.getTree(ceres_node_dir)
35 |
36 | if not tree:
37 |   print("error: the specified path is not in a ceres tree")
38 |   sys.exit(1)
39 |
40 | nodePath = tree.getNodePath(ceres_node_dir)
41 |
42 | if options.verbose:
43 |   print("extracting datapoints from wsp file")
44 |
45 | timeInfo, values = whisper.fetch(wsp_file, fromTime=0, untilTime=time.time())
46 | datapoints = zip(range(*timeInfo), values)
47 | datapoints = [(t, v) for (t, v) in datapoints if v is not None]
48 |
49 | if options.verbose:
50 |   print("creating ceres node %s" % nodePath)
51 |
52 | node = tree.createNode(nodePath)
53 |
54 | if options.verbose:
55 |   print("importing %d datapoints" % len(datapoints))
56 |
57 | node.write(datapoints)
58 |
59 | if options.delete:
60 |   if options.verbose:
61 |     print("deleting original wsp file: %s" % wsp_file)
62 |   os.unlink(wsp_file)
63 |
64 | if options.verbose:
65 |   print("conversion successful")
66 |
--------------------------------------------------------------------------------
/bin/slicecat:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | from __future__ import print_function
3 |
4 | import sys
5 | import os
6 | import time
7 | import struct
8 | from optparse import OptionParser
9 |
10 |
11 | parser = OptionParser(usage='%prog [options] <slice-file>')
12 |
13 | options, args = parser.parse_args()
14 |
15 | if not args:
16 |   parser.print_usage()
17 |   sys.exit(1)
18 |
19 | path = args[0]
20 |
21 | filename = os.path.basename(path)
22 | timestamp, timeStep = filename[:-6].split('@', 1)
23 | timestamp, timeStep = int(timestamp), int(timeStep)
24 |
25 | packedValues = open(path, 'rb').read()
26 | format = '!' + ('d' * (len(packedValues) // 8))
27 |
28 | values = struct.unpack(format, packedValues)
29 |
30 | for value in values:
31 |   print("[%d]\t%s\t%s" % (timestamp, time.ctime(timestamp), value))
32 |   timestamp += timeStep
33 |
--------------------------------------------------------------------------------
/ceres.py:
--------------------------------------------------------------------------------
1 | # Copyright 2011 Chris Davis
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #   http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | #
16 |
17 | # Ceres requires Python 2.7 or newer
18 | import itertools
19 | import os
20 | import struct
21 | import json
22 | import errno
23 | from math import isnan
24 | from os.path import isdir, exists, join, dirname, abspath, getsize, getmtime
25 | from glob import glob
26 | from bisect import bisect_left
27 |
28 | izip = getattr(itertools, 'izip', zip)
29 |
30 | try:
31 |   import fcntl
32 |   CAN_LOCK = True
33 | except ImportError:
34 |   CAN_LOCK = False
35 |
36 | LOCK_WRITES = False
37 | TIMESTAMP_FORMAT = "!L"
38 | TIMESTAMP_SIZE = struct.calcsize(TIMESTAMP_FORMAT)
39 | DATAPOINT_FORMAT = "!d"
40 | DATAPOINT_SIZE = struct.calcsize(DATAPOINT_FORMAT)
41 | NAN = float('nan')
42 | PACKED_NAN = struct.pack(DATAPOINT_FORMAT, NAN)
43 | MAX_SLICE_GAP = 80
44 | DEFAULT_TIMESTEP = 60
45 | DEFAULT_NODE_CACHING_BEHAVIOR = 'all'
46 | DEFAULT_SLICE_CACHING_BEHAVIOR = 'none'
47 | SLICE_AGGREGATION_METHODS = ['average', 'sum', 'last', 'max', 'min']
48 | SLICE_PERMS = 0o644
49 | DIR_PERMS = 0o755
50 |
51 |
52 | class CeresTree(object):
53 |   """Represents a tree of Ceres metrics contained within a single path on disk.
54 |   This is the primary Ceres API.
55 |
56 |   :param root: The directory root of the Ceres tree
57 |
58 |   .. note:: Use :func:`createTree` to initialize and instantiate a new CeresTree
59 |
60 |   .. seealso:: :func:`setDefaultNodeCachingBehavior` to adjust caching behavior
61 |   """
62 |   def __init__(self, root):
63 |     if isdir(root):
64 |       self.root = abspath(root)
65 |     else:
66 |       raise ValueError("Invalid root directory '%s'" % root)
67 |     self.nodeCache = {}
68 |     self.nodeCachingBehavior = DEFAULT_NODE_CACHING_BEHAVIOR
69 |
70 |   def __repr__(self):
71 |     return "<CeresTree[0x%x]: %s>" % (id(self), self.root)
72 |   __str__ = __repr__
73 |
74 |   @classmethod
75 |   def createTree(cls, root, **props):
76 |     """Create and return a new Ceres tree with the given properties
77 |
78 |     :param root: The root directory of the new Ceres tree
79 |     :param \*\*props: Arbitrary key-value properties to store as tree metadata
80 |
81 |     :returns: :class:`CeresTree`
82 |     """
83 |
84 |     ceresDir = join(root, '.ceres-tree')
85 |     if not isdir(ceresDir):
86 |       os.makedirs(ceresDir, DIR_PERMS)
87 |
88 |     for prop, value in props.items():
89 |       propFile = join(ceresDir, prop)
90 |       with open(propFile, 'w') as fh:
91 |         fh.write(str(value))
92 |
93 |     return cls(root)
94 |
95 |   def walk(self, **kwargs):
96 |     """Iterate through the nodes contained in this :class:`CeresTree`
97 |
98 |     :param \*\*kwargs: Options to pass to :func:`os.walk`
99 |
100 |     :returns: An iterator yielding :class:`CeresNode` objects
101 |     """
102 |     for (fsPath, subdirs, filenames) in os.walk(self.root, **kwargs):
103 |       if CeresNode.isNodeDir(fsPath):
104 |         nodePath = self.getNodePath(fsPath)
105 |         yield CeresNode(self, nodePath, fsPath)
106 |
107 |   def getFilesystemPath(self, nodePath):
108 |     """Get the on-disk path of a Ceres node given a metric name
109 |
110 |     :param nodePath: A metric name e.g. ``carbon.agents.graphite-a.cpuUsage``
111 |
112 |     :returns: The Ceres node path on disk"""
113 |     return join(self.root, nodePath.replace('.', os.sep))
114 |
115 |   def getNodePath(self, fsPath):
116 |     """Get the metric name of a Ceres node given the on-disk path
117 |
118 |     :param fsPath: The filesystem path of a Ceres node
119 |
120 |     :returns: A metric name
121 |
122 |     :raises ValueError: When `fsPath` is not a path within the :class:`CeresTree`
123 |     """
124 |     fsPath = abspath(fsPath)
125 |     if not fsPath.startswith(self.root):
126 |       raise ValueError("path '%s' not beneath tree root '%s'" % (fsPath, self.root))
127 |
128 |     nodePath = fsPath[len(self.root):].strip(os.sep).replace(os.sep, '.')
129 |     return nodePath
130 |
131 |   def hasNode(self, nodePath):
132 |     """Returns whether the Ceres tree contains the given metric
133 |
134 |     :param nodePath: A metric name e.g. ``carbon.agents.graphite-a.cpuUsage``
135 |
136 |     :returns: `True` or `False`"""
137 |     return isdir(self.getFilesystemPath(nodePath))
138 |
139 |   def setNodeCachingBehavior(self, behavior):
140 |     """Set node caching behavior.
141 |
142 |     :param behavior: See :func:`getNode` for valid behavior values
143 |     """
144 |     behavior = behavior.lower()
145 |     if behavior not in ('none', 'all'):
146 |       raise ValueError("invalid caching behavior '%s'" % behavior)
147 |
148 |     self.nodeCachingBehavior = behavior
149 |     self.nodeCache = {}
150 |
151 |   def getNode(self, nodePath):
152 |     """Returns a Ceres node given a metric name. Because nodes are looked up in
153 |     every read and write, a caching mechanism is provided. Cache behavior is set
154 |     using :func:`setNodeCachingBehavior` and defaults to the value set in
155 |     ``DEFAULT_NODE_CACHING_BEHAVIOR``
156 |
157 |     The following behaviors are available:
158 |
159 |     * `none` - Node is read from the filesystem at every access.
160 |     * `all` (default) - All nodes are cached.
161 | 162 | :param nodePath: A metric name 163 | 164 | :returns: :class:`CeresNode` or `None` 165 | """ 166 | if self.nodeCachingBehavior == 'all': 167 | if nodePath not in self.nodeCache: 168 | fsPath = self.getFilesystemPath(nodePath) 169 | if CeresNode.isNodeDir(fsPath): 170 | self.nodeCache[nodePath] = CeresNode(self, nodePath, fsPath) 171 | else: 172 | return None 173 | 174 | return self.nodeCache[nodePath] 175 | 176 | elif self.nodeCachingBehavior == 'none': 177 | fsPath = self.getFilesystemPath(nodePath) 178 | if CeresNode.isNodeDir(fsPath): 179 | return CeresNode(self, nodePath, fsPath) 180 | else: 181 | return None 182 | 183 | else: 184 | raise ValueError("invalid caching behavior configured '%s'" % self.nodeCachingBehavior) 185 | 186 | def find(self, nodePattern, fromTime=None, untilTime=None): 187 | """Find nodes which match a wildcard pattern, optionally filtering on 188 | a time range 189 | 190 | :param nodePattern: A glob-style metric wildcard 191 | :param fromTime: Optional interval start time in unix-epoch. 192 | :param untilTime: Optional interval end time in unix-epoch. 193 | 194 | :returns: An iterator yielding :class:`CeresNode` objects 195 | """ 196 | for fsPath in glob(self.getFilesystemPath(nodePattern)): 197 | if CeresNode.isNodeDir(fsPath): 198 | nodePath = self.getNodePath(fsPath) 199 | node = self.getNode(nodePath) 200 | 201 | if fromTime is None and untilTime is None: 202 | yield node 203 | elif node.hasDataForInterval(fromTime, untilTime): 204 | yield node 205 | 206 | def createNode(self, nodePath, **properties): 207 | """Creates a new metric given a new metric name and optional per-node metadata 208 | :param nodePath: The new metric name. 209 | :param \*\*properties: Arbitrary key-value properties to store as metric metadata. 210 | 211 | :returns: :class:`CeresNode` 212 | """ 213 | return CeresNode.create(self, nodePath, **properties) 214 | 215 | def store(self, nodePath, datapoints): 216 | """Store a list of datapoints associated with a metric 217 | :param nodePath: The metric name to write to e.g. ``carbon.agents.graphite-a.cpuUsage`` 218 | :param datapoints: A list of datapoint tuples: ``[(timestamp, value), ...]`` 219 | """ 220 | node = self.getNode(nodePath) 221 | 222 | if node is None: 223 | raise NodeNotFound("The node '%s' does not exist in this tree" % nodePath) 224 | 225 | node.write(datapoints) 226 | 227 | def fetch(self, nodePath, fromTime, untilTime): 228 | """Fetch data within a given interval from the given metric 229 | 230 | :param nodePath: The metric name to fetch from 231 | :param fromTime: Requested interval start time in unix-epoch. 232 | :param untilTime: Requested interval end time in unix-epoch. 233 | 234 | :returns: :class:`TimeSeriesData` 235 | :raises: :class:`NodeNotFound`, :class:`InvalidRequest` 236 | """ 237 | node = self.getNode(nodePath) 238 | 239 | if not node: 240 | raise NodeNotFound("the node '%s' does not exist in this tree" % nodePath) 241 | 242 | return node.read(fromTime, untilTime) 243 | 244 | 245 | class CeresNode(object): 246 | """A :class:`CeresNode` represents a single time-series metric of a given `timeStep` 247 | (its seconds-per-point resolution) and containing arbitrary key-value metadata. 248 | 249 | A :class:`CeresNode` is associated with its most precise `timeStep`. This `timeStep` is the finest 250 | resolution that can be used for writing, though a :class:`CeresNode` can contain and read data with 251 | other, less-precise `timeStep` values in its underlying :class:`CeresSlice` data. 
252 |
253 |   :param tree: The :class:`CeresTree` this node is associated with
254 |   :param nodePath: The name of the metric this node represents
255 |   :param fsPath: The filesystem path of this metric
256 |
257 |   .. note:: This class generally should be instantiated through use of :class:`CeresTree`. See
258 |             :func:`CeresTree.createNode` and :func:`CeresTree.getNode`
259 |
260 |   .. seealso:: :func:`setDefaultSliceCachingBehavior` to adjust caching behavior
261 |   """
262 |   __slots__ = ('tree', 'nodePath', 'fsPath',
263 |                'metadataFile', 'timeStep', 'aggregationMethod',
264 |                'sliceCache', 'sliceCachingBehavior')
265 |
266 |   def __init__(self, tree, nodePath, fsPath):
267 |     self.tree = tree
268 |     self.nodePath = nodePath
269 |     self.fsPath = fsPath
270 |     self.metadataFile = join(fsPath, '.ceres-node')
271 |     self.timeStep = None
272 |     self.aggregationMethod = 'average'
273 |     self.sliceCache = None
274 |     self.sliceCachingBehavior = DEFAULT_SLICE_CACHING_BEHAVIOR
275 |
276 |   def __repr__(self):
277 |     return "<CeresNode[0x%x]: %s>" % (id(self), self.nodePath)
278 |   __str__ = __repr__
279 |
280 |   @classmethod
281 |   def create(cls, tree, nodePath, **properties):
282 |     """Create a new :class:`CeresNode` on disk with the specified properties.
283 |
284 |     :param tree: The :class:`CeresTree` this node is associated with
285 |     :param nodePath: The name of the metric this node represents
286 |     :param \*\*properties: A set of key-value properties to be associated with this node
287 |
288 |     A :class:`CeresNode` always has the `timeStep` property which is an integer value representing
289 |     the precision of the node in seconds-per-datapoint. E.g. a value of ``60`` represents one datapoint
290 |     per minute. If no `timeStep` is specified at creation, the value of ``ceres.DEFAULT_TIMESTEP`` is
291 |     used
292 |
293 |     :returns: :class:`CeresNode`
294 |     """
295 |     # Create the node directory
296 |     fsPath = tree.getFilesystemPath(nodePath)
297 |     os.makedirs(fsPath, DIR_PERMS)
298 |
299 |     properties['timeStep'] = properties.get('timeStep', DEFAULT_TIMESTEP)
300 |     # Create the initial metadata
301 |     node = cls(tree, nodePath, fsPath)
302 |     node.writeMetadata(properties)
303 |
304 |     # Create the initial data file
305 |     # timeStep = properties['timeStep']
306 |     # now = int( time.time() )
307 |     # baseTime = now - (now % timeStep)
308 |     # slice = CeresSlice.create(node, baseTime, timeStep)
309 |
310 |     return node
311 |
312 |   @staticmethod
313 |   def isNodeDir(path):
314 |     """Tests whether the given path is a :class:`CeresNode`
315 |
316 |     :param path: Path to test
317 |     :returns: `True` or `False`
318 |     """
319 |     return isdir(path) and exists(join(path, '.ceres-node'))
320 |
321 |   @classmethod
322 |   def fromFilesystemPath(cls, fsPath):
323 |     """Instantiate a :class:`CeresNode` from the on-disk path of an existing node
324 |
325 |     :param fsPath: The filesystem path of an existing node
326 |     :returns: :class:`CeresNode`
327 |     """
328 |     dirPath = dirname(fsPath)
329 |
330 |     while True:
331 |       ceresDir = join(dirPath, '.ceres-tree')
332 |       if isdir(ceresDir):
333 |         tree = CeresTree(dirPath)
334 |         nodePath = tree.getNodePath(fsPath)
335 |         return cls(tree, nodePath, fsPath)
336 |
337 |       dirPath = dirname(dirPath)
338 |
339 |       if dirPath == '/':
340 |         raise ValueError("the path '%s' is not in a ceres tree" % fsPath)
341 |
342 |   @property
343 |   def slice_info(self):
344 |     """A property providing a list of current information about each slice
345 |
346 |     :returns: ``[(startTime, endTime, timeStep), ...]``
347 |     """
348 |     return [(slice.startTime, 
slice.endTime, slice.timeStep) for slice in self.slices] 349 | 350 | def readMetadata(self): 351 | """Update node metadata from disk 352 | 353 | :raises: :class:`CorruptNode` 354 | """ 355 | with open(self.metadataFile, 'r') as fh: 356 | try: 357 | metadata = json.load(fh) 358 | self.timeStep = int(metadata['timeStep']) 359 | if metadata.get('aggregationMethod'): 360 | self.aggregationMethod = metadata['aggregationMethod'] 361 | return metadata 362 | except (KeyError, IOError, ValueError) as e: 363 | raise CorruptNode(self, "Unable to parse node metadata: %s" % e.args) 364 | 365 | def writeMetadata(self, metadata): 366 | """Writes new metadata to disk 367 | 368 | :param metadata: a JSON-serializable dict of node metadata 369 | """ 370 | self.timeStep = int(metadata['timeStep']) 371 | with open(self.metadataFile, 'w') as fh: 372 | json.dump(metadata, fh) 373 | 374 | @property 375 | def slices(self): 376 | """A property providing access to information about this node's underlying slices. Because this 377 | information is accessed in every read and write, a caching mechanism is provided. Cache behavior is 378 | set using :func:`setSliceCachingBehavior` and defaults to the value set in 379 | ``DEFAULT_SLICE_CACHING_BEHAVIOR`` 380 | 381 | The following behaviors are available: 382 | 383 | * `none` (default) - Slice information is read from the filesystem at every access 384 | * `latest` - The latest slice is served from cache, all others from disk. Reads and writes of recent 385 | data are most likely to be in the latest slice 386 | * `all` - All slices are cached. The cache is only refreshed on new slice creation or deletion 387 | 388 | :returns: ``[(startTime, timeStep), ...]`` 389 | """ 390 | if self.sliceCache: 391 | if self.sliceCachingBehavior == 'all': 392 | for slice in self.sliceCache: 393 | yield slice 394 | 395 | elif self.sliceCachingBehavior == 'latest': 396 | yield self.sliceCache 397 | infos = self.readSlices() 398 | for info in infos[1:]: 399 | yield CeresSlice(self, *info) 400 | 401 | else: 402 | if self.sliceCachingBehavior == 'all': 403 | self.sliceCache = [CeresSlice(self, *info) for info in self.readSlices()] 404 | for slice in self.sliceCache: 405 | yield slice 406 | 407 | elif self.sliceCachingBehavior == 'latest': 408 | infos = self.readSlices() 409 | if infos: 410 | self.sliceCache = CeresSlice(self, *infos[0]) 411 | yield self.sliceCache 412 | 413 | for info in infos[1:]: 414 | yield CeresSlice(self, *info) 415 | 416 | elif self.sliceCachingBehavior == 'none': 417 | for info in self.readSlices(): 418 | yield CeresSlice(self, *info) 419 | 420 | else: 421 | raise ValueError("invalid caching behavior configured '%s'" % self.sliceCachingBehavior) 422 | 423 | def readSlices(self): 424 | """Read slice information from disk 425 | 426 | :returns: ``[(startTime, timeStep), ...]`` 427 | """ 428 | if not exists(self.fsPath): 429 | raise NodeDeleted() 430 | 431 | slice_info = [] 432 | for filename in os.listdir(self.fsPath): 433 | if filename.endswith('.slice'): 434 | startTime, timeStep = filename[:-6].split('@') 435 | slice_info.append((int(startTime), int(timeStep))) 436 | 437 | slice_info.sort(reverse=True) 438 | return slice_info 439 | 440 | def setSliceCachingBehavior(self, behavior): 441 | """Set slice caching behavior. 
442 | 443 | :param behavior: See :func:`slices` for valid behavior values 444 | """ 445 | behavior = behavior.lower() 446 | if behavior not in ('none', 'all', 'latest'): 447 | raise ValueError("invalid caching behavior '%s'" % behavior) 448 | 449 | self.sliceCachingBehavior = behavior 450 | self.sliceCache = None 451 | 452 | def clearSliceCache(self): 453 | """Clear slice cache, forcing a refresh from disk at the next access""" 454 | self.sliceCache = None 455 | 456 | def hasDataForInterval(self, fromTime, untilTime): 457 | """Test whether this node has any data in the given time interval. All slices are inspected 458 | which will trigger a read of slice information from disk if slice cache behavior is set to `latest` 459 | or `none` (See :func:`slices`) 460 | 461 | :param fromTime: Beginning of interval in unix epoch seconds 462 | :param untilTime: End of interval in unix epoch seconds 463 | :returns `True` or `False` 464 | """ 465 | slices = list(self.slices) 466 | if not slices: 467 | return False 468 | 469 | earliestData = slices[-1].startTime 470 | latestData = slices[0].endTime 471 | 472 | return ((fromTime is None) or (fromTime < latestData)) and \ 473 | ((untilTime is None) or (untilTime > earliestData)) 474 | 475 | def read(self, fromTime, untilTime): 476 | """Read data from underlying slices and return as a single time-series 477 | 478 | :param fromTime: Beginning of interval in unix epoch seconds 479 | :param untilTime: End of interval in unix epoch seconds 480 | :returns: :class:`TimeSeriesData` 481 | """ 482 | if self.timeStep is None: 483 | self.readMetadata() 484 | 485 | # Normalize the timestamps to fit proper intervals 486 | fromTime = int(fromTime - (fromTime % self.timeStep)) 487 | untilTime = int(untilTime - (untilTime % self.timeStep)) 488 | 489 | sliceBoundary = None # to know when to split up queries across slices 490 | resultValues = [] 491 | earliestData = None 492 | timeStep = self.timeStep 493 | method = self.aggregationMethod 494 | 495 | for slice in self.slices: 496 | # If there was a prior slice covering the requested interval, dont ask for that data again 497 | if (sliceBoundary is not None) and untilTime > sliceBoundary: 498 | requestUntilTime = sliceBoundary 499 | else: 500 | requestUntilTime = untilTime 501 | 502 | # if the requested interval starts after the start of this slice 503 | if fromTime >= slice.startTime: 504 | try: 505 | series = slice.read(fromTime, requestUntilTime) 506 | except NoData: 507 | break 508 | 509 | if series.timeStep != timeStep: 510 | if len(resultValues) == 0: 511 | # First slice holding series data, this becomes the default timeStep. 512 | timeStep = series.timeStep 513 | elif series.timeStep < timeStep: 514 | # Series is at a different precision, aggregate to fit our current set. 515 | series.values = aggregateSeries(method, series.timeStep, timeStep, series.values) 516 | else: 517 | # Normalize current set to fit new series data. 
518 | resultValues = aggregateSeries(method, timeStep, series.timeStep, resultValues) 519 | timeStep = series.timeStep 520 | 521 | earliestData = series.startTime 522 | 523 | rightMissing = (requestUntilTime - series.endTime) // timeStep 524 | rightNulls = [None for i in range(rightMissing)] 525 | resultValues = series.values + rightNulls + resultValues 526 | break 527 | 528 | # or if slice contains data for part of the requested interval 529 | elif untilTime >= slice.startTime: 530 | try: 531 | series = slice.read(slice.startTime, requestUntilTime) 532 | except NoData: 533 | continue 534 | 535 | if series.timeStep != timeStep: 536 | if len(resultValues) == 0: 537 | # First slice holding series data, this becomes the default timeStep. 538 | timeStep = series.timeStep 539 | elif series.timeStep < timeStep: 540 | # Series is at a different precision, aggregate to fit our current set. 541 | series.values = aggregateSeries(method, series.timeStep, timeStep, series.values) 542 | else: 543 | # Normalize current set to fit new series data. 544 | resultValues = aggregateSeries(method, timeStep, series.timeStep, resultValues) 545 | timeStep = series.timeStep 546 | 547 | earliestData = series.startTime 548 | 549 | rightMissing = (requestUntilTime - series.endTime) // timeStep 550 | rightNulls = [None for i in range(rightMissing)] 551 | resultValues = series.values + rightNulls + resultValues 552 | 553 | # this is the right-side boundary on the next iteration 554 | sliceBoundary = slice.startTime 555 | 556 | # The end of the requested interval predates all slices 557 | if earliestData is None: 558 | missing = int(untilTime - fromTime) // timeStep 559 | resultValues = [None for i in range(missing)] 560 | 561 | # Left pad nulls if the start of the requested interval predates all slices 562 | else: 563 | leftMissing = (earliestData - fromTime) // timeStep 564 | leftNulls = [None for i in range(leftMissing)] 565 | resultValues = leftNulls + resultValues 566 | 567 | return TimeSeriesData(fromTime, untilTime, timeStep, resultValues) 568 | 569 | def write(self, datapoints): 570 | """Writes datapoints to underlying slices. Datapoints that round to the same timestamp for the 571 | node's `timeStep` will be treated as duplicates and dropped. 
572 | 573 | :param datapoints: List of datapoint tuples ``[(timestamp, value), ...]`` 574 | """ 575 | if self.timeStep is None: 576 | self.readMetadata() 577 | 578 | if not datapoints: 579 | return 580 | 581 | sequences = self.compact(datapoints) 582 | needsEarlierSlice = [] # keep track of sequences that precede all existing slices 583 | 584 | while sequences: 585 | sequence = sequences.pop() 586 | timestamps = [t for t, v in sequence] 587 | beginningTime = timestamps[0] 588 | endingTime = timestamps[-1] 589 | sliceBoundary = None # used to prevent writing sequences across slice boundaries 590 | slicesExist = False 591 | 592 | for slice in self.slices: 593 | if slice.timeStep != self.timeStep: 594 | continue 595 | 596 | slicesExist = True 597 | 598 | # truncate sequence so it doesn't cross the slice boundaries 599 | if beginningTime >= slice.startTime: 600 | if sliceBoundary is None: 601 | sequenceWithinSlice = sequence 602 | else: 603 | # index of highest timestamp that doesn't exceed sliceBoundary 604 | boundaryIndex = bisect_left(timestamps, sliceBoundary) 605 | sequenceWithinSlice = sequence[:boundaryIndex] 606 | 607 | try: 608 | slice.write(sequenceWithinSlice) 609 | except SliceGapTooLarge: 610 | newSlice = CeresSlice.create(self, beginningTime, slice.timeStep) 611 | newSlice.write(sequenceWithinSlice) 612 | self.sliceCache = None 613 | except SliceDeleted: 614 | self.sliceCache = None 615 | self.write(datapoints) # recurse to retry 616 | return 617 | 618 | sequence = [] 619 | break 620 | 621 | # sequence straddles the current slice, write the right side 622 | # left side will be taken up in the next slice down 623 | elif endingTime >= slice.startTime: 624 | # index of lowest timestamp that doesn't precede slice.startTime 625 | boundaryIndex = bisect_left(timestamps, slice.startTime) 626 | sequenceWithinSlice = sequence[boundaryIndex:] 627 | # write the leftovers on the next earlier slice 628 | sequence = sequence[:boundaryIndex] 629 | slice.write(sequenceWithinSlice) 630 | 631 | if not sequence: 632 | break 633 | 634 | sliceBoundary = slice.startTime 635 | 636 | else: # slice list exhausted with stuff still to write 637 | needsEarlierSlice.append(sequence) 638 | 639 | if not slicesExist: 640 | sequences.append(sequence) 641 | needsEarlierSlice = sequences 642 | break 643 | 644 | for sequence in needsEarlierSlice: 645 | slice = CeresSlice.create(self, int(sequence[0][0]), self.timeStep) 646 | slice.write(sequence) 647 | self.clearSliceCache() 648 | 649 | def compact(self, datapoints): 650 | """Compacts datapoints into a list of contiguous, sorted lists of points with duplicate 651 | timestamps and null values removed 652 | 653 | :param datapoints: List of datapoint tuples ``[(timestamp, value), ...]`` 654 | 655 | :returns: A list of lists of contiguous sorted datapoint tuples 656 | ``[[(timestamp, value), ...], ...]`` 657 | """ 658 | datapoints = sorted(((int(timestamp), float(value)) 659 | for timestamp, value in datapoints if value is not None), 660 | key=lambda datapoint: datapoint[0]) 661 | sequences = [] 662 | sequence = [] 663 | minimumTimestamp = 0 # used to avoid duplicate intervals 664 | 665 | for timestamp, value in datapoints: 666 | timestamp -= timestamp % self.timeStep # round it down to a proper interval 667 | 668 | if not sequence: 669 | sequence.append((timestamp, value)) 670 | 671 | else: 672 | if timestamp == minimumTimestamp: # overwrite duplicate intervals with latest value 673 | sequence[-1] = (timestamp, value) 674 | continue 675 | 676 | if timestamp == 
sequence[-1][0] + self.timeStep:  # append contiguous datapoints
677 |           sequence.append((timestamp, value))
678 | 
679 |         else:  # start a new sequence if not contiguous
680 |           sequences.append(sequence)
681 |           sequence = [(timestamp, value)]
682 | 
683 |       minimumTimestamp = timestamp
684 | 
685 |     if sequence:
686 |       sequences.append(sequence)
687 | 
688 |     return sequences
689 | 
690 | 
691 | class CeresSlice(object):
692 |   __slots__ = ('node', 'startTime', 'timeStep', 'fsPath')
693 | 
694 |   def __init__(self, node, startTime, timeStep):
695 |     self.node = node
696 |     self.startTime = startTime
697 |     self.timeStep = timeStep
698 |     self.fsPath = join(node.fsPath, '%d@%d.slice' % (startTime, timeStep))
699 | 
700 |   def __repr__(self):
701 |     return "<CeresSlice [0x%x]: %s >" % (id(self), self.fsPath)
702 |   __str__ = __repr__
703 | 
704 |   @property
705 |   def isEmpty(self):
706 |     return getsize(self.fsPath) == 0
707 | 
708 |   @property
709 |   def endTime(self):
710 |     return self.startTime + ((getsize(self.fsPath) // DATAPOINT_SIZE) * self.timeStep)
711 | 
712 |   @property
713 |   def mtime(self):
714 |     return getmtime(self.fsPath)
715 | 
716 |   @classmethod
717 |   def create(cls, node, startTime, timeStep):
718 |     slice = cls(node, startTime, timeStep)
719 |     fileHandle = open(slice.fsPath, 'wb')
720 |     fileHandle.close()
721 |     os.chmod(slice.fsPath, SLICE_PERMS)
722 |     return slice
723 | 
724 |   def read(self, fromTime, untilTime):
725 |     timeOffset = int(fromTime) - self.startTime
726 | 
727 |     if timeOffset < 0:
728 |       raise InvalidRequest("requested time range (%d, %d) precedes this slice: %d" % (
729 |         fromTime, untilTime, self.startTime))
730 | 
731 |     pointOffset = timeOffset // self.timeStep
732 |     byteOffset = pointOffset * DATAPOINT_SIZE
733 | 
734 |     if byteOffset >= getsize(self.fsPath):
735 |       raise NoData()
736 | 
737 |     with open(self.fsPath, 'rb') as fileHandle:
738 |       fileHandle.seek(byteOffset)
739 | 
740 |       timeRange = int(untilTime - fromTime)
741 |       pointRange = timeRange // self.timeStep
742 |       byteRange = pointRange * DATAPOINT_SIZE
743 |       packedValues = fileHandle.read(byteRange)
744 | 
745 |     pointsReturned = len(packedValues) // DATAPOINT_SIZE
746 |     format = '!' + ('d' * pointsReturned)
747 |     values = struct.unpack(format, packedValues)
748 |     values = [v if not isnan(v) else None for v in values]
749 | 
750 |     endTime = fromTime + (len(values) * self.timeStep)
751 |     # print '[DEBUG slice.read] startTime=%s fromTime=%s untilTime=%s' % (
752 |     #   self.startTime, fromTime, untilTime)
753 |     # print '[DEBUG slice.read] timeInfo = (%s, %s, %s)' % (fromTime, endTime, self.timeStep)
754 |     # print '[DEBUG slice.read] values = %s' % str(values)
755 |     return TimeSeriesData(fromTime, endTime, self.timeStep, values)
756 | 
757 |   def write(self, sequence):
758 |     beginningTime = sequence[0][0]
759 |     timeOffset = beginningTime - self.startTime
760 |     pointOffset = timeOffset // self.timeStep
761 |     byteOffset = pointOffset * DATAPOINT_SIZE
762 | 
763 |     values = [v for t, v in sequence]
764 |     format = '!'
+ ('d' * len(values)) 765 | packedValues = struct.pack(format, *values) 766 | 767 | try: 768 | filesize = getsize(self.fsPath) 769 | except OSError as e: 770 | if e.errno == errno.ENOENT: 771 | raise SliceDeleted() 772 | else: 773 | raise 774 | 775 | byteGap = byteOffset - filesize 776 | if byteGap > 0: # pad the allowable gap with nan's 777 | pointGap = byteGap // DATAPOINT_SIZE 778 | if pointGap > MAX_SLICE_GAP: 779 | raise SliceGapTooLarge() 780 | else: 781 | packedGap = PACKED_NAN * pointGap 782 | packedValues = packedGap + packedValues 783 | byteOffset -= byteGap 784 | 785 | with open(self.fsPath, 'r+b') as fileHandle: 786 | if LOCK_WRITES: 787 | fcntl.flock(fileHandle.fileno(), fcntl.LOCK_EX) 788 | try: 789 | fileHandle.seek(byteOffset) 790 | except IOError: 791 | # print " IOError: fsPath=%s byteOffset=%d size=%d sequence=%s" % ( 792 | # self.fsPath, byteOffset, filesize, sequence) 793 | raise 794 | fileHandle.write(packedValues) 795 | 796 | def deleteBefore(self, t): 797 | if not exists(self.fsPath): 798 | raise SliceDeleted() 799 | 800 | if t % self.timeStep != 0: 801 | t = t - (t % self.timeStep) + self.timeStep 802 | timeOffset = t - self.startTime 803 | if timeOffset < 0: 804 | return 805 | 806 | pointOffset = timeOffset // self.timeStep 807 | byteOffset = pointOffset * DATAPOINT_SIZE 808 | if not byteOffset: 809 | return 810 | 811 | self.node.clearSliceCache() 812 | with open(self.fsPath, 'r+b') as fileHandle: 813 | if LOCK_WRITES: 814 | fcntl.flock(fileHandle.fileno(), fcntl.LOCK_EX) 815 | fileHandle.seek(byteOffset) 816 | fileData = fileHandle.read() 817 | if fileData: 818 | fileHandle.seek(0) 819 | fileHandle.write(fileData) 820 | fileHandle.truncate() 821 | fileHandle.close() 822 | newFsPath = join(dirname(self.fsPath), "%d@%d.slice" % (t, self.timeStep)) 823 | os.rename(self.fsPath, newFsPath) 824 | else: 825 | os.unlink(self.fsPath) 826 | raise SliceDeleted() 827 | 828 | def __lt__(self, other): 829 | return self.startTime < other.startTime 830 | 831 | 832 | class TimeSeriesData(object): 833 | __slots__ = ('startTime', 'endTime', 'timeStep', 'values') 834 | 835 | def __init__(self, startTime, endTime, timeStep, values): 836 | self.startTime = startTime 837 | self.endTime = endTime 838 | self.timeStep = timeStep 839 | self.values = values 840 | 841 | @property 842 | def timestamps(self): 843 | return range(self.startTime, self.endTime, self.timeStep) 844 | 845 | def __iter__(self): 846 | return izip(self.timestamps, self.values) 847 | 848 | def __len__(self): 849 | return len(self.values) 850 | 851 | def merge(self, other): 852 | for timestamp, value in other: 853 | if value is None: 854 | continue 855 | 856 | timestamp -= timestamp % self.timeStep 857 | if timestamp < self.startTime: 858 | continue 859 | 860 | index = int((timestamp - self.startTime) // self.timeStep) 861 | 862 | try: 863 | if self.values[index] is None: 864 | self.values[index] = value 865 | except IndexError: 866 | continue 867 | 868 | 869 | class CorruptNode(Exception): 870 | def __init__(self, node, problem): 871 | Exception.__init__(self, problem) 872 | self.node = node 873 | self.problem = problem 874 | 875 | 876 | class NoData(Exception): 877 | pass 878 | 879 | 880 | class NodeNotFound(Exception): 881 | pass 882 | 883 | 884 | class NodeDeleted(Exception): 885 | pass 886 | 887 | 888 | class InvalidRequest(Exception): 889 | pass 890 | 891 | 892 | class InvalidAggregationMethod(Exception): 893 | pass 894 | 895 | 896 | class SliceGapTooLarge(Exception): 897 | "For internal use only" 898 | 899 | 900 | 
class SliceDeleted(Exception):
901 |   pass
902 | 
903 | 
904 | def aggregate(aggregationMethod, values):
905 |   # Filter out None values
906 |   knownValues = list(filter(lambda x: x is not None, values))
907 |   if len(knownValues) == 0:
908 |     return None
909 |   # Aggregate based on method
910 |   if aggregationMethod == 'average':
911 |     return float(sum(knownValues)) / float(len(knownValues))
912 |   elif aggregationMethod == 'sum':
913 |     return float(sum(knownValues))
914 |   elif aggregationMethod == 'last':
915 |     return knownValues[-1]
916 |   elif aggregationMethod == 'max':
917 |     return max(knownValues)
918 |   elif aggregationMethod == 'min':
919 |     return min(knownValues)
920 |   else:
921 |     raise InvalidAggregationMethod("Unrecognized aggregation method %s" %
922 |                                    aggregationMethod)
923 | 
924 | 
925 | def aggregateSeries(method, oldTimeStep, newTimeStep, values):
926 |   # Aggregate current values to fit newTimeStep.
927 |   # Makes the assumption that the caller has already guaranteed
928 |   # that newTimeStep is bigger than oldTimeStep.
929 |   factor = int(newTimeStep // oldTimeStep)
930 |   newValues = []
931 |   subArr = []
932 |   for val in values:
933 |     subArr.append(val)
934 |     if len(subArr) == factor:
935 |       newValues.append(aggregate(method, subArr))
936 |       subArr = []
937 | 
938 |   if len(subArr):
939 |     newValues.append(aggregate(method, subArr))
940 | 
941 |   return newValues
942 | 
943 | 
944 | def getTree(path):
945 |   while path not in (os.sep, ''):
946 |     if isdir(join(path, '.ceres-tree')):
947 |       return CeresTree(path)
948 | 
949 |     path = dirname(path)
950 | 
951 | 
952 | def setDefaultNodeCachingBehavior(behavior):
953 |   global DEFAULT_NODE_CACHING_BEHAVIOR
954 | 
955 |   behavior = behavior.lower()
956 |   if behavior not in ('none', 'all'):
957 |     raise ValueError("invalid caching behavior '%s'" % behavior)
958 | 
959 |   DEFAULT_NODE_CACHING_BEHAVIOR = behavior
960 | 
961 | 
962 | def setDefaultSliceCachingBehavior(behavior):
963 |   global DEFAULT_SLICE_CACHING_BEHAVIOR
964 | 
965 |   behavior = behavior.lower()
966 |   if behavior not in ('none', 'all', 'latest'):
967 |     raise ValueError("invalid caching behavior '%s'" % behavior)
968 | 
969 |   DEFAULT_SLICE_CACHING_BEHAVIOR = behavior
970 | 
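# --- Editor's illustration (not part of ceres.py): a minimal usage sketch of
# --- the API above. It assumes a tree already exists on disk and that the
# --- node's metadata (notably timeStep) has been written via writeMetadata:
#
#   import time
#   from ceres import getTree
#
#   tree = getTree('/graphite/storage/ceres/metrics/foo')  # walks up to the .ceres-tree root
#   node = tree.getNode('metrics.foo')
#
#   now = int(time.time())
#   node.write([(now - 60, 1.0), (now, 2.0)])  # datapoints are (timestamp, value) tuples
#   series = node.read(now - 3600, now)        # returns a TimeSeriesData
#   for timestamp, value in series:            # iterable as (timestamp, value) pairs
#     print(timestamp, value)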
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | PAPER         =
 8 | BUILDDIR      = _build
 9 | 
10 | # Internal variables.
11 | PAPEROPT_a4     = -D latex_paper_size=a4
12 | PAPEROPT_letter = -D latex_paper_size=letter
13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
14 | # the i18n builder cannot share the environment and doctrees with the others
15 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
16 | 
17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
18 | 
19 | help:
20 | 	@echo "Please use \`make <target>' where <target> is one of"
21 | 	@echo "  html       to make standalone HTML files"
22 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
23 | 	@echo "  singlehtml to make a single large HTML file"
24 | 	@echo "  pickle     to make pickle files"
25 | 	@echo "  json       to make JSON files"
26 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
27 | 	@echo "  qthelp     to make HTML files and a qthelp project"
28 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
29 | 	@echo "  epub       to make an epub"
30 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
31 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
32 | 	@echo "  text       to make text files"
33 | 	@echo "  man        to make manual pages"
34 | 	@echo "  texinfo    to make Texinfo files"
35 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
36 | 	@echo "  gettext    to make PO message catalogs"
37 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
38 | 	@echo "  linkcheck  to check all external links for integrity"
39 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
40 | 
41 | clean:
42 | 	-rm -rf $(BUILDDIR)/*
43 | 
44 | html:
45 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
46 | 	@echo
47 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
48 | 
49 | dirhtml:
50 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
51 | 	@echo
52 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
53 | 
54 | singlehtml:
55 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
56 | 	@echo
57 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
58 | 
59 | pickle:
60 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
61 | 	@echo
62 | 	@echo "Build finished; now you can process the pickle files."
63 | 
64 | json:
65 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
66 | 	@echo
67 | 	@echo "Build finished; now you can process the JSON files."
68 | 
69 | htmlhelp:
70 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
71 | 	@echo
72 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
73 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
74 | 
75 | qthelp:
76 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
77 | 	@echo
78 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
79 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
80 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ceres.qhcp"
81 | 	@echo "To view the help file:"
82 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ceres.qhc"
83 | 
84 | devhelp:
85 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
86 | 	@echo
87 | 	@echo "Build finished."
88 | 	@echo "To view the help file:"
89 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/ceres"
90 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ceres"
91 | 	@echo "# devhelp"
92 | 
93 | epub:
94 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
95 | 	@echo
96 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
97 | 
98 | latex:
99 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
100 | 	@echo
101 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /docs/ceres.rst: -------------------------------------------------------------------------------- 1 | ============================================ 2 | ceres 3 | ============================================ 4 | 5 | .. contents:: 6 | :local: 7 | 8 | .. automodule:: ceres 9 | :members: 10 | :undoc-members: 11 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # ceres documentation build configuration file, created by 4 | # sphinx-quickstart on Thu Jan 3 04:15:28 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
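# (Editor's note: '..' below is the repository root, which makes ceres.py
# importable for the automodule directive in ceres.rst.)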
19 | sys.path.insert(0, os.path.abspath('..'))
20 | 
21 | # Bring in the new ReadTheDocs sphinx theme
22 | import sphinx_rtd_theme
23 | 
24 | # -- General configuration -----------------------------------------------------
25 | 
26 | # If your documentation needs a minimal Sphinx version, state it here.
27 | #needs_sphinx = '1.0'
28 | 
29 | # Add any Sphinx extension module names here, as strings. They can be extensions
30 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
31 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.coverage', 'sphinx.ext.ifconfig', 'sphinx.ext.viewcode']
32 | 
33 | # Add any paths that contain templates here, relative to this directory.
34 | templates_path = ['.templates']
35 | 
36 | # The suffix of source filenames.
37 | source_suffix = '.rst'
38 | 
39 | # The encoding of source files.
40 | #source_encoding = 'utf-8-sig'
41 | 
42 | # The master toctree document.
43 | master_doc = 'index'
44 | 
45 | # General information about the project.
46 | project = u'ceres'
47 | copyright = u'2011, Chris Davis'
48 | 
49 | # The version info for the project you're documenting, acts as replacement for
50 | # |version| and |release|, also used in various other places throughout the
51 | # built documents.
52 | #
53 | # The short X.Y version.
54 | version = '0.10.0'
55 | # The full version, including alpha/beta/rc tags.
56 | release = '0.10.0'
57 | 
58 | # The language for content autogenerated by Sphinx. Refer to documentation
59 | # for a list of supported languages.
60 | #language = None
61 | 
62 | # There are two options for replacing |today|: either, you set today to some
63 | # non-false value, then it is used:
64 | #today = ''
65 | # Else, today_fmt is used as the format for a strftime call.
66 | #today_fmt = '%B %d, %Y'
67 | 
68 | # List of patterns, relative to source directory, that match files and
69 | # directories to ignore when looking for source files.
70 | exclude_patterns = ['_build']
71 | 
72 | # The reST default role (used for this markup: `text`) to use for all documents.
73 | #default_role = None
74 | 
75 | # If true, '()' will be appended to :func: etc. cross-reference text.
76 | #add_function_parentheses = True
77 | 
78 | # If true, the current module name will be prepended to all description
79 | # unit titles (such as .. function::).
80 | #add_module_names = True
81 | 
82 | # If true, sectionauthor and moduleauthor directives will be shown in the
83 | # output. They are ignored by default.
84 | #show_authors = False
85 | 
86 | # The name of the Pygments (syntax highlighting) style to use.
87 | pygments_style = 'sphinx'
88 | 
89 | # A list of ignored prefixes for module index sorting.
90 | #modindex_common_prefix = []
91 | 
92 | 
93 | # -- Options for HTML output ---------------------------------------------------
94 | 
95 | # The theme to use for HTML and HTML Help pages. See the documentation for
96 | # a list of builtin themes.
97 | html_theme = 'sphinx_rtd_theme'
98 | 
99 | # Theme options are theme-specific and customize the look and feel of a theme
100 | # further. For a list of options available for each theme, see the
101 | # documentation.
102 | #html_theme_options = {}
103 | 
104 | # Add any paths that contain custom themes here, relative to this directory.
105 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
106 | 
107 | # The name for this set of Sphinx documents. If None, it defaults to
108 | # "<project> v<release> documentation".
109 | #html_title = None
110 | 
111 | # A shorter title for the navigation bar. Default is the same as html_title.
112 | #html_short_title = None
113 | 
114 | # The name of an image file (relative to this directory) to place at the top
115 | # of the sidebar.
116 | #html_logo = None
117 | 
118 | # The name of an image file (within the static path) to use as favicon of the
119 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
120 | # pixels large.
121 | #html_favicon = None
122 | 
123 | # Add any paths that contain custom static files (such as style sheets) here,
124 | # relative to this directory. They are copied after the builtin static files,
125 | # so a file named "default.css" will overwrite the builtin "default.css".
126 | #html_static_path = ['_static']
127 | 
128 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
129 | # using the given strftime format.
130 | #html_last_updated_fmt = '%b %d, %Y'
131 | 
132 | # If true, SmartyPants will be used to convert quotes and dashes to
133 | # typographically correct entities.
134 | #html_use_smartypants = True
135 | 
136 | # Custom sidebar templates, maps document names to template names.
137 | #html_sidebars = {}
138 | 
139 | # Additional templates that should be rendered to pages, maps page names to
140 | # template names.
141 | #html_additional_pages = {}
142 | 
143 | # If false, no module index is generated.
144 | #html_domain_indices = True
145 | 
146 | # If false, no index is generated.
147 | #html_use_index = True
148 | 
149 | # If true, the index is split into individual pages for each letter.
150 | #html_split_index = False
151 | 
152 | # If true, links to the reST sources are added to the pages.
153 | #html_show_sourcelink = True
154 | 
155 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
156 | #html_show_sphinx = True
157 | 
158 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
159 | #html_show_copyright = True
160 | 
161 | # If true, an OpenSearch description file will be output, and all pages will
162 | # contain a <link> tag referring to it. The value of this option must be the
163 | # base URL from which the finished HTML is served.
164 | #html_use_opensearch = ''
165 | 
166 | # This is the file name suffix for HTML files (e.g. ".xhtml").
167 | #html_file_suffix = None
168 | 
169 | # Output file base name for HTML help builder.
170 | htmlhelp_basename = 'ceresdoc'
171 | 
172 | 
173 | # -- Options for LaTeX output --------------------------------------------------
174 | 
175 | latex_elements = {
176 | # The paper size ('letterpaper' or 'a4paper').
177 | #'papersize': 'letterpaper',
178 | 
179 | # The font size ('10pt', '11pt' or '12pt').
180 | #'pointsize': '10pt',
181 | 
182 | # Additional stuff for the LaTeX preamble.
183 | #'preamble': '',
184 | }
185 | 
186 | # Grouping the document tree into LaTeX files. List of tuples
187 | # (source start file, target name, title, author, documentclass [howto/manual]).
188 | latex_documents = [
189 |   ('index', 'ceres.tex', u'ceres Documentation',
190 |    u'Chris Davis', 'manual'),
191 | ]
192 | 
193 | # The name of an image file (relative to this directory) to place at the top of
194 | # the title page.
195 | #latex_logo = None
196 | 
197 | # For "manual" documents, if this is true, then toplevel headings are parts,
198 | # not chapters.
199 | #latex_use_parts = False
200 | 
201 | # If true, show page references after internal links.
202 | #latex_show_pagerefs = False
203 | 
204 | # If true, show URL addresses after external links.
205 | #latex_show_urls = False
206 | 
207 | # Documents to append as an appendix to all manuals.
208 | #latex_appendices = []
209 | 
210 | # If false, no module index is generated.
211 | #latex_domain_indices = True
212 | 
213 | 
214 | # -- Options for manual page output --------------------------------------------
215 | 
216 | # One entry per manual page. List of tuples
217 | # (source start file, name, description, authors, manual section).
218 | man_pages = [
219 |   ('index', 'ceres', u'ceres Documentation',
220 |    [u'Chris Davis'], 1)
221 | ]
222 | 
223 | # If true, show URL addresses after external links.
224 | #man_show_urls = False
225 | 
226 | 
227 | # -- Options for Texinfo output ------------------------------------------------
228 | 
229 | # Grouping the document tree into Texinfo files. List of tuples
230 | # (source start file, target name, title, author,
231 | #  dir menu entry, description, category)
232 | texinfo_documents = [
233 |   ('index', 'ceres', u'ceres Documentation',
234 |    u'Chris Davis', 'ceres', 'One line description of project.',
235 |    'Miscellaneous'),
236 | ]
237 | 
238 | # Documents to append as an appendix to all manuals.
239 | #texinfo_appendices = []
240 | 
241 | # If false, no module index is generated.
242 | #texinfo_domain_indices = True
243 | 
244 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
245 | #texinfo_show_urls = 'footnote'
246 | 
247 | 
248 | # Example configuration for intersphinx: refer to the Python standard library.
249 | intersphinx_mapping = {'http://docs.python.org/': None}
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Ceres
2 | =====
3 | 
4 | * :ref:`genindex`
5 | * :ref:`modindex`
6 | * :ref:`search`
7 | 
8 | 
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | sphinx_rtd_theme
3 | 
--------------------------------------------------------------------------------
/plugins/maintenance/defrag.py:
--------------------------------------------------------------------------------
 1 | import ceres
 2 | import time
 3 | import os
 4 | from os.path import basename
 5 | 
 6 | 
 7 | # Read in the required params and set them as global variables
 8 | namespace = globals()
 9 | for p in ('maxSlicesPerNode', 'maxSliceGap', 'mode'):
10 |   if p not in params:
11 |     raise MissingRequiredParam(p)
12 |   value = params.pop(p)
13 |   if value.isdigit():
14 |     value = int(value)
15 |   namespace[p] = value
16 | 
17 | if mode not in ('analyze', 'operate'):
18 |   raise ValueError("Invalid mode '%s', must be either 'analyze' or 'operate'" % mode)
19 | operating = mode == 'operate'
20 | 
21 | # tracking stuff for analyze mode
22 | operation_log = []
23 | gaps_found = 0
24 | gaps_filled_in = 0
25 | bytes_filled_in = 0
26 | excess_deletes = 0
27 | 
28 | 
29 | def node_found(node):
30 |   global gaps_found
31 |   global gaps_filled_in
32 |   global bytes_filled_in
33 |   global excess_deletes
34 |   node.readMetadata()
35 | 
36 |   # First we delete excess slices
37 |   slices = list(node.slices)  # order is most recent first
38 |   excess_slices = slices[maxSlicesPerNode:]
39 |   if excess_slices:
40 |     excess_deletes += 1
41 |     operation_log.append('%s: delete %d excess slices losing data prior to %s' %
42 |                          (node.nodePath, len(excess_slices), time.ctime(excess_slices[0].endTime)))
43 |     if operating:
44 |       for slice in
excess_slices: 45 | log('deleting excess slice: %s' % slice.fsPath) 46 | os.unlink(slice.fsPath) 47 | 48 | # Now we fill in sufficiently small gaps 49 | # Need to take a fresh look at slices because we may have just deleted some 50 | node.clearSliceCache() 51 | slices = list(node.slices) 52 | slices.reverse() # this time we iterate from oldest to newest 53 | for i in range(1, len(slices)): # iterate adjacent pairs 54 | slice_a = slices[i - 1] 55 | slice_b = slices[i] 56 | if slice_a is None: # avoid considering slices we've deleted 57 | continue 58 | 59 | gap = (slice_b.startTime - slice_a.endTime) / node.timeStep 60 | if gap: 61 | gaps_found += 1 62 | if gap > maxSliceGap: 63 | gaps_filled_in += 1 64 | log('found %d data gap following %s' % (gap, slice_a.fsPath)) 65 | operation_log.append('%s: fill %d datapoint gap and merge %s and %s' % 66 | (node.nodePath, gap, basename(slice_a.fsPath), 67 | basename(slice_b.fsPath))) 68 | 69 | if operating: 70 | with file(slice_b.fsPath, 'rb') as fh: 71 | b_data = fh.read() 72 | log('read %d bytes from next slice %s' % (len(b_data), slice_b.fsPath)) 73 | 74 | with file(slice_a.fsPath, 'ab') as fh: 75 | fh.seek(0, 2) # seek to the end 76 | fh.write(gap * ceres.PACKED_NAN) 77 | fh.write(b_data) 78 | 79 | log('data merged into prior slice, deleting %s' % slice_b.fsPath) 80 | os.unlink(slice_b.fsPath) 81 | 82 | # We indicate in the slices list that this slice has been deleted by setting it to None 83 | # Need to do it regardless of whether or not we're in operate mode so analyze is accurate 84 | slices[i] = None 85 | 86 | 87 | def maintenance_start(tree): 88 | global start_time 89 | start_time = time.time() 90 | 91 | 92 | def maintenance_complete(tree): 93 | if operating: 94 | log("Operate mode: Finished performing defrag operations") 95 | else: 96 | log("Analysis mode: No slice files were harmed in the making of this report.") 97 | 98 | log("--------- Operation Log ---------") 99 | for op in operation_log: 100 | log(op) 101 | 102 | log("------------ Summary ------------") 103 | log(" Gaps found: %d" % gaps_found) 104 | log(" Gaps filled in: %d" % gaps_filled_in) 105 | log("Bytes filled in: %d" % bytes_filled_in) 106 | log("Excess Slices Deleted: %d" % excess_deletes) 107 | -------------------------------------------------------------------------------- /plugins/maintenance/merge.py: -------------------------------------------------------------------------------- 1 | import ceres 2 | 3 | try: 4 | if 'CERES_MAX_SLICE_GAP' in settings: 5 | ceres.MAX_SLICE_GAP = int(settings['CERES_MAX_SLICE_GAP']) 6 | if ceres.CAN_LOCK and 'CERES_LOCK_WRITES' in settings: 7 | ceres.LOCK_WRITES = int(settings['CERES_LOCK_WRITES']) 8 | except KeyError: 9 | pass 10 | 11 | 12 | def node_found(node): 13 | node.readMetadata() 14 | 15 | if not node.slices: 16 | return 17 | 18 | slices = {} 19 | for slice in sorted(node.slices, key=lambda x: x.endTime): 20 | slices.setdefault(slice.timeStep, []).append(slice) 21 | do_merge(node, slices) 22 | 23 | 24 | # Merge any slices that have overlapping data. 
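# (Editor's note on the strategy below: within each precision group, slices
# are sorted by (startTime, -endTime). For each later slice, the overlapping
# region is first "updated" into the merge target -- only non-None points,
# written in contiguous runs, so gaps never overwrite existing data -- then
# the non-overlapping tail is appended, and deleteBefore() finally trims or
# removes the merged source slice.)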
25 | def do_merge(node, slices): 26 | for (precision, sliceList) in slices.iteritems(): 27 | if not sliceList: 28 | continue 29 | 30 | sliceList.sort(key=lambda x: (x.startTime, -x.endTime)) 31 | sliceListIterator = iter(sliceList) 32 | 33 | mergeToSlice = next(sliceListIterator) 34 | try: 35 | while True: 36 | nextSlice = next(sliceListIterator) 37 | nextSliceEndTime = nextSlice.endTime 38 | mergeToSliceEndTime = mergeToSlice.endTime 39 | # can't merge 40 | sliceGap = (nextSlice.startTime - mergeToSliceEndTime) * ceres.DATAPOINT_SIZE 41 | if sliceGap > ceres.MAX_SLICE_GAP * precision: 42 | mergeToSlice = nextSlice 43 | continue 44 | 45 | # merge slices 46 | if nextSlice.startTime < mergeToSliceEndTime: 47 | try: 48 | slicePoints = nextSlice.read(nextSlice.startTime, 49 | min(nextSliceEndTime, mergeToSliceEndTime)) 50 | log("update %d (%d not none): %s -> %s" % 51 | (len(slicePoints), len([p for p in slicePoints if p[1] is not None]), 52 | str(nextSlice), str(mergeToSlice))) 53 | 54 | updatePoints = [] 55 | for point in slicePoints: 56 | if point[1] is not None: 57 | updatePoints.append(point) 58 | continue 59 | 60 | if updatePoints: 61 | mergeToSlice.write(updatePoints) 62 | updatePoints = [] 63 | 64 | if updatePoints: 65 | mergeToSlice.write(updatePoints) 66 | except ceres.NoData: 67 | pass 68 | 69 | try: 70 | slicePoints = nextSlice.read(max(nextSlice.startTime, mergeToSliceEndTime), 71 | nextSliceEndTime) 72 | log("append %d (%d not none): %s -> %s" % 73 | (len(slicePoints), len([p for p in slicePoints if p[1] is not None]), 74 | str(nextSlice), str(mergeToSlice))) 75 | 76 | updatePoints = [] 77 | for point in slicePoints: 78 | if point[1] is not None: 79 | updatePoints.append(point) 80 | continue 81 | 82 | if updatePoints: 83 | mergeToSlice.write(updatePoints) 84 | updatePoints = [] 85 | 86 | if updatePoints: 87 | mergeToSlice.write(updatePoints) 88 | 89 | except ceres.SliceGapTooLarge: 90 | nextSliceEndTime = updatePoints[0][0] 91 | except ceres.NoData: 92 | pass 93 | 94 | try: 95 | nextSlice.deleteBefore(nextSliceEndTime) 96 | except ceres.SliceDeleted: 97 | pass 98 | except StopIteration: 99 | pass 100 | -------------------------------------------------------------------------------- /plugins/maintenance/metadata.py: -------------------------------------------------------------------------------- 1 | from itertools import izip 2 | 3 | try: 4 | from carbon.storage import loadStorageSchemas, loadAggregationSchemas 5 | SCHEMAS = loadStorageSchemas() 6 | AGGREGATION_SCHEMAS = loadAggregationSchemas() 7 | except ImportError: 8 | SCHEMAS = [] 9 | AGGREGATION_SCHEMAS = [] 10 | 11 | 12 | def determine_metadata(metric): 13 | metadata = dict(timeStep=None, retentions=None, aggregationMethod=None, xFilesFactor=None) 14 | 15 | # Storage rules. 16 | for schema in SCHEMAS: 17 | if schema.matches(metric): 18 | metadata['retentions'] = [archive.getTuple() for archive in schema.archives] 19 | metadata['timeStep'] = metadata['retentions'][0][0] 20 | break 21 | 22 | # Aggregation rules. 23 | for schema in AGGREGATION_SCHEMAS: 24 | if schema.matches(metric): 25 | metadata['xFilesFactor'], metadata['aggregationMethod'] = schema.archives 26 | break 27 | 28 | # Validate all metadata was set. 29 | for k in metadata.keys(): 30 | if metadata[k] is None: 31 | raise Exception("Couldn't determine metadata") 32 | 33 | return metadata 34 | 35 | 36 | # Update metadata to match carbon schemas. 
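# (Editor's note: this plugin only rewrites the node's metadata so that
# subsequent writes use the new schema; it does not resample data already on
# disk. Slices whose timeStep no longer matches any retention are handled by
# the orphans plugin.)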
37 | def node_found(node): 38 | metadata = node.readMetadata() 39 | write_metadata = 0 40 | 41 | if not node.slices: 42 | return 43 | 44 | try: 45 | new_metadata = determine_metadata(node.nodePath) 46 | except Exception: 47 | return 48 | 49 | # Work out whether any storage rules have changed. 50 | if len(metadata) != len(new_metadata): 51 | write_metadata = 1 52 | else: 53 | # Zip together the current and new retention points and compare. 54 | retentions = izip(metadata['retentions'], new_metadata['retentions']) 55 | for (old, new) in retentions: 56 | (precision, retention) = zip(old, new) 57 | # If the precision or retentions differ, update the metadata. 58 | if precision[0] != precision[1] or retention[0] != retention[1]: 59 | write_metadata = 1 60 | break 61 | 62 | # Maybe update the other metadata fields. 63 | if metadata['timeStep'] != new_metadata['timeStep']: 64 | write_metadata = 1 65 | 66 | if metadata['xFilesFactor'] != new_metadata['xFilesFactor']: 67 | write_metadata = 1 68 | 69 | if metadata['aggregationMethod'] != new_metadata['aggregationMethod']: 70 | write_metadata = 1 71 | 72 | # If any changes, write out the metadata now so the writers start using it. 73 | if write_metadata: 74 | log("updating metadata: %s" % str(node)) 75 | node.writeMetadata(new_metadata) 76 | 77 | return 78 | -------------------------------------------------------------------------------- /plugins/maintenance/orphans.py: -------------------------------------------------------------------------------- 1 | import ceres 2 | 3 | try: 4 | if 'CERES_MAX_SLICE_GAP' in settings: 5 | ceres.MAX_SLICE_GAP = int(settings['CERES_MAX_SLICE_GAP']) 6 | if ceres.CAN_LOCK and 'CERES_LOCK_WRITES' in settings: 7 | ceres.LOCK_WRITES = int(settings['CERES_LOCK_WRITES']) 8 | except KeyError: 9 | pass 10 | 11 | 12 | # Roll-up slices on disk that don't match any retentions in metadata. 13 | def node_found(node): 14 | metadata = node.readMetadata() 15 | 16 | if not node.slices: 17 | return 18 | 19 | retentions = dict(iter(metadata.get('retentions', []))) 20 | method = metadata.get('aggregationMethod', 'average') 21 | 22 | for slice in node.slices: 23 | found = filter(lambda x: x == slice.timeStep, retentions) 24 | if len(found) != 0: 25 | continue 26 | # Not found, recalculate to next precision up. 27 | bigger_timeSteps = sorted(filter(lambda x: x > slice.timeStep, retentions)) 28 | if len(bigger_timeSteps) != 0: 29 | new_timeStep = bigger_timeSteps[0] 30 | series = slice.read(slice.startTime, slice.endTime) 31 | # Aggregate and normalize it to the new interval. 32 | series.values = ceres.aggregateSeries(method, series.timeStep, new_timeStep, series.values) 33 | series.timeStep = new_timeStep 34 | series.startTime = series.startTime - (series.startTime % new_timeStep) 35 | series.endTime = series.startTime + (len(series.values) * series.timeStep) 36 | # Replace all None values with NaNs. 
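      # (Editor's note: slices store fixed-width packed doubles, so a missing
      # value must be encoded as NaN on disk; CeresSlice.read converts NaN
      # back to None when the data is read out.)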
37 |       new_sequence = [(t, v if v is not None else ceres.NAN) for t, v in series]
38 |       if len(new_sequence) != 0:
39 |         new_slice = ceres.CeresSlice.create(node, new_sequence[0][0], new_timeStep)
40 |         log("rewriting slice in new time step: %s -> %s" % (str(slice), str(new_slice)))
41 |         new_slice.write(new_sequence)
42 |       try:
43 |         slice.deleteBefore(slice.endTime)
44 |       except ceres.SliceDeleted:
45 |         pass
46 | 
47 |   return
--------------------------------------------------------------------------------
/plugins/maintenance/ratelimit.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | if 'rate' not in params:
 4 |   raise MissingRequiredParam('rate')
 5 | 
 6 | nodes_per_second = int(params['rate'])
 7 | node_count = 0
 8 | last_second = 0
 9 | 
10 | 
11 | def node_found(node):
12 |   global node_count
13 |   global last_second
14 |   now = time.time()
15 |   this_second = int(now)
16 | 
17 |   if this_second != last_second:
18 |     node_count = 0
19 | 
20 |   last_second = this_second
21 |   node_count += 1
22 | 
23 |   if node_count == nodes_per_second:
24 |     remaining = (this_second + 1) - now
25 |     if remaining > 0:
26 |       time.sleep(remaining)
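# --- Editor's illustration for the rollup plugin that follows: its
# --- node_found() derives one time window per retention level, each coarser
# --- archive starting where the finer one ends. With made-up retentions of
# --- (60, 1440) and (3600, 720) and t = 1600000000:
#
#   t = 1600000000
#   precision, retention = 60, 1440
#   archiveEnd = t - (t % precision)                   # 1599999960, minute-aligned
#   archiveStart = archiveEnd - precision * retention  # 1599913560, one day earlier
#
#   t = archiveStart                                   # the coarser archive picks up here
#   precision, retention = 3600, 720
#   archiveEnd = t - (t % precision)                   # 1599912000, hour-aligned
#   archiveStart = archiveEnd - precision * retention  # 1597320000, 30 days earlier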
--------------------------------------------------------------------------------
/plugins/maintenance/rollup.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | from ceres import CeresSlice, SliceDeleted
  3 | 
  4 | 
  5 | #######################################################
  6 | # Put your custom aggregation logic in this function! #
  7 | #######################################################
  8 | def aggregate(node, datapoints):
  9 |   "Put your custom aggregation logic here."
 10 |   values = [value for (timestamp, value) in datapoints if value is not None]
 11 |   metadata = node.readMetadata()
 12 |   method = metadata.get('aggregationMethod', 'avg')
 13 | 
 14 |   if method in ('avg', 'average'):
 15 |     return float(sum(values)) / len(values)  # values is guaranteed to be nonempty
 16 | 
 17 |   elif method == 'sum':
 18 |     return sum(values)
 19 | 
 20 |   elif method == 'min':
 21 |     return min(values)
 22 | 
 23 |   elif method == 'max':
 24 |     return max(values)
 25 | 
 26 |   elif method == 'median':
 27 |     values.sort()
 28 |     return values[len(values) // 2]
 29 | 
 30 | 
 31 | def node_found(node):
 32 |   archives = []
 33 |   t = int(time.time())
 34 |   metadata = node.readMetadata()
 35 | 
 36 |   for (precision, retention) in metadata['retentions']:
 37 |     archiveEnd = t - (t % precision)
 38 |     archiveStart = archiveEnd - (precision * retention)
 39 |     t = archiveStart
 40 |     archives.append({
 41 |       'precision': precision,
 42 |       'retention': retention,
 43 |       'startTime': archiveStart,
 44 |       'endTime': archiveEnd,
 45 |       'slices': [s for s in node.slices if s.timeStep == precision]
 46 |     })
 47 | 
 48 |   for i, archive in enumerate(archives):
 49 |     if i == len(archives) - 1:
 50 |       do_rollup(node, archive, None)
 51 |     else:
 52 |       do_rollup(node, archive, archives[i + 1])
 53 | 
 54 | 
 55 | def do_rollup(node, fineArchive, coarseArchive):
 56 |   overflowSlices = [s for s in fineArchive['slices'] if s.startTime < fineArchive['startTime']]
 57 |   if not overflowSlices:
 58 |     return
 59 | 
 60 |   if coarseArchive is None:  # delete the old datapoints
 61 |     for slice in overflowSlices:
 62 |       try:
 63 |         slice.deleteBefore(fineArchive['startTime'])
 64 |       except SliceDeleted:
 65 |         pass
 66 | 
 67 |   else:
 68 |     overflowDatapoints = []
 69 |     for slice in overflowSlices:
 70 |       datapoints = slice.read(slice.startTime, fineArchive['startTime'])
 71 |       overflowDatapoints.extend(list(datapoints))
 72 | 
 73 |     overflowDatapoints.sort()
 74 |     coarseStep = coarseArchive['precision']
 75 |     deletePriorTo = coarseArchive['startTime'] + (coarseStep * coarseArchive['retention'])
 76 | 
 77 |     metadata = node.readMetadata()
 78 |     xff = metadata.get('xFilesFactor')
 79 | 
 80 |     tsMin = 2147472000
 81 |     tsMax = 0
 82 |     for d in overflowDatapoints:
 83 |       tsMin = min(tsMin, d[0])
 84 |       tsMax = max(tsMax, d[0])
 85 | 
 86 |     # We define a window corresponding to exactly one coarse datapoint
 87 |     # Then we use it to select datapoints for aggregation
 88 |     for i in range(coarseArchive['retention']):
 89 |       windowStart = coarseArchive['startTime'] + (i * coarseStep)
 90 |       windowEnd = windowStart + coarseStep
 91 |       fineDatapoints = []
 92 |       if (
 93 |           windowStart <= tsMin <= windowEnd or
 94 |           (tsMin <= windowStart and tsMax >= windowEnd) or
 95 |           windowStart <= tsMax <= windowEnd
 96 |       ):
 97 |         fineDatapoints = [d for d in overflowDatapoints if d[0] >= windowStart and d[0] < windowEnd]
 98 | 
 99 |       if len(fineDatapoints) > 0:
100 |         knownValues = [value for (timestamp, value) in fineDatapoints if value is not None]
101 |         if not knownValues:
102 |           continue
103 |         knownPercent = float(len(knownValues)) / len(fineDatapoints)
104 |         if knownPercent < xff:  # we don't have enough data to aggregate!
105 |           continue
106 | 
107 |         coarseValue = aggregate(node, fineDatapoints)
108 |         coarseDatapoint = (windowStart, coarseValue)
109 | 
110 |         written = False
111 |         for slice in coarseArchive['slices']:
112 |           if slice.startTime <= windowStart and slice.endTime >= windowStart:
113 |             slice.write([coarseDatapoint])
114 |             written = True
115 |             break
116 | 
117 |         # We could prepend to an adjacent slice starting after windowStart
118 |         # but that would be much more expensive in terms of I/O operations.
119 |         # In the common case, append-only is best.
120 | 
121 |         if not written:
122 |           newSlice = CeresSlice.create(node, windowStart, coarseStep)
123 |           newSlice.write([coarseDatapoint])
124 |           coarseArchive['slices'].append(newSlice)
125 |           deletePriorTo = min(deletePriorTo, windowStart)
126 | 
127 |     # Delete the overflow from the fine archive
128 |     for slice in overflowSlices:
129 |       try:
130 |         slice.deleteBefore(deletePriorTo)  # start of most recent coarse datapoint
131 |       except SliceDeleted:
132 |         pass
--------------------------------------------------------------------------------
/plugins/maintenance/rollup_ng.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import json
  3 | import ceres
  4 | 
  5 | try:
  6 |   if 'CERES_MAX_SLICE_GAP' in settings:
  7 |     ceres.MAX_SLICE_GAP = int(settings['CERES_MAX_SLICE_GAP'])
  8 |   if ceres.CAN_LOCK and 'CERES_LOCK_WRITES' in settings:
  9 |     ceres.LOCK_WRITES = int(settings['CERES_LOCK_WRITES'])
 10 | except KeyError:
 11 |   pass
 12 | 
 13 | rollupTime = None
 14 | debug = False
 15 | 
 16 | 
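# --- Editor's note: rollup_ng below drops a coarse point unless enough fine
# --- points cover the coarse window, i.e. float(len(values)) * fineStep /
# --- coarseStep must reach xFilesFactor. A sketch with made-up numbers:
#
#   fineStep, coarseStep, xff = 60, 3600, 0.5   # up to 60 fine points per coarse point
#   values = [1.0] * 29                         # only 29 of 60 present
#   kept = float(len(values)) * fineStep / coarseStep >= xff   # 0.483... -> dropped
#
# (rollup.py above instead compares against the number of datapoints actually
# read in the window, so the two plugins can disagree near the threshold.)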
 17 | #######################################################
 18 | # Put your custom aggregation logic in this function! #
 19 | #######################################################
 20 | def aggregate(method, values):
 21 |   if method in ('avg', 'average'):
 22 |     return float(sum(values)) / len(values)  # values is guaranteed to be nonempty
 23 | 
 24 |   elif method == 'sum':
 25 |     return sum(values)
 26 | 
 27 |   elif method == 'min':
 28 |     return min(values)
 29 | 
 30 |   elif method == 'max':
 31 |     return max(values)
 32 | 
 33 |   elif method == 'median':
 34 |     values.sort()
 35 |     return values[len(values) // 2]
 36 | 
 37 | 
 38 | def node_found(node):
 39 |   global rollupTime
 40 |   startTime = time.time()
 41 |   archives = []
 42 |   if not rollupTime:
 43 |     rollupTime = int(startTime)
 44 |   t = rollupTime
 45 |   try:
 46 |     metadata = node.readMetadata()
 47 |   except ceres.CorruptNode:
 48 |     log("failed to read metadata: %s" % node)
 49 |     return
 50 |   for (precision, retention) in metadata['retentions']:
 51 |     archiveEnd = (t - (t % metadata['timeStep'])) + precision
 52 |     archiveStart = archiveEnd - (precision * retention)
 53 |     t = archiveStart
 54 |     archives.append({
 55 |       'precision': precision,
 56 |       'retention': retention,
 57 |       'startTime': archiveStart,
 58 |       'endTime': archiveEnd,
 59 |       'slices': [s for s in node.slices if s.timeStep == precision]
 60 |     })
 61 | 
 62 |   do_rollup(node, archives, float(metadata.get('xFilesFactor')),
 63 |             metadata.get('aggregationMethod', 'avg'))
 64 |   log("rollup time %.3f seconds: %s" % ((time.time() - startTime), node))
 65 | 
 66 | 
 67 | def do_rollup(node, archives, xff, method):
 68 |   # empty node?
 69 |   if not archives:
 70 |     return
 71 | 
 72 |   rollupStat = {}
 73 |   for archive in archives:
 74 |     rollupStat[archive['precision']] = {
 75 |       'aggregate': 0,
 76 |       'drop': 0,
 77 |       'memory': 0,
 78 |       'write': 0,
 79 |       'slice_create': 0,
 80 |       'slice_delete': 0,
 81 |       'slice_delete_points': 0,
 82 |       'slice_read': 0,
 83 |       'slice_read_points': 0,
 84 |       'slice_write': 0,
 85 |       'slice_write_points': 0,
 86 |       'slice_update': 0,
 87 |       'slice_update_points': 0,
 88 |     }
 89 | 
 90 |   # list of (slice, deletePriorTo) -- will be dropped after aggregation
 91 |   overflowSlices = []
 92 | 
 93 |   # dict of in-memory aggregated points (one or more retentions skipped)
 94 |   coarsePoints = {}
 95 | 
 96 |   # start time of node ( = start time of lowest precision archive)
 97 |   windowStart = archives[-1]['startTime']
 98 | 
 99 |   # dropping data from lowest precision archive
100 |   fineStep = archives[-1]['precision']
101 |   for slice in archives[-1]['slices']:
102 |     if slice.startTime < windowStart:
103 |       overflowSlices.append((slice, windowStart))
104 | 
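  # (Editor's summary of the loop below: archives are walked finest-to-
  # coarsest in adjacent pairs. Each pass reads the fine slices falling in
  # the coarse window, merges in points aggregated in memory on an earlier
  # pass (coarsePoints), aggregates them per coarse interval, and writes the
  # results via write_points(). Slices whose data has been consumed are
  # queued in overflowSlices and trimmed once at the end.)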
105 |   for i in xrange(len(archives) - 1):
106 |     statTime = time.time()
107 | 
108 |     # source archive for aggregation
109 |     fineArchive = archives[i]
110 |     fineStep = fineArchive['precision']
111 |     fineStat = rollupStat[fineStep]
112 | 
113 |     # lower precision archive
114 |     coarseArchive = archives[i + 1]
115 |     coarseStep = coarseArchive['precision']
116 |     coarseStart = coarseArchive['startTime']
117 |     coarseStat = rollupStat[coarseStep]
118 | 
119 |     # end time for lower precision archive ( = start time of source archive)
120 |     windowEnd = coarseArchive['endTime']
121 | 
122 |     # reading points from source archive
123 |     finePoints = []
124 |     for slice in fineArchive['slices']:
125 |       # dropping data prior to start time of this archive
126 |       if windowStart > slice.endTime:
127 |         overflowSlices.append((slice, slice.endTime))
128 |         continue
129 |       # slice starts after lower precision archive ends -- no aggregation needed
130 |       if windowEnd <= slice.startTime:
131 |         continue
132 |       try:
133 |         slicePoints = slice.read(max(windowStart, slice.startTime), windowEnd)
134 |         finePoints += [p for p in slicePoints if p[1] is not None]
135 | 
136 |         fineStat['slice_read'] += 1
137 |         fineStat['slice_read_points'] += len(slicePoints)
138 |       # no data in slice, just removing slice
139 |       except ceres.NoData:
140 |         pass
141 | 
142 |       # dropping the data which we are aggregating right now
143 |       overflowSlices.append((slice, windowEnd))
144 | 
145 |     finePoints = dict(finePoints)
146 |     # adding in-memory aggregated data
147 |     finePoints.update(coarsePoints)
148 |     # sort by timestamp in ascending order
149 |     finePoints = sorted(finePoints.items())
150 | 
151 |     coarsePoints = {}
152 |     # no points to aggregate :(
153 |     if not finePoints:
154 |       continue
155 | 
156 |     # start time of aggregation (skipping already aggregated points)
157 |     startTime = finePoints[0][0]
158 |     startTime -= startTime % coarseStep
159 | 
160 |     # ... and last
161 |     endTime = finePoints[-1][0]
162 |     endTime -= endTime % coarseStep
163 |     endTime += coarseStep
164 | 
165 |     # since we are trying to write points in bulk into already existing slices,
166 |     # we need a list of slice start/end times
167 |     # sliceEvents: list of (time, isEnd, slice-number)
168 |     sliceEvents = []
169 | 
170 |     # writeSlices: list of slices, where writePoints already exists
171 |     writeSlices = []
172 |     # lastSeenSlice: slice with maximum endTime
173 |     # data will be written there with gap if no writeSlices found
174 |     lastSeenSlice = None
175 |     for j in xrange(len(coarseArchive['slices'])):
176 |       slice = coarseArchive['slices'][j]
177 |       # slice starts after end of aggregation
178 |       if slice.startTime >= endTime:
179 |         continue
180 | 
181 |       # slice ended before start of aggregation -- it can be lastSeenSlice
182 |       if slice.endTime <= startTime:
183 |         if lastSeenSlice is None or lastSeenSlice.endTime < slice.endTime:
184 |           lastSeenSlice = slice
185 |         continue
186 | 
187 |       # starting point is not covered by slice -- adding start slice event
188 |       if slice.startTime > startTime:
189 |         sliceEvents.append((slice.startTime, False, j))
190 |       # starting point covered by slice
191 |       else:
192 |         writeSlices.append(j)
193 |       # adding end slice event
194 |       sliceEvents.append((slice.endTime, True, j))
195 |     # sort slice events by time
196 |     sliceEvents.sort()
197 | 
198 |     sliceEventsIterator = iter(sliceEvents)
199 |     finePointsIterator = iter(finePoints)
200 | 
201 |     # list of points with no gap between and no slice start/end events
202 |     # all these points will be written to one list of slices
203 |     writePoints = []
204 |     try:
205 |       sliceEvent = next(sliceEventsIterator)
206 |     except StopIteration:
207 |       sliceEvent = None
208 | 
209 |     finePoint = next(finePointsIterator)
210 |     for ts in xrange(startTime, endTime, coarseStep):
211 |       tsEndTime = ts + coarseStep
212 | 
213 |       # no data for current timestamp -- next existing point is newer
214 |       if tsEndTime <= finePoint[0]:
215 |         # writing previously found points if needed
216 |         lastSeenSlice = write_points(node, coarseArchive, writePoints,
217 |                                      writeSlices, lastSeenSlice, coarseStat)
218 |         writePoints = []
219 |         continue
220 | 
221 |       values = []
222 |       try:
223 |         # finding all values for current coarse point
224 |         while finePoint[0] < tsEndTime:
225 |           values.append(finePoint[1])
226 |           finePoint = next(finePointsIterator)
227 |       except StopIteration:
228 |         pass
229 | 
230 |       fineStat['aggregate'] += 1
231 | 
232 |       # checking xff
233 |       if float(len(values)) * fineStep / coarseStep < xff:
234 |         if len(writePoints) > 0:
235 |           # writing previously found points if needed
236 |           lastSeenSlice = write_points(node, coarseArchive, writePoints,
237 |                                        writeSlices, lastSeenSlice, coarseStat)
238 |           writePoints = []
239 | 
240 |         fineStat['drop'] += 1
241 |         continue
242 | 
243 |       newValue = aggregate(method, values)
244 |       # in-memory aggregated point. writePoints is empty since
245 |       # timestamps are processed in ascending order.
246 |       if ts < coarseStart:
247 |         coarsePoints[ts] = newValue
248 | 
249 |         fineStat['memory'] += 1
250 |         continue
251 | 
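      # (Editor's note: sliceEvents marks the timestamps where the set of
      # coarse slices covering the cursor changes; writePoints is flushed at
      # each such boundary so every batch lands in a consistent set of slices.)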
252 |       # slice event found before current timestamp
253 |       if sliceEvent and sliceEvent[0] <= ts:
254 |         # since writeSlices changed -- writing all points
255 |         lastSeenSlice = write_points(node, coarseArchive, writePoints,
256 |                                      writeSlices, lastSeenSlice, coarseStat)
257 |         writePoints = [(ts, newValue)]
258 |         # updating writeSlices and lastSeenSlice
259 |         try:
260 |           while sliceEvent[0] <= ts:
261 |             if sliceEvent[1]:
262 |               writeSlices.remove(sliceEvent[2])
263 |               lastSeenSlice = coarseArchive['slices'][sliceEvent[2]]
264 |             else:
265 |               writeSlices.append(sliceEvent[2])
266 |             sliceEvent = next(sliceEventsIterator)
267 |         except StopIteration:
268 |           sliceEvent = None
269 |       # no gaps, no events, just adding to list
270 |       else:
271 |         writePoints.append((ts, newValue))
272 | 
273 |       fineStat['write'] += 1
274 | 
275 |     # writing last portion of points
276 |     write_points(node, coarseArchive, writePoints, writeSlices, lastSeenSlice, coarseStat)
277 | 
278 |     fineStat['time'] = time.time() - statTime
279 | 
280 |   # after all -- drop aggregated data from source archives
281 |   for slice, deletePriorTo in overflowSlices:
282 |     try:
283 |       deletedPoints = (min(slice.endTime, deletePriorTo) - slice.startTime) / slice.timeStep
284 |       rollupStat[slice.timeStep]['slice_delete'] += 1
285 |       rollupStat[slice.timeStep]['slice_delete_points'] += deletedPoints
286 |       slice.deleteBefore(deletePriorTo)
287 |     except ceres.SliceDeleted:
288 |       pass
289 | 
290 |   if debug:
291 |     log("rollup stat %s: %s" % (node, json.dumps(rollupStat)))
292 | 
293 | 
294 | def write_points(node, archive, points, slices, lastSlice, stat):
295 |   if not points:
296 |     return lastSlice
297 | 
298 |   written = False
299 |   # trying to update all existing slices
300 |   for i in slices:
301 |     try:
302 |       archive['slices'][i].write(points)
303 |       written = True
304 | 
305 |       stat['slice_update'] += 1
306 |       stat['slice_update_points'] += len(points)
307 |     except ceres.SliceDeleted:
308 |       pass
309 |   # if not -- writing to lastSeenSlice with gap
310 |   if not written and lastSlice:
311 |     try:
312 |       lastSlice.write(points)
313 |       written = True
314 | 
315 |       stat['slice_write'] += 1
316 |       stat['slice_write_points'] += len(points)
317 |     except (ceres.SliceDeleted, ceres.SliceGapTooLarge):
318 |       pass
319 |   # gap in last slice too large -- creating new slice
320 |   if not written:
321 |     newSlice = ceres.CeresSlice.create(node, points[0][0], archive['precision'])
322 |     newSlice.write(points)
323 |     archive['slices'].append(newSlice)
324 |     lastSlice = newSlice
325 | 
326 |     stat['slice_create'] += 1
327 |     stat['slice_write'] += 1
328 |     stat['slice_write_points'] += len(points)
329 |   return lastSlice
--------------------------------------------------------------------------------
/plugins/maintenance/template.py:
--------------------------------------------------------------------------------
 1 | # Magical Variables
 2 | # -------------------
 3 | # log(message)
 4 | # except MissingRequiredParam
 5 | # except PluginFail
 6 | # params
 7 | 
 8 | 
 9 | # def maintenance_start(tree):
10 | # def maintenance_complete(tree):
11 | 
12 | 
13 | # def
node_found(node): 14 | # def directory_found(path): 15 | # def directory_empty(path): 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | from glob import glob 5 | from distutils.core import setup 6 | 7 | 8 | setup( 9 | name='ceres', 10 | version='1.1.0', 11 | url='http://graphiteapp.org/', 12 | author='Chris Davis', 13 | author_email='chrismd@gmail.com', 14 | license='Apache Software License 2.0', 15 | description='Distributable time-series database', 16 | py_modules=['ceres'], 17 | scripts=glob('bin/*'), 18 | data_files=[('plugins/maintenance', glob('plugins/maintenance/*.py'))], 19 | classifiers=[ 20 | 'Programming Language :: Python :: 2', 21 | 'Programming Language :: Python :: 2.7', 22 | 'Programming Language :: Python :: 3', 23 | 'Programming Language :: Python :: 3.3', 24 | 'Programming Language :: Python :: 3.4', 25 | 'Programming Language :: Python :: 3.5', 26 | 'Programming Language :: Python :: 3.6', 27 | ], 28 | ) 29 | -------------------------------------------------------------------------------- /test_requirements.txt: -------------------------------------------------------------------------------- 1 | nose==1.3.7 2 | mock==1.0.1 3 | flake8==3.3.0 4 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphite-project/ceres/0804b6e15857aec461aec76b365bd94c40e30fae/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_ceres.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | import errno 4 | 5 | from mock import ANY, Mock, call, mock_open, patch 6 | from os import path 7 | 8 | try: 9 | import __builtin__ as builtins 10 | except ImportError: 11 | import builtins 12 | 13 | 14 | from ceres import CeresNode, CeresSlice, CeresTree 15 | from ceres import DATAPOINT_SIZE, DEFAULT_NODE_CACHING_BEHAVIOR, DEFAULT_SLICE_CACHING_BEHAVIOR,\ 16 | DEFAULT_TIMESTEP, DIR_PERMS, MAX_SLICE_GAP 17 | from ceres import getTree, CorruptNode, NoData, NodeDeleted, NodeNotFound, SliceDeleted,\ 18 | SliceGapTooLarge, TimeSeriesData, InvalidAggregationMethod 19 | 20 | 21 | def fetch_mock_open_writes(open_mock): 22 | handle = open_mock() 23 | # XXX Python3 compability since a write can be bytes or str 24 | try: 25 | return b''.join([c[0][0] for c in handle.write.call_args_list]) 26 | except TypeError: 27 | return ''.join([c[0][0] for c in handle.write.call_args_list]) 28 | 29 | 30 | def make_slice_mock(start, end, step): 31 | slice_mock = Mock(spec=CeresSlice) 32 | slice_mock.startTime = start 33 | slice_mock.endTime = end 34 | slice_mock.timeStep = step 35 | 36 | def side_effect(*args, **kwargs): 37 | startTime, endTime = args 38 | result_start = max(startTime, start) 39 | result_end = min(endTime, end) 40 | points = (result_end - result_start) // step 41 | return TimeSeriesData(result_start, result_end, step, [float(x) for x in range(points)]) 42 | 43 | slice_mock.read.side_effect = side_effect 44 | return slice_mock 45 | 46 | 47 | class ModuleFunctionsTest(TestCase): 48 | @patch('ceres.isdir', new=Mock(return_value=False)) 49 | @patch('ceres.CeresTree', new=Mock(spec=CeresTree)) 50 | def test_get_tree_with_no_tree(self): 51 | tree = 
47 | class ModuleFunctionsTest(TestCase): 48 | @patch('ceres.isdir', new=Mock(return_value=False)) 49 | @patch('ceres.CeresTree', new=Mock(spec=CeresTree)) 50 | def test_get_tree_with_no_tree(self): 51 | tree = getTree('/graphite/storage/ceres/foo/bar') 52 | self.assertEqual(None, tree) 53 | 54 | @patch('ceres.CeresTree', spec=CeresTree) 55 | @patch('ceres.isdir') 56 | def test_get_tree_with_tree_samedir(self, isdir_mock, ceres_tree_mock): 57 | isdir_mock.return_value = True 58 | tree = getTree('/graphite/storage/ceres') 59 | self.assertNotEqual(None, tree) 60 | isdir_mock.assert_called_once_with('/graphite/storage/ceres/.ceres-tree') 61 | ceres_tree_mock.assert_called_once_with('/graphite/storage/ceres') 62 | 63 | 64 | class TimeSeriesDataTest(TestCase): 65 | def setUp(self): 66 | self.time_series = TimeSeriesData(0, 50, 5, [float(x) for x in range(0, 10)]) 67 | 68 | def test_timestamps_property(self): 69 | self.assertEqual(10, len(self.time_series.timestamps)) 70 | self.assertEqual(0, self.time_series.timestamps[0]) 71 | self.assertEqual(45, self.time_series.timestamps[-1]) 72 | 73 | def test_iter_values(self): 74 | values = list(self.time_series) 75 | self.assertEqual(10, len(values)) 76 | self.assertEqual((0, 0.0), values[0]) 77 | self.assertEqual((45, 9.0), values[-1]) 78 | 79 | def test_merge_no_missing(self): 80 | # merge has no effect if the time series has no gaps 81 | other_series = TimeSeriesData(0, 25, 5, [float(x * x) for x in range(1, 6)]) 82 | original_values = list(self.time_series) 83 | self.time_series.merge(other_series) 84 | self.assertEqual(original_values, list(self.time_series)) 85 | 86 | def test_merge_with_empty(self): 87 | new_series = TimeSeriesData(0, 50, 5, [None] * 10) 88 | new_series.merge(self.time_series) 89 | self.assertEqual(list(self.time_series), list(new_series)) 90 | 91 | def test_merge_with_holes(self): 92 | values = [] 93 | for x in range(0, 10): 94 | if x % 2 == 0: 95 | values.append(x) 96 | else: 97 | values.append(None) 98 | new_series = TimeSeriesData(0, 50, 5, values) 99 | new_series.merge(self.time_series) 100 | self.assertEqual(list(self.time_series), list(new_series)) 101 | 102 | 103 | class CeresTreeTest(TestCase): 104 | def setUp(self): 105 | with patch('ceres.isdir', new=Mock(return_value=True)): 106 | self.ceres_tree = CeresTree('/graphite/storage/ceres') 107 | 108 | @patch('ceres.isdir', new=Mock(return_value=False)) 109 | def test_init_invalid(self): 110 | self.assertRaises(ValueError, CeresTree, '/nonexistent_path') 111 | 112 | @patch('ceres.isdir', new=Mock(return_value=True)) 113 | @patch('ceres.abspath') 114 | def test_init_valid(self, abspath_mock): 115 | abspath_mock.return_value = '/var/graphite/storage/ceres' 116 | tree = CeresTree('/graphite/storage/ceres') 117 | abspath_mock.assert_called_once_with('/graphite/storage/ceres') 118 | self.assertEqual('/var/graphite/storage/ceres', tree.root) 119 | 120 | @patch('ceres.isdir', new=Mock(return_value=True)) 121 | def test_init_sets_default_cache_behavior(self): 122 | tree = CeresTree('/graphite/storage/ceres') 123 | self.assertEqual(DEFAULT_NODE_CACHING_BEHAVIOR, tree.nodeCachingBehavior) 124 | 125 | @patch('ceres.isdir', new=Mock(return_value=False)) 126 | @patch.object(CeresTree, '__init__') 127 | @patch('os.makedirs') 128 | def test_create_tree_new_dir(self, makedirs_mock, ceres_tree_init_mock): 129 | ceres_tree_init_mock.return_value = None 130 | with patch.object(builtins, 'open', mock_open()) as open_mock: 131 | CeresTree.createTree('/graphite/storage/ceres') 132 | makedirs_mock.assert_called_once_with('/graphite/storage/ceres/.ceres-tree', DIR_PERMS) 133 | self.assertFalse(open_mock.called) 134 | ceres_tree_init_mock.assert_called_once_with('/graphite/storage/ceres') 135 | 136 |
@patch('ceres.isdir', new=Mock(return_value=True)) 137 | @patch.object(CeresTree, '__init__') 138 | @patch('os.makedirs') 139 | def test_create_tree_existing_dir(self, makedirs_mock, ceres_tree_init_mock): 140 | ceres_tree_init_mock.return_value = None 141 | with patch.object(builtins, 'open', mock_open()) as open_mock: 142 | CeresTree.createTree('/graphite/storage/ceres') 143 | self.assertFalse(makedirs_mock.called) 144 | self.assertFalse(open_mock.called) 145 | ceres_tree_init_mock.assert_called_once_with('/graphite/storage/ceres') 146 | 147 | @patch('ceres.isdir', new=Mock(return_value=True)) 148 | @patch.object(CeresTree, '__init__', new=Mock(return_value=None)) 149 | @patch('os.makedirs', new=Mock()) 150 | def test_create_tree_write_props(self): 151 | props = { 152 | "foo_prop": "foo_value", 153 | "bar_prop": "bar_value"} 154 | with patch.object(builtins, 'open', mock_open()) as open_mock: 155 | CeresTree.createTree('/graphite/storage/ceres', **props) 156 | for (prop, value) in props.items(): 157 | open_mock.assert_any_call(path.join('/graphite/storage/ceres', '.ceres-tree', prop), 'w') 158 | open_mock.return_value.write.assert_any_call(value) 159 | 160 | @patch('ceres.abspath', new=Mock(side_effect=lambda x: x)) 161 | def test_get_node_path_clean(self): 162 | result = self.ceres_tree.getNodePath('/graphite/storage/ceres/metric/foo') 163 | self.assertEqual('metric.foo', result) 164 | 165 | @patch('ceres.abspath', new=Mock(side_effect=lambda x: x)) 166 | def test_get_node_path_trailing_slash(self): 167 | result = self.ceres_tree.getNodePath('/graphite/storage/ceres/metric/foo/') 168 | self.assertEqual('metric.foo', result) 169 | 170 | @patch('ceres.abspath', new=Mock(side_effect=lambda x: x)) 171 | def test_get_node_path_outside_tree(self): 172 | self.assertRaises(ValueError, self.ceres_tree.getNodePath, '/metric/foo') 173 | 174 | @patch('ceres.CeresNode', spec=CeresNode) 175 | def test_get_node_uncached(self, ceres_node_mock): 176 | ceres_node_mock.isNodeDir.return_value = True 177 | result = self.ceres_tree.getNode('metrics.foo') 178 | ceres_node_mock.assert_called_once_with( 179 | self.ceres_tree, 180 | 'metrics.foo', 181 | '/graphite/storage/ceres/metrics/foo') 182 | self.assertEqual(result, ceres_node_mock()) 183 | 184 | @patch('ceres.CeresNode', spec=CeresNode) 185 | @patch('ceres.abspath', new=Mock(side_effect=lambda x: x)) 186 | @patch('ceres.glob', new=Mock(side_effect=lambda x: [x])) 187 | def test_find_explicit_metric(self, ceres_node_mock): 188 | ceres_node_mock.isNodeDir.return_value = True 189 | result = list(self.ceres_tree.find('metrics.foo')) 190 | self.assertEqual(1, len(result)) 191 | self.assertEqual(result[0], ceres_node_mock()) 192 | 193 | @patch('ceres.CeresNode', spec=CeresNode) 194 | @patch('ceres.abspath', new=Mock(side_effect=lambda x: x)) 195 | @patch('ceres.glob') 196 | def test_find_wildcard(self, glob_mock, ceres_node_mock): 197 | matches = ['foo', 'bar', 'baz'] 198 | glob_mock.side_effect = lambda x: [x.replace('*', m) for m in matches] 199 | ceres_node_mock.isNodeDir.return_value = True 200 | result = list(self.ceres_tree.find('metrics.*')) 201 | self.assertEqual(3, len(result)) 202 | ceres_node_mock.assert_any_call(self.ceres_tree, 'metrics.foo', ANY) 203 | ceres_node_mock.assert_any_call(self.ceres_tree, 'metrics.bar', ANY) 204 | ceres_node_mock.assert_any_call(self.ceres_tree, 'metrics.baz', ANY) 205 | 206 | @patch('ceres.CeresNode', spec=CeresNode) 207 | @patch('ceres.abspath', new=Mock(side_effect=lambda x: x)) 208 | @patch('ceres.glob', 
new=Mock(return_value=[])) 209 | def test_find_wildcard_no_matches(self, ceres_node_mock): 210 | ceres_node_mock.isNodeDir.return_value = False 211 | result = list(self.ceres_tree.find('metrics.*')) 212 | self.assertEqual(0, len(result)) 213 | self.assertFalse(ceres_node_mock.called) 214 | 215 | @patch('ceres.CeresNode', spec=CeresNode) 216 | @patch('ceres.abspath', new=Mock(side_effect=lambda x: x)) 217 | @patch('ceres.glob', new=Mock(side_effect=lambda x: [x])) 218 | def test_find_metric_with_interval(self, ceres_node_mock): 219 | ceres_node_mock.isNodeDir.return_value = True 220 | ceres_node_mock.return_value.hasDataForInterval.return_value = False 221 | result = list(self.ceres_tree.find('metrics.foo', 0, 1000)) 222 | self.assertEqual(0, len(result)) 223 | ceres_node_mock.return_value.hasDataForInterval.assert_called_once_with(0, 1000) 224 | 225 | @patch('ceres.CeresNode', spec=CeresNode) 226 | @patch('ceres.abspath', new=Mock(side_effect=lambda x: x)) 227 | @patch('ceres.glob', new=Mock(side_effect=lambda x: [x])) 228 | def test_find_metric_with_interval_found(self, ceres_node_mock): 229 | ceres_node_mock.isNodeDir.return_value = True 230 | ceres_node_mock.return_value.hasDataForInterval.return_value = True 231 | result = list(self.ceres_tree.find('metrics.foo', 0, 1000)) 232 | self.assertEqual(result[0], ceres_node_mock()) 233 | ceres_node_mock.return_value.hasDataForInterval.assert_called_once_with(0, 1000) 234 | 235 | def test_store_invalid_node(self): 236 | with patch.object(self.ceres_tree, 'getNode', new=Mock(return_value=None)): 237 | datapoints = [(100, 1.0)] 238 | self.assertRaises(NodeNotFound, self.ceres_tree.store, 'metrics.foo', datapoints) 239 | 240 | @patch('ceres.CeresNode', spec=CeresNode) 241 | def test_store_valid_node(self, ceres_node_mock): 242 | datapoints = [(100, 1.0)] 243 | self.ceres_tree.store('metrics.foo', datapoints) 244 | ceres_node_mock.assert_called_once_with(self.ceres_tree, 'metrics.foo', ANY) 245 | ceres_node_mock.return_value.write.assert_called_once_with(datapoints) 246 | 247 | def test_fetch_invalid_node(self): 248 | with patch.object(self.ceres_tree, 'getNode', new=Mock(return_value=None)): 249 | self.assertRaises(NodeNotFound, self.ceres_tree.fetch, 'metrics.foo', 0, 1000) 250 | 251 | @patch('ceres.CeresNode', spec=CeresNode) 252 | def test_fetch_metric(self, ceres_node_mock): 253 | read_mock = ceres_node_mock.return_value.read 254 | read_mock.return_value = Mock(spec=TimeSeriesData) 255 | result = self.ceres_tree.fetch('metrics.foo', 0, 1000) 256 | ceres_node_mock.assert_called_once_with(self.ceres_tree, 'metrics.foo', ANY) 257 | read_mock.assert_called_once_with(0, 1000) 258 | self.assertEqual(read_mock.return_value, result) 259 | 260 | def test_set_node_caching_behavior_validates_names(self): 261 | self.ceres_tree.setNodeCachingBehavior('none') 262 | self.assertEquals('none', self.ceres_tree.nodeCachingBehavior) 263 | self.ceres_tree.setNodeCachingBehavior('all') 264 | self.assertEquals('all', self.ceres_tree.nodeCachingBehavior) 265 | self.assertRaises(ValueError, self.ceres_tree.setNodeCachingBehavior, 'foo') 266 | # Assert unchanged 267 | self.assertEquals('all', self.ceres_tree.nodeCachingBehavior) 268 | 269 | 270 | class CeresNodeTest(TestCase): 271 | def setUp(self): 272 | with patch('ceres.isdir', new=Mock(return_value=True)): 273 | with patch('ceres.exists', new=Mock(return_value=True)): 274 | self.ceres_tree = CeresTree('/graphite/storage/ceres') 275 | self.ceres_node = CeresNode( 276 | self.ceres_tree, 277 | 'sample_metric', 278 |
'/graphite/storage/ceres/sample_metric') 279 | self.ceres_node.timeStep = 60 280 | 281 | slice_configs = [ 282 | (1200, 1800, 60), 283 | (600, 1200, 60)] 284 | 285 | self.ceres_slices = [] 286 | for start, end, step in slice_configs: 287 | slice_mock = make_slice_mock(start, end, step) 288 | self.ceres_slices.append(slice_mock) 289 | 290 | def test_init_sets_default_cache_behavior(self): 291 | ceres_node = CeresNode( 292 | self.ceres_tree, 293 | 'sample_metric', 294 | '/graphite/storage/ceres/sample_metric') 295 | self.assertEqual(DEFAULT_SLICE_CACHING_BEHAVIOR, ceres_node.sliceCachingBehavior) 296 | 297 | @patch('ceres.os.makedirs', new=Mock()) 298 | @patch('ceres.CeresNode.writeMetadata') 299 | def test_create_sets_a_default_timestep(self, write_metadata_mock): 300 | CeresNode.create(self.ceres_tree, 'sample_metric') 301 | write_metadata_mock.assert_called_with(dict(timeStep=DEFAULT_TIMESTEP)) 302 | 303 | @patch('ceres.os.makedirs', new=Mock()) 304 | @patch('ceres.CeresNode.writeMetadata', new=Mock()) 305 | def test_create_returns_new_ceres_node(self): 306 | ceres_node = CeresNode.create(self.ceres_tree, 'sample_metric') 307 | self.assertTrue(isinstance(ceres_node, CeresNode)) 308 | 309 | def test_write_metadata(self): 310 | import json 311 | 312 | open_mock = mock_open() 313 | metadata = dict(timeStep=60, aggregationMethod='avg') 314 | with patch.object(builtins, 'open', open_mock): 315 | self.ceres_node.writeMetadata(metadata) 316 | self.assertEquals(json.dumps(metadata), fetch_mock_open_writes(open_mock)) 317 | 318 | def test_read_metadata_sets_timestep(self): 319 | import json 320 | 321 | metadata = dict(timeStep=60, aggregationMethod='avg') 322 | json_metadata = json.dumps(metadata) 323 | open_mock = mock_open(read_data=json_metadata) 324 | with patch.object(builtins, 'open', open_mock): 325 | self.ceres_node.readMetadata() 326 | open_mock().read.assert_called_once() 327 | self.assertEqual(60, self.ceres_node.timeStep) 328 | 329 | def test_read_metadata_returns_corrupt_if_json_error(self): 330 | with patch.object(builtins, 'open', mock_open()): 331 | self.assertRaises(CorruptNode, self.ceres_node.readMetadata) 332 | 333 | def test_set_slice_caching_behavior_validates_names(self): 334 | self.ceres_node.setSliceCachingBehavior('none') 335 | self.assertEquals('none', self.ceres_node.sliceCachingBehavior) 336 | self.ceres_node.setSliceCachingBehavior('all') 337 | self.assertEquals('all', self.ceres_node.sliceCachingBehavior) 338 | self.ceres_node.setSliceCachingBehavior('latest') 339 | self.assertEquals('latest', self.ceres_node.sliceCachingBehavior) 340 | self.assertRaises(ValueError, self.ceres_node.setSliceCachingBehavior, 'foo') 341 | # Assert unchanged 342 | self.assertEquals('latest', self.ceres_node.sliceCachingBehavior) 343 | 344 | def test_slices_is_a_generator(self): 345 | from types import GeneratorType 346 | 347 | self.assertTrue(isinstance(self.ceres_node.slices, GeneratorType)) 348 | 349 | def test_slices_returns_cached_set_when_behavior_is_all(self): 350 | def mock_slice(): 351 | return Mock(spec=CeresSlice) 352 | 353 | self.ceres_node.setSliceCachingBehavior('all') 354 | cached_contents = [mock_slice() for _ in range(4)] 355 | self.ceres_node.sliceCache = cached_contents 356 | with patch('ceres.CeresNode.readSlices') as read_slices_mock: 357 | slice_list = list(self.ceres_node.slices) 358 | self.assertFalse(read_slices_mock.called) 359 | 360 | self.assertEquals(cached_contents, slice_list) 361 | 362 | def test_slices_returns_first_cached_when_behavior_is_latest(self): 363
| self.ceres_node.setSliceCachingBehavior('latest') 364 | cached_contents = Mock(spec=CeresSlice) 365 | self.ceres_node.sliceCache = cached_contents 366 | 367 | read_slices_mock = Mock(return_value=[]) 368 | with patch('ceres.CeresNode.readSlices', new=read_slices_mock): 369 | slice_iter = self.ceres_node.slices 370 | self.assertEquals(cached_contents, next(slice_iter)) 371 | # We should be yielding cached before trying to read 372 | self.assertFalse(read_slices_mock.called) 373 | 374 | def test_slices_reads_remaining_when_behavior_is_latest(self): 375 | self.ceres_node.setSliceCachingBehavior('latest') 376 | cached_contents = Mock(spec=CeresSlice) 377 | self.ceres_node.sliceCache = cached_contents 378 | 379 | read_slices_mock = Mock(return_value=[(0, 60)]) 380 | with patch('ceres.CeresNode.readSlices', new=read_slices_mock): 381 | slice_iter = self.ceres_node.slices 382 | next(slice_iter) 383 | 384 | # *now* we expect to read from disk 385 | try: 386 | while True: 387 | next(slice_iter) 388 | except StopIteration: 389 | pass 390 | 391 | read_slices_mock.assert_called_once_with() 392 | 393 | def test_slices_reads_from_disk_when_behavior_is_none(self): 394 | self.ceres_node.setSliceCachingBehavior('none') 395 | read_slices_mock = Mock(return_value=[(0, 60)]) 396 | with patch('ceres.CeresNode.readSlices', new=read_slices_mock): 397 | slice_iter = self.ceres_node.slices 398 | next(slice_iter) 399 | 400 | read_slices_mock.assert_called_once_with() 401 | 402 | def test_slices_reads_from_disk_when_cache_empty_and_behavior_all(self): 403 | self.ceres_node.setSliceCachingBehavior('all') 404 | read_slices_mock = Mock(return_value=[(0, 60)]) 405 | with patch('ceres.CeresNode.readSlices', new=read_slices_mock): 406 | slice_iter = self.ceres_node.slices 407 | next(slice_iter) 408 | 409 | read_slices_mock.assert_called_once_with() 410 | 411 | def test_slices_reads_from_disk_when_cache_empty_and_behavior_latest(self): 412 | self.ceres_node.setSliceCachingBehavior('latest') 413 | read_slices_mock = Mock(return_value=[(0, 60)]) 414 | with patch('ceres.CeresNode.readSlices', new=read_slices_mock): 415 | slice_iter = self.ceres_node.slices 416 | next(slice_iter) 417 | 418 | read_slices_mock.assert_called_once_with() 419 | 420 | @patch('ceres.exists', new=Mock(return_value=False)) 421 | def test_read_slices_raises_when_node_doesnt_exist(self): 422 | self.assertRaises(NodeDeleted, self.ceres_node.readSlices) 423 | 424 | @patch('ceres.exists', new=Mock(return_value=True)) 425 | def test_read_slices_ignores_not_slices(self): 426 | listdir_mock = Mock(return_value=['0@60.slice', '0@300.slice', 'foo']) 427 | with patch('ceres.os.listdir', new=listdir_mock): 428 | self.assertEquals(2, len(self.ceres_node.readSlices())) 429 | 430 | @patch('ceres.exists', new=Mock(return_value=True)) 431 | def test_read_slices_parses_slice_filenames(self): 432 | listdir_mock = Mock(return_value=['0@60.slice', '0@300.slice']) 433 | with patch('ceres.os.listdir', new=listdir_mock): 434 | slice_infos = self.ceres_node.readSlices() 435 | self.assertTrue((0, 60) in slice_infos) 436 | self.assertTrue((0, 300) in slice_infos) 437 | 438 | @patch('ceres.exists', new=Mock(return_value=True)) 439 | def test_read_slices_reverse_sorts_by_time(self): 440 | listdir_mock = Mock(return_value=[ 441 | '0@60.slice', 442 | '320@300.slice', 443 | '120@120.slice', 444 | '0@120.slice', 445 | '600@300.slice']) 446 | 447 | with patch('ceres.os.listdir', new=listdir_mock): 448 | slice_infos = self.ceres_node.readSlices() 449 | slice_timestamps = [s[0] for s in
slice_infos] 450 | self.assertEqual([600, 320, 120, 0, 0], slice_timestamps) 451 | 452 | def test_no_data_exists_if_no_slices_exist(self): 453 | with patch('ceres.CeresNode.readSlices', new=Mock(return_value=[])): 454 | self.assertFalse(self.ceres_node.hasDataForInterval(0, 60)) 455 | 456 | def test_no_data_exists_if_no_slices_exist_and_no_time_specified(self): 457 | with patch('ceres.CeresNode.readSlices', new=Mock(return_value=[])): 458 | self.assertFalse(self.ceres_node.hasDataForInterval(None, None)) 459 | 460 | def test_data_exists_if_slices_exist_and_no_time_specified(self): 461 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 462 | self.assertTrue(self.ceres_node.hasDataForInterval(None, None)) 463 | 464 | def test_data_exists_if_slice_covers_interval_completely(self): 465 | with patch('ceres.CeresNode.slices', new=[self.ceres_slices[0]]): 466 | self.assertTrue(self.ceres_node.hasDataForInterval(1200, 1800)) 467 | 468 | def test_data_exists_if_slice_covers_interval_end(self): 469 | with patch('ceres.CeresNode.slices', new=[self.ceres_slices[0]]): 470 | self.assertTrue(self.ceres_node.hasDataForInterval(600, 1260)) 471 | 472 | def test_data_exists_if_slice_covers_interval_start(self): 473 | with patch('ceres.CeresNode.slices', new=[self.ceres_slices[0]]): 474 | self.assertTrue(self.ceres_node.hasDataForInterval(1740, 2100)) 475 | 476 | def test_no_data_exists_if_slice_touches_interval_end(self): 477 | with patch('ceres.CeresNode.slices', new=[self.ceres_slices[0]]): 478 | self.assertFalse(self.ceres_node.hasDataForInterval(600, 1200)) 479 | 480 | def test_no_data_exists_if_slice_touches_interval_start(self): 481 | with patch('ceres.CeresNode.slices', new=[self.ceres_slices[0]]): 482 | self.assertFalse(self.ceres_node.hasDataForInterval(1800, 2100)) 483 | 484 | def test_compact_returns_empty_if_passed_empty(self): 485 | self.assertEqual([], self.ceres_node.compact([])) 486 | 487 | def test_compact_filters_null_values(self): 488 | self.assertEqual([], self.ceres_node.compact([(60, None)])) 489 | 490 | def test_compact_rounds_timestamps_down_to_step(self): 491 | self.assertEqual([[(600, 0)]], self.ceres_node.compact([(605, 0)])) 492 | 493 | def test_compact_drops_duplicate_timestamps(self): 494 | datapoints = [(600, 0), (600, 0)] 495 | compacted = self.ceres_node.compact(datapoints) 496 | self.assertEqual([[(600, 0.0)]], compacted) 497 | 498 | def test_compact_keeps_last_seen_duplicate_timestamp(self): 499 | datapoints = [(600, 0), (600, 1), (660, 1), (660, 0)] 500 | compacted = self.ceres_node.compact(datapoints) 501 | self.assertEqual([[(600, 1.0), (660, 0.0)]], compacted) 502 | 503 | def test_compact_groups_contiguous_points(self): 504 | datapoints = [(600, 0), (660, 0), (840, 0)] 505 | compacted = self.ceres_node.compact(datapoints) 506 | self.assertEqual([[(600, 0), (660, 0)], [(840, 0)]], compacted) 507 | 508 | def test_write_noops_if_no_datapoints(self): 509 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 510 | self.ceres_node.write([]) 511 | self.assertFalse(self.ceres_slices[0].write.called) 512 | 513 | def test_write_within_first_slice(self): 514 | datapoints = [(1200, 0.0), (1260, 1.0), (1320, 2.0)] 515 | 516 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 517 | self.ceres_node.write(datapoints) 518 | self.ceres_slices[0].write.assert_called_once_with(datapoints) 519 | 520 | @patch('ceres.CeresSlice.create') 521 | def test_write_within_first_slice_doesnt_create(self, slice_create_mock): 522 | datapoints = [(1200, 0.0), (1260, 1.0), 
(1320, 2.0)] 523 | 524 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 525 | self.ceres_node.write(datapoints) 526 | self.assertFalse(slice_create_mock.called) 527 | 528 | @patch('ceres.CeresSlice.create', new=Mock()) 529 | def test_write_within_first_slice_with_gaps(self): 530 | datapoints = [(1200, 0.0), (1320, 2.0)] 531 | 532 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 533 | self.ceres_node.write(datapoints) 534 | 535 | # sorted most recent first 536 | calls = [call.write([datapoints[1]]), call.write([datapoints[0]])] 537 | self.ceres_slices[0].assert_has_calls(calls) 538 | 539 | @patch('ceres.CeresSlice.create', new=Mock()) 540 | def test_write_within_previous_slice(self): 541 | datapoints = [(720, 0.0), (780, 2.0)] 542 | 543 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 544 | self.ceres_node.write(datapoints) 545 | 546 | # 2nd slice has this range 547 | self.ceres_slices[1].write.assert_called_once_with(datapoints) 548 | 549 | @patch('ceres.CeresSlice.create') 550 | def test_write_within_previous_slice_doesnt_create(self, slice_create_mock): 551 | datapoints = [(720, 0.0), (780, 2.0)] 552 | 553 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 554 | self.ceres_node.write(datapoints) 555 | self.assertFalse(slice_create_mock.called) 556 | 557 | @patch('ceres.CeresSlice.create', new=Mock()) 558 | def test_write_within_previous_slice_with_gaps(self): 559 | datapoints = [(720, 0.0), (840, 2.0)] 560 | 561 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 562 | self.ceres_node.write(datapoints) 563 | 564 | calls = [call.write([datapoints[1]]), call.write([datapoints[0]])] 565 | self.ceres_slices[1].assert_has_calls(calls) 566 | 567 | @patch('ceres.CeresSlice.create', new=Mock()) 568 | def test_write_across_slice_boundaries(self): 569 | datapoints = [(1080, 0.0), (1140, 1.0), (1200, 2.0), (1260, 3.0)] 570 | 571 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 572 | self.ceres_node.write(datapoints) 573 | self.ceres_slices[0].write.assert_called_once_with(datapoints[2:4]) 574 | self.ceres_slices[1].write.assert_called_once_with(datapoints[0:2]) 575 | 576 | @patch('ceres.CeresSlice.create') 577 | def test_write_before_earliest_slice_creates_new(self, slice_create_mock): 578 | datapoints = [(300, 0.0)] 579 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 580 | self.ceres_node.write(datapoints) 581 | slice_create_mock.assert_called_once_with(self.ceres_node, 300, 60) 582 | 583 | @patch('ceres.CeresSlice.create') 584 | def test_write_before_earliest_slice_writes_to_new_one(self, slice_create_mock): 585 | datapoints = [(300, 0.0)] 586 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 587 | self.ceres_node.write(datapoints) 588 | slice_create_mock.return_value.write.assert_called_once_with(datapoints) 589 | 590 | @patch('ceres.CeresSlice.create') 591 | def test_write_before_earliest_slice_writes_next_slice_too(self, slice_create_mock): 592 | # the earliest existing slice, ceres_slices[1], starts at 600 593 | datapoints = [(540, 0.0), (600, 0.0)] 594 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 595 | self.ceres_node.write(datapoints) 596 | self.ceres_slices[1].write.assert_called_once_with([datapoints[1]]) 597 | 598 | @patch('ceres.CeresSlice.create') 599 | def test_create_during_write_clears_slice_cache(self, slice_create_mock): 600 | self.ceres_node.setSliceCachingBehavior('all') 601 | self.ceres_node.sliceCache = self.ceres_slices 602 | datapoints = [(300, 0.0)] 603 | with
patch('ceres.CeresNode.slices', new=self.ceres_slices): 604 | self.ceres_node.write(datapoints) 605 | self.assertEquals(None, self.ceres_node.sliceCache) 606 | 607 | @patch('ceres.CeresSlice.create') 608 | def test_write_past_max_gap_size_creates(self, slice_create_mock): 609 | datapoints = [(6000, 0.0)] 610 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 611 | with patch.object(self.ceres_slices[0], 'write', side_effect=SliceGapTooLarge): 612 | self.ceres_node.write(datapoints) 613 | 614 | @patch('ceres.CeresSlice.create') 615 | def test_write_different_timestep_creates(self, slice_create_mock): 616 | datapoints = [(600, 0.0)] 617 | with patch('ceres.CeresNode.slices', new=self.ceres_slices): 618 | self.ceres_node.timeStep = 10 619 | self.ceres_node.write(datapoints) 620 | slice_create_mock.assert_called_once_with(self.ceres_node, 600, 10) 621 | 622 | 623 | class CeresNodeReadTest(TestCase): 624 | def setUp(self): 625 | with patch('ceres.isdir', new=Mock(return_value=True)): 626 | with patch('ceres.exists', new=Mock(return_value=True)): 627 | self.ceres_tree = CeresTree('/graphite/storage/ceres') 628 | self.ceres_node = CeresNode( 629 | self.ceres_tree, 630 | 'sample_metric', 631 | '/graphite/storage/ceres/sample_metric') 632 | self.ceres_node.timeStep = 60 633 | 634 | slice_configs = [ 635 | (1200, 1800, 60), 636 | (600, 1200, 60)] 637 | 638 | self.ceres_slices = [] 639 | for start, end, step in slice_configs: 640 | slice_mock = make_slice_mock(start, end, step) 641 | self.ceres_slices.append(slice_mock) 642 | 643 | self.ceres_slices_patch = patch('ceres.CeresNode.slices', new=iter(self.ceres_slices)) 644 | self.ceres_slices_patch.start() 645 | 646 | def tearDown(self): 647 | self.ceres_slices_patch.stop() 648 | 649 | def test_read_loads_metadata_if_timestep_unknown(self): 650 | with patch('ceres.CeresNode.readMetadata', new=Mock(side_effect=Exception))\ 651 | as read_metadata_mock: 652 | self.ceres_node.timeStep = None 653 | try: # Raise Exception as a cheap exit out of the function once we have the call we want 654 | self.ceres_node.read(600, 660) 655 | except Exception: 656 | pass 657 | read_metadata_mock.assert_called_once_with() 658 | 659 | def test_read_normalizes_from_time(self): 660 | self.ceres_node.read(1210, 1260) 661 | self.ceres_slices[0].read.assert_called_once_with(1200, 1260) 662 | 663 | def test_read_normalizes_until_time(self): 664 | self.ceres_node.read(1200, 1270) 665 | self.ceres_slices[0].read.assert_called_once_with(1200, 1260) 666 | 667 | def test_read_returns_empty_time_series_if_before_slices(self): 668 | result = self.ceres_node.read(0, 300) 669 | self.assertEqual([None] * 5, result.values) 670 | 671 | def test_read_returns_empty_time_series_if_slice_has_no_data(self): 672 | self.ceres_slices[0].read.side_effect = NoData 673 | result = self.ceres_node.read(1200, 1500) 674 | self.assertEqual([None] * 5, result.values) 675 | 676 | def test_read_pads_points_missing_before_series(self): 677 | result = self.ceres_node.read(540, 1200) 678 | self.assertEqual([None, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], result.values) 679 | 680 | def test_read_pads_points_missing_after_series(self): 681 | result = self.ceres_node.read(1200, 1860) 682 | self.assertEqual(None, result.values[-1]) 683 | 684 | def test_read_goes_across_slices(self): 685 | self.ceres_node.read(900, 1500) 686 | self.ceres_slices[0].read.assert_called_once_with(1200, 1500) 687 | self.ceres_slices[1].read.assert_called_once_with(900, 1200) 688 | 689 | def test_read_across_slices_merges_results(self): 
690 | result = self.ceres_node.read(900, 1500) 691 | self.assertEqual([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], result.values) 692 | 693 | def test_read_pads_points_missing_after_series_across_slices(self): 694 | result = self.ceres_node.read(900, 1860) 695 | self.assertEqual(None, result.values[-1]) 696 | 697 | def test_read_pads_points_missing_between_slices(self): 698 | self.ceres_slices[1] = make_slice_mock(600, 1140, 60) 699 | result = self.ceres_node.read(900, 1500) 700 | self.assertEqual([0, 1, 2, 3, None, 0, 1, 2, 3, 4], result.values) 701 | 702 | 703 | class CeresSliceTest(TestCase): 704 | def setUp(self): 705 | with patch('ceres.isdir', new=Mock(return_value=True)): 706 | with patch('ceres.exists', new=Mock(return_value=True)): 707 | self.ceres_tree = CeresTree('/graphite/storage/ceres') 708 | self.ceres_node = CeresNode( 709 | self.ceres_tree, 710 | 'sample_metric', 711 | '/graphite/storage/ceres/sample_metric') 712 | 713 | def test_init_sets_fspath_name(self): 714 | ceres_slice = CeresSlice(self.ceres_node, 0, 60) 715 | self.assertTrue(ceres_slice.fsPath.endswith('0@60.slice')) 716 | 717 | @patch('ceres.getsize') 718 | def test_end_time_calculated_via_filesize(self, getsize_mock): 719 | getsize_mock.return_value = DATAPOINT_SIZE * 300 720 | ceres_slice = CeresSlice(self.ceres_node, 0, 60) 721 | # 300 points at 60 sec per point 722 | self.assertEqual(300 * 60, ceres_slice.endTime) 723 | 724 | @patch('ceres.exists') 725 | def test_delete_before_raises_if_deleted(self, exists_mock): 726 | exists_mock.return_value = False 727 | ceres_slice = CeresSlice(self.ceres_node, 0, 60) 728 | self.assertRaises(SliceDeleted, ceres_slice.deleteBefore, 60) 729 | 730 | @patch('ceres.exists', Mock(return_value=True)) 731 | @patch.object(builtins, 'open', new_callable=mock_open) 732 | def test_delete_before_returns_if_time_earlier_than_start(self, open_mock): 733 | ceres_slice = CeresSlice(self.ceres_node, 300, 60) 734 | # File starts at timestamp 300, delete points before timestamp 60 735 | ceres_slice.deleteBefore(60) 736 | self.assertFalse(open_mock.called)  # the file is never opened 737 | 738 | @patch('ceres.exists', Mock(return_value=True)) 739 | @patch.object(builtins, 'open', new_callable=mock_open) 740 | def test_delete_before_returns_if_time_less_than_step_earlier_than_start(self, open_mock): 741 | ceres_slice = CeresSlice(self.ceres_node, 300, 60) 742 | ceres_slice.deleteBefore(299) 743 | self.assertFalse(open_mock.called) 744 | 745 | @patch('ceres.exists', Mock(return_value=True)) 746 | @patch.object(builtins, 'open', new_callable=mock_open) 747 | def test_delete_before_returns_if_time_same_as_start(self, open_mock): 748 | ceres_slice = CeresSlice(self.ceres_node, 300, 60) 749 | ceres_slice.deleteBefore(300) 750 | self.assertFalse(open_mock.called) 751 | 752 | @patch('ceres.exists', Mock(return_value=True)) 753 | @patch('ceres.os.rename', Mock(return_value=True)) 754 | def test_delete_before_clears_slice_cache(self): 755 | ceres_slice = CeresSlice(self.ceres_node, 300, 60) 756 | open_mock = mock_open(read_data='foo') # needs to be non-null for this test 757 | with patch.object(builtins, 'open', open_mock): 758 | with patch('ceres.CeresNode.clearSliceCache') as clear_slice_cache_mock: 759 | ceres_slice.deleteBefore(360) 760 | clear_slice_cache_mock.assert_called_once_with() 761 | 762 | @patch('ceres.exists', Mock(return_value=True)) 763 | @patch.object(builtins, 'open', new_callable=mock_open) 764 | def test_delete_before_deletes_file_if_no_more_data(self, open_mock): 765 | ceres_slice = CeresSlice(self.ceres_node, 300, 60)
766 | with patch('ceres.os.unlink') as unlink_mock: 767 | try: 768 | ceres_slice.deleteBefore(360) 769 | except Exception: 770 | pass 771 | self.assertTrue(unlink_mock.called) 772 | 773 | @patch('ceres.exists', Mock(return_value=True)) 774 | @patch('ceres.os.unlink', Mock()) 775 | @patch.object(builtins, 'open', new_callable=mock_open) 776 | def test_delete_before_raises_slice_deleted_if_no_more_data(self, open_mock): 777 | ceres_slice = CeresSlice(self.ceres_node, 300, 60) 778 | self.assertRaises(SliceDeleted, ceres_slice.deleteBefore, 360) 779 | 780 | @patch('ceres.exists', Mock(return_value=True)) 781 | @patch('ceres.os.rename', Mock()) 782 | def test_delete_before_seeks_to_time(self): 783 | ceres_slice = CeresSlice(self.ceres_node, 300, 60) 784 | open_mock = mock_open(read_data='foo') 785 | with patch.object(builtins, 'open', open_mock): 786 | ceres_slice.deleteBefore(360) 787 | # Seek from 300 (start of file) to 360 (1 datapoint) 788 | open_mock.return_value.seek.assert_any_call(1 * DATAPOINT_SIZE) 789 | 790 | @patch('ceres.exists', Mock(return_value=True)) 791 | def test_slices_are_sortable(self): 792 | ceres_slices = [ 793 | CeresSlice(self.ceres_node, 300, 60), 794 | CeresSlice(self.ceres_node, 600, 60), 795 | CeresSlice(self.ceres_node, 0, 60)] 796 | 797 | expected_order = [0, 300, 600] 798 | result_order = [slice.startTime for slice in sorted(ceres_slices)] 799 | self.assertEqual(expected_order, result_order) 800 | 801 | 802 | class CeresSliceWriteTest(TestCase): 803 | def setUp(self): 804 | with patch('ceres.isdir', new=Mock(return_value=True)): 805 | with patch('ceres.exists', new=Mock(return_value=True)): 806 | self.ceres_tree = CeresTree('/graphite/storage/ceres') 807 | self.ceres_node = CeresNode( 808 | self.ceres_tree, 809 | 'sample_metric', 810 | '/graphite/storage/ceres/sample_metric') 811 | self.ceres_slice = CeresSlice(self.ceres_node, 300, 60) 812 | 813 | @patch('ceres.getsize', Mock(side_effect=OSError)) 814 | def test_raises_os_error_if_not_enoent(self): 815 | self.assertRaises(OSError, self.ceres_slice.write, [(0, 0)]) 816 | 817 | @patch('ceres.getsize', Mock(side_effect=OSError(errno.ENOENT, 'foo'))) 818 | def test_raises_slice_deleted_oserror_enoent(self): 819 | self.assertRaises(SliceDeleted, self.ceres_slice.write, [(0, 0)]) 820 | 821 | @patch('ceres.getsize', Mock(return_value=0)) 822 | @patch.object(builtins, 'open', mock_open()) 823 | def test_raises_slice_gap_too_large_when_it_is(self): 824 | # one point over the max 825 | new_time = self.ceres_slice.startTime + self.ceres_slice.timeStep * (MAX_SLICE_GAP + 1) 826 | datapoint = (new_time, 0) 827 | self.assertRaises(SliceGapTooLarge, self.ceres_slice.write, [datapoint]) 828 | 829 | @patch('ceres.getsize', Mock(return_value=0)) 830 | @patch.object(builtins, 'open', mock_open()) 831 | def test_doesnt_raise_slice_gap_too_large_when_it_isnt(self): 832 | new_time = self.ceres_slice.startTime + self.ceres_slice.timeStep * (MAX_SLICE_GAP - 1) 833 | datapoint = (new_time, 0) 834 | try: 835 | self.ceres_slice.write([datapoint]) 836 | except SliceGapTooLarge: 837 | self.fail("SliceGapTooLarge raised") 838 | 839 | @patch('ceres.getsize', Mock(return_value=DATAPOINT_SIZE * 100)) 840 | @patch.object(builtins, 'open', mock_open()) 841 | def test_doesnt_raise_slice_gap_when_newer_points_exist(self): 842 | new_time = self.ceres_slice.startTime + self.ceres_slice.timeStep * (MAX_SLICE_GAP + 1) 843 | datapoint = (new_time, 0) 844 | try: 845 | self.ceres_slice.write([datapoint]) 846 | except SliceGapTooLarge:
847 | self.fail("SliceGapTooLarge raised") 848 | 849 | @patch('ceres.getsize', Mock(return_value=0)) 850 | @patch.object(builtins, 'open', new_callable=mock_open) 851 | def test_raises_ioerror_if_seek_hits_ioerror(self, open_mock): 852 | open_mock.return_value.seek.side_effect = IOError 853 | self.assertRaises(IOError, self.ceres_slice.write, [(300, 0)]) 854 | 855 | @patch('ceres.getsize', Mock(return_value=0)) 856 | @patch.object(builtins, 'open', new_callable=mock_open) 857 | def test_opens_file_as_binary(self, open_mock): 858 | self.ceres_slice.write([(300, 0)]) 859 | # call_args = (args, kwargs) 860 | self.assertTrue(open_mock.call_args[0][1].endswith('b')) 861 | 862 | @patch('ceres.getsize', Mock(return_value=0)) 863 | @patch.object(builtins, 'open', new_callable=mock_open) 864 | def test_seeks_to_the_correct_offset_first_point(self, open_mock): 865 | self.ceres_slice.write([(300, 0)]) 866 | open_mock.return_value.seek.assert_called_once_with(0) 867 | 868 | @patch('ceres.getsize', Mock(return_value=1 * DATAPOINT_SIZE)) 869 | @patch.object(builtins, 'open', new_callable=mock_open) 870 | def test_seeks_to_the_correct_offset_next_point(self, open_mock): 871 | self.ceres_slice.write([(360, 0)]) 872 | # 2nd point in the file 873 | open_mock.return_value.seek.assert_called_once_with(DATAPOINT_SIZE) 874 | 875 | @patch('ceres.getsize', Mock(return_value=1 * DATAPOINT_SIZE)) 876 | @patch.object(builtins, 'open', new_callable=mock_open) 877 | def test_seeks_to_the_next_empty_offset_one_point_gap(self, open_mock): 878 | # Gaps are written out as NaNs so the offset we expect is the beginning 879 | # of the gap, not the offset of the point itself 880 | self.ceres_slice.write([(420, 0)]) 881 | open_mock.return_value.seek.assert_called_once_with(1 * DATAPOINT_SIZE) 882 | 883 | @patch('ceres.getsize', Mock(return_value=0)) 884 | @patch.object(builtins, 'open', new_callable=mock_open) 885 | def test_correct_size_written_first_point(self, open_mock): 886 | self.ceres_slice.write([(300, 0)]) 887 | self.assertEqual(1 * DATAPOINT_SIZE, len(fetch_mock_open_writes(open_mock))) 888 | 889 | @patch('ceres.getsize', Mock(return_value=1 * DATAPOINT_SIZE)) 890 | @patch.object(builtins, 'open', new_callable=mock_open) 891 | def test_correct_size_written_next_point(self, open_mock): 892 | self.ceres_slice.write([(360, 0)]) 893 | self.assertEqual(1 * DATAPOINT_SIZE, len(fetch_mock_open_writes(open_mock))) 894 | 895 | @patch('ceres.getsize', Mock(return_value=1 * DATAPOINT_SIZE)) 896 | @patch.object(builtins, 'open', new_callable=mock_open) 897 | def test_correct_size_written_one_point_gap(self, open_mock): 898 | self.ceres_slice.write([(420, 0)]) 899 | # one empty point, one real point = two points total written 900 | self.assertEqual(2 * DATAPOINT_SIZE, len(fetch_mock_open_writes(open_mock))) 901 | 902 | 903 | class CeresArchiveNodeReadTest(TestCase): 904 | def setUp(self): 905 | with patch('ceres.isdir', new=Mock(return_value=True)): 906 | with patch('ceres.exists', new=Mock(return_value=True)): 907 | self.ceres_tree = CeresTree('/graphite/storage/ceres') 908 | self.ceres_node = CeresNode( 909 | self.ceres_tree, 910 | 'sample_metric', 911 | '/graphite/storage/ceres/sample_metric') 912 | self.ceres_node.timeStep = 30 913 | 914 | slice_configs = [ 915 | (1200, 1800, 30), 916 | (600, 1200, 60)] 917 | 918 | self.ceres_slices = [] 919 | for start, end, step in slice_configs: 920 | slice_mock = make_slice_mock(start, end, step) 921 | self.ceres_slices.append(slice_mock) 922 | 923 | self.ceres_slices_patch = 
patch('ceres.CeresNode.slices', new=iter(self.ceres_slices)) 924 | self.ceres_slices_patch.start() 925 | 926 | def tearDown(self): 927 | self.ceres_slices_patch.stop() 928 | 929 | def test_archives_read_loads_metadata_if_timestep_unknown(self): 930 | with patch('ceres.CeresNode.readMetadata', new=Mock(side_effect=Exception))\ 931 | as read_metadata_mock: 932 | self.ceres_node.timeStep = None 933 | try: # Raise Exception as a cheap exit out of the function once we have the call we want 934 | self.ceres_node.read(600, 660) 935 | except Exception: 936 | pass 937 | read_metadata_mock.assert_called_once_with() 938 | 939 | def test_archives_read_normalizes_from_time(self): 940 | self.ceres_node.read(1210, 1260) 941 | self.ceres_slices[0].read.assert_called_once_with(1200, 1260) 942 | 943 | def test_archives_read_normalizes_until_time(self): 944 | self.ceres_node.read(1200, 1270) 945 | self.ceres_slices[0].read.assert_called_once_with(1200, 1260) 946 | 947 | def test_archives_read_returns_empty_time_series_if_before_slices(self): 948 | result = self.ceres_node.read(0, 300) 949 | self.assertEqual([None] * 10, result.values) 950 | 951 | def test_archives_read_returns_empty_time_series_if_slice_has_no_data(self): 952 | self.ceres_slices[0].read.side_effect = NoData 953 | result = self.ceres_node.read(1200, 1500) 954 | self.assertEqual([None] * 10, result.values) 955 | 956 | def test_archives_read_pads_points_missing_before_series(self): 957 | result = self.ceres_node.read(300, 1200) 958 | self.assertEqual(None, result.values[0]) 959 | 960 | def test_archives_read_pads_points_missing_after_series(self): 961 | result = self.ceres_node.read(1200, 1860) 962 | self.assertEqual(None, result.values[-1]) 963 | 964 | def test_archives_read_goes_across_slices(self): 965 | self.ceres_node.read(900, 1500) 966 | self.ceres_slices[0].read.assert_called_once_with(1200, 1500) 967 | self.ceres_slices[1].read.assert_called_once_with(900, 1200) 968 | 969 | def test_archives_read_across_slices_merges_results_average(self): 970 | result = self.ceres_node.read(900, 1470) 971 | self.assertEqual([0, 1, 2, 3, 4, 0.5, 2.5, 4.5, 6.5, 8], result.values) 972 | 973 | def test_archives_read_across_slices_merges_results_sum(self): 974 | self.ceres_node.aggregationMethod = 'sum' 975 | result = self.ceres_node.read(900, 1470) 976 | self.assertEqual([0, 1, 2, 3, 4, 1, 5, 9, 13, 8], result.values) 977 | 978 | def test_archives_read_across_slices_merges_results_last(self): 979 | self.ceres_node.aggregationMethod = 'last' 980 | result = self.ceres_node.read(900, 1470) 981 | self.assertEqual([0, 1, 2, 3, 4, 1, 3, 5, 7, 8], result.values) 982 | 983 | def test_archives_read_across_slices_merges_results_max(self): 984 | self.ceres_node.aggregationMethod = 'max' 985 | result = self.ceres_node.read(900, 1470) 986 | self.assertEqual([0, 1, 2, 3, 4, 1, 3, 5, 7, 8], result.values) 987 | 988 | def test_archives_read_across_slices_merges_results_min(self): 989 | self.ceres_node.aggregationMethod = 'min' 990 | result = self.ceres_node.read(900, 1470) 991 | self.assertEqual([0, 1, 2, 3, 4, 0, 2, 4, 6, 8], result.values) 992 | 993 | def test_archives_invalid_aggregation_method(self): 994 | self.ceres_node.aggregationMethod = 'invalid' 995 | self.assertRaises(InvalidAggregationMethod, self.ceres_node.read, 900, 1500) 996 | 997 | def test_archives_read_pads_points_missing_after_series_across_slices(self): 998 | result = self.ceres_node.read(900, 1860) 999 | self.assertEqual(None, result.values[-1]) 1000 | 1001 | def 
test_archives_read_pads_points_missing_between_slices(self): 1002 | self.ceres_slices[1] = make_slice_mock(600, 900, 300) 1003 | result = self.ceres_node.read(600, 1500) 1004 | self.assertEqual([0, None, 4.5], result.values) 1005 | --------------------------------------------------------------------------------
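Editorial appendix (not part of the repository): the CeresSliceTest and CeresSliceWriteTest cases above collectively pin down the on-disk arithmetic of a slice file -- a flat array of fixed-size datapoints in a file named start@step.slice, where gaps are padded with empty points. The sketch below restates that arithmetic in plain Python; DATAPOINT_SIZE is imported from ceres, and the helper names are this sketch's own, not the library's API:

from ceres import DATAPOINT_SIZE

def slice_filename(start_time, time_step):
  # test_init_sets_fspath_name / test_read_slices_parses_slice_filenames
  return '%d@%d.slice' % (start_time, time_step)

def slice_end_time(start_time, time_step, file_size):
  # test_end_time_calculated_via_filesize: 300 points at 60s -> endTime 18000
  return start_time + (file_size // DATAPOINT_SIZE) * time_step

def write_offset(start_time, time_step, timestamp):
  # test_seeks_to_the_correct_offset_next_point: (360 - 300) // 60 points in
  return ((timestamp - start_time) // time_step) * DATAPOINT_SIZE

assert slice_filename(0, 60) == '0@60.slice'
assert slice_end_time(0, 60, DATAPOINT_SIZE * 300) == 300 * 60
assert write_offset(300, 60, 360) == 1 * DATAPOINT_SIZE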