├── .gitignore ├── CHANGELOG ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.md ├── TODO ├── bin ├── code_linter.py ├── debugger.py └── demo.sh ├── docker-compose.yml ├── relay ├── __init__.py ├── __main__.py ├── argparse_shared.py ├── plugins │ └── __init__.py ├── relay_logging.py ├── runner.py └── util.py ├── setup.py └── web ├── Dockerfile ├── package.json ├── src ├── index.html └── index.js └── vendor ├── canvasjs.min.js └── jquery.canvasjs.min.js /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/ 2 | *pyc 3 | .ropeproject 4 | build/ 5 | dist/ 6 | node_modules/ 7 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | Differences between different versions of Relay 2 | =========================================== 3 | 4 | 5 | 0.1.10 (unreleased) 6 | ------------------- 7 | 8 | - Nothing changed yet. 9 | 10 | 11 | 0.1.9 (2015-09-14) 12 | ------------------ 13 | 14 | ####New Features 15 | - adds support for stop_condition as an optional user-defined function 16 | - introduces docker-compose for the demo instead of managing manually 17 | 18 | <=1.8 19 | ---------------- 20 | 21 | - beginning of changelog. 22 | - warmer, cooler, metric and target functions 23 | - several optional parameters, includeing ramp up, delay, lookpack, sendstats 24 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM continuumio/miniconda 2 | MAINTAINER Alex Gaudio 3 | ENV PATH /opt/anaconda/bin:$PATH 4 | 5 | RUN apt-get install -y -f procps 6 | WORKDIR /relay 7 | COPY ./setup.py /relay/ 8 | RUN conda install setuptools numpy pyzmq && python setup.py install 9 | COPY ./relay /relay/relay 10 | RUN python setup.py develop 11 | 12 | EXPOSE 8080 13 | 14 | CMD relay 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Relay: A thermostat for distributed systems 2 | ============ 3 | 4 | Quickstart with a demo! 5 | ------------ 6 | 7 | ``` 8 | ./bin/demo.sh # You must have Docker installed 9 | ``` 10 | 11 | Navigate your browser to: 12 | 13 | ``` 14 | http://:8080 # is probably $DOCKER_HOST or "localhost" 15 | ``` 16 | 17 | What problem does it solve? 18 | ------------ 19 | 20 | Imagine I wish to manually manage the temperature of a room. I have a 21 | thermostat at one corner and a heater and/or cooler at the other, and 22 | let's say I wish to maintain a target temperature of 68 degrees. I 23 | decide on a simple rule: add heat to the room until the thermometer 24 | reaches 68 degress. There is a problem, though. Heat takes some time 25 | to appear on the thermometer. If I wait until the thermometer reads my 26 | target temperature, I drastically overheat the room. So I decide on 27 | another rule: slowly add heat to the room until it reaches the target 28 | temperature. After trying this out, I realize that the temperature in 29 | the room doesn't get above 60 degrees because it is apparently so cold 30 | outside that I'm not adding enough heat to counter-balance those 31 | effects. 32 | 33 | It looks like I need a more sophisticated temperature regulating 34 | algorithm. This type of problem is often quite complex, and there is a 35 | field called Control Theory dedicated to problems like the ones that a 36 | thermostat solves. 37 | 38 | How does this apply to distributed systems? 39 | ------------ 40 | 41 | Distributed systems need thermostats everywhere! Perhaps you have a 42 | need to add workers in proportion to a queue size. Or in another 43 | scenario, you may need to add more aws nodes when there's a lot of work 44 | to do. Perhaps your grid scheduler needs to maintain a constant number 45 | of jobs running at a time, once per node, but the number of nodes is 46 | dynamic. You could use Relay to tune hyper-parameters for online 47 | machine learning algorithms. Can you think of any applications? If you 48 | can't, look at a couple timeseries and you'll come up with good ideas, 49 | and there is a good chance that Relay makes solving those quite a bit easier. 50 | 51 | In general, Relay is a good candidate for any scenario where you find 52 | yourself looking at some metric and then responding to that metric by 53 | running some code or tweaking your system. 54 | 55 | Background: A lesson on PID controllers 56 | ------------ 57 | 58 | A PID controller is a mechanism that implements a control loop. A 59 | control loop leverages feedback from its previous decisions to make new 60 | decisions about what to do next. According to 61 | [Wikipedia](http://en.wikipedia.org/wiki/PID_controller), "In the 62 | absence of knowledge of the underlying process, a PID controller has 63 | historically been considered to be the best controller." 64 | 65 | PID controllers look at a metric, like room temperature over time, and 66 | represent it as three components. "P," the "proportional" term, defines 67 | the amount of error between the current metric value and the target 68 | value. (How far off is the temperature from the target temperature?). 69 | "I" and "D" look at the integral and derivative of the metric. The 70 | amount of "heating" or "cooling" a PID controller decides to add depends 71 | on a weighted sum of those three terms. 72 | 73 | 74 | MV = K_p * P + K_i * I + K_d * D 75 | 76 | where 77 | P = error between current value and target value 78 | I = integral of this error over time 79 | D = derivative of this error over time 80 | K_* - these are weights for the above 81 | 82 | MV = the amount of heat or cooling to add to the system 83 | 84 | The challenge, in general, is to find the ideal weighting for the 85 | ```K_*``` terms. 86 | 87 | 88 | Background: How does Relay solve this? 89 | ------------ 90 | 91 | Relay is technically a modified PI controller. 92 | Specifically, ```K_p = 1```, ```K_d * D = 0```, and ```K_i``` is chosen 93 | according to a tuning algorithm. Given ```P```, ```I```, and an error 94 | history, the amount of heat or cooling to add (MV) is: 95 | 96 | MV = P + K_i * I 97 | 98 | where 99 | P = PV - SP .... (metric value - target) value at timestep i 100 | I = sum(P_hist) / len(P_hist) .... average value of P over time. 101 | K_i is defined below as a weighted sum of component frequencies. 102 | 103 | The challenge in this problem is to answer the question: "At the current 104 | moment in time, how important is the history of errors?" Here's how 105 | Relay does this: 106 | 107 | If we can assume the signal is made up of various periodic (repeating) 108 | functions, we can evaluate, at any point in time, how far away from 109 | “zero error” each periodic function is. By joining all these errors 110 | as a weighted sum, we can estimate how important the error is by 111 | considering the relative presence of the signal's repeating components. 112 | 113 | A Fast Fourier Transform (we use FFT) breaks down a signal (ie Relay's 114 | error history) into a number of repeating sine waves of different 115 | frequencies. For any given component frequency, ```f_j``` we can look 116 | at the current phase, ```ø_i```, of the wave we happen to be in. Since 117 | we sample the signal (and therefore each component frequency) at a known 118 | rate, we can also calculate the number of samples, ```n```, in this 119 | particular frequency that we consider. In one period, large frequency 120 | components will have less samples than small frequency components. 121 | Given a component frequency, ```f_j```, the current phase, ```ø```, and 122 | the number of samples in one wavelength, ```n```, we can then look back 123 | we can find how much of ```n - 1``` most recent samples the the current 124 | phase is worth, or 125 | ```h_j = abs(sin(ø_i)) / Σ_k [ abs(sin(ø_(i-k))) ] where k = [0:n)```. 126 | 127 | If we calculate ```h_j``` for each frequency, ```j```, and than take a 128 | weighted sum of frequencies, we define a tuning parameter, ```K_i```, 129 | that responds quite well to periodicity of the signal! 130 | 131 | At the current timestep: 132 | For each f_j: 133 | n=num samples in f_j 134 | h_j = abs(sin(ø_i)) / Σ_k [ abs(sin(ø_(i-k))) ] where k = [0:n) 135 | 136 | K_i = Σ_j [ f_j * h_j ] 137 | 138 | 139 | And finally: 140 | MV = P + K_i * I 141 | 142 | 143 | Quick Start! 144 | ------------ 145 | 146 | Install relay 147 | 148 | pip install relay.runner 149 | 150 | OR for this demo: 151 | 152 | pip install "relay.runner[webui]" # webui needs ZeroMQ installed 153 | 154 | Look at the usage 155 | 156 | relay -h 157 | 158 | Try a demo. This demo runs monitors the number of a certain kind of bash echo command running every .1 seconds, and if there aren't enough of them running, will add more. 159 | 160 | relay --metric bash_echo_metric --warmer bash_echo_warmer --delay .1 --sendstats webui --target 20 161 | 162 | # navigate to localhost:8080 in a web browser 163 | 164 | # demos 2 and 3: changing target values over time 165 | relay --metric bash_echo_metric --warmer bash_echo_warmer --delay .1 --sendstats webui --target oscillating_setpoint 166 | relay --metric bash_echo_metric --warmer bash_echo_warmer --delay .1 --sendstats webui --target squarewave_setpoint 167 | 168 | # demo 4: running a heater and cooler at the same time 169 | relay --metric bash_echo_metric --warmer bash_echo_warmer --delay .1 --sendstats webui --target squarewave_setpoint --cooler bash_echo_cooler 170 | 171 | 172 | Detailed Usage: 173 | ------------ 174 | 175 | Relay has 4 main components: 176 | 177 | metric - a timeseries of numbers that you wish to monitor (ie temperature) 178 | target - a numerical value (or timeseries) that you'd like the 179 | metric to be at (ie same units as the metric) 180 | 181 | warmer - a function that Relay assumes will increase the metric 182 | cooler - a function that Relay assumes will decrease the metric 183 | 184 | In certain cases it can be more efficient to run a heater and air 185 | conditioner at the same time, but generally it's not. It's usually safe 186 | to apply this reasoning to Relay. You generaly should define only a 187 | warmer or a cooler, but sometimes it is better to define both. 188 | 189 | 190 | Relay makes some assumptions: 191 | 192 | - The metric you monitoring is eventually consistent. This means that 193 | if it takes a little while for effects to become apparent, Relay will 194 | figure this out as soon as it acquires enough error history to do so. 195 | 196 | - The signal you're monitoring is continuous, integrable, and otherwise 197 | valid input to a Fourier Transforms. 198 | 199 | - Warmer functions increase metric values and Cooler functions decrease 200 | metric values. 201 | 202 | - If Relay accrues a large history of error, it will remember that error 203 | for n samples, where n is the size of Relay's lookback window. If a 204 | warmer or cooler function suddenly stops working or something changes 205 | how it affects themetric, Relay's decisions may become less predictable 206 | until it stabilizes. 207 | 208 | - You can run multiple redundant Relays! If you add multiple Relay 209 | processes, they will each account for a part of the signal. If you 210 | stop multiple Relays, the remaining ones will figure this out and 211 | re-adjust themselves over the next few samples. 212 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - consider Gain Scheduling (different weights in different regions) 2 | 3 | - better web UI 4 | - support a bash plugin 5 | -------------------------------------------------------------------------------- /bin/code_linter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Syntax checker adapted from 4 | [StarCluster](https://github.com/jtriley/StarCluster) 5 | 6 | Strict PyFlakes and Pep8 checkers, with the following differences: 7 | - ignore UnusedImport and "Import *" errors from __init__.py files 8 | 9 | """ 10 | import os 11 | import re 12 | import ast 13 | import sys 14 | import glob 15 | import subprocess 16 | 17 | import pep8 18 | from pyflakes import checker 19 | from pyflakes.messages import UnusedImport, ImportStarUsed 20 | 21 | 22 | def check(codeString, filename): 23 | """ 24 | Check the Python source given by C{codeString} for flakes. 25 | 26 | @param codeString: The Python source to check. 27 | @type codeString: C{str} 28 | 29 | @param filename: The name of the file the source came from, used to report 30 | errors. 31 | @type filename: C{str} 32 | 33 | @return: The number of warnings emitted. 34 | @rtype: C{int} 35 | """ 36 | # First, compile into an AST and handle syntax errors. 37 | try: 38 | tree = compile(codeString, filename, "exec", ast.PyCF_ONLY_AST) 39 | except SyntaxError, value: 40 | msg = value.args[0] 41 | 42 | (lineno, offset, text) = value.lineno, value.offset, value.text 43 | 44 | # If there's an encoding problem with the file, the text is None. 45 | if text is None: 46 | # Avoid using msg, since for the only known case, it contains a 47 | # bogus message that claims the encoding the file declared was 48 | # unknown. 49 | sys.stderr.write("%s: problem decoding source\n" % (filename, )) 50 | else: 51 | line = text.splitlines()[-1] 52 | 53 | if offset is not None: 54 | offset = offset - (len(text) - len(line)) 55 | 56 | sys.stderr.write('%s:%d: %s' % (filename, lineno, msg)) 57 | sys.stderr.write(line + '\n') 58 | 59 | if offset is not None: 60 | sys.stderr.write(" " * offset + "^\n") 61 | 62 | return 1 63 | else: 64 | # Okay, it's syntactically valid. Now check it. 65 | w = checker.Checker(tree, filename) 66 | lines = codeString.split('\n') 67 | messages = [message for message in w.messages 68 | if lines[message.lineno - 1].find('pyflakes:ignore') < 0] 69 | messages.sort(lambda a, b: cmp(a.lineno, b.lineno)) 70 | false_positives = 0 71 | for warning in messages: 72 | if not (re.match('.*__init__.py', str(warning)) 73 | and isinstance(warning, (UnusedImport, ImportStarUsed))): 74 | print(warning) 75 | else: 76 | false_positives += 1 77 | return len(messages) - false_positives 78 | 79 | 80 | def checkPath(filename): 81 | """ 82 | Check the given path, printing out any warnings detected. 83 | 84 | @return: the number of warnings printed 85 | """ 86 | try: 87 | return check(file(filename, 'U').read() + '\n', filename) 88 | except IOError, msg: 89 | sys.stderr.write("%s: %s\n" % (filename, msg.args[1])) 90 | return 1 91 | 92 | 93 | def matches_file(file_name, match_files): 94 | return any(re.compile(match_file).match(file_name) for match_file in 95 | match_files) 96 | 97 | 98 | def check_files(files, check): 99 | clean = True 100 | print check['start_msg'] 101 | for file_name in files: 102 | if not matches_file(file_name, check.get('match_files', [])): 103 | continue 104 | if matches_file(file_name, check.get('ignore_files', [])): 105 | continue 106 | print 'checking file: %s' % file_name 107 | process = subprocess.Popen(check['command'] % file_name, 108 | stdout=subprocess.PIPE, 109 | stderr=subprocess.PIPE, shell=True) 110 | out, err = process.communicate() 111 | output = out + err 112 | if output: 113 | output_lines = ['%s: %s' % (file_name, line) for line in 114 | (out + err).splitlines()] 115 | print '\n'.join(output_lines) 116 | if process.returncode != 0: 117 | clean = False 118 | if not clean: 119 | raise Exception("ERROR: checks failed on some source files") 120 | 121 | 122 | def find_py_files(path): 123 | for cfile in glob.glob(os.path.join(path, '*')): 124 | if os.path.basename(cfile) == 'node_modules': 125 | continue 126 | if os.path.isdir(cfile): 127 | for py in find_py_files(cfile): 128 | yield py 129 | if cfile.endswith('.py'): 130 | yield cfile 131 | 132 | 133 | def check_pyflakes(files): 134 | print(">>> Running pyflakes...") 135 | clean = True 136 | for pyfile in files: 137 | if checkPath(pyfile) != 0: 138 | clean = False 139 | if not clean: 140 | raise Exception("ERROR: pyflakes failed on some source files") 141 | 142 | 143 | def check_pep8(files): 144 | print(">>> Running pep8...") 145 | sg = pep8.StyleGuide(parse_argv=False, config_file=False) 146 | sg.options.repeat = True 147 | sg.options.show_pep8 = True 148 | report = sg.check_files(files) 149 | if report.total_errors: 150 | raise Exception("ERROR: pep8 failed on some source files") 151 | 152 | 153 | def main(git_index=False, filetypes=['.py']): 154 | files = [] 155 | if git_index: 156 | p = subprocess.Popen(['git', 'status', '--porcelain'], 157 | stdout=subprocess.PIPE) 158 | out, err = p.communicate() 159 | modified = re.compile('^(?:MM|M|A)(\s+)(?P.*)') 160 | for line in out.splitlines(): 161 | match = modified.match(line) 162 | if match: 163 | f = match.group('name') 164 | if filetypes: 165 | if f.endswith(tuple(filetypes)): 166 | files.append(f) 167 | else: 168 | files.append(f) 169 | else: 170 | src = os.path.join(os.path.dirname(__file__), '..') 171 | files = list(find_py_files(src)) 172 | if not files: 173 | return 174 | try: 175 | check_pyflakes(files) 176 | check_pep8(files) 177 | print(">>> Clean!") 178 | except Exception, e: 179 | print 180 | print(e) 181 | print("ERROR: please fix the errors and re-run this script") 182 | sys.exit(1) 183 | 184 | if __name__ == '__main__': 185 | git_index = '--git-index' in sys.argv 186 | main(git_index=git_index) 187 | -------------------------------------------------------------------------------- /bin/debugger.py: -------------------------------------------------------------------------------- 1 | import json 2 | import zmq 3 | import pandas as pd 4 | import pylab 5 | 6 | 7 | def getstream(address): 8 | c = zmq.Context() 9 | s = c.socket(zmq.SUB) 10 | s.setsockopt(zmq.SUBSCRIBE, '') 11 | s.connect(address) 12 | while True: 13 | yield s.recv() 14 | 15 | 16 | def populate(df, n, stream): 17 | m = df.shape[0] 18 | while True: 19 | next(stream) 20 | j = next(stream) 21 | 22 | d = json.loads(j).get('data') 23 | if d: 24 | df.loc[df.shape[0]] = d 25 | 26 | if df.shape[0] >= n + m: 27 | break 28 | 29 | 30 | def plot_df(df): 31 | for i in range(df.shape[1]): 32 | pylab.subplot(df.shape[1], 1, i + 1) 33 | pylab.plot(df[i]) 34 | pylab.show(block=False) 35 | 36 | 37 | address = 'ipc:///tmp/relaylog' 38 | df = pd.DataFrame(columns=[0, 1, 2]) 39 | populate(df, 500, getstream(address)) 40 | plot_df(df) 41 | 42 | 43 | import IPython 44 | IPython.embed() 45 | -------------------------------------------------------------------------------- /bin/demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # This shows you Relay in action. Point your browser to: 3 | # http://:8080 4 | 5 | # is probably "localhost" 6 | # on mac, it's probably your boot2docker vm's IP address, $DOCKER_HOST 7 | 8 | 9 | dir="$( cd "$( dirname "$( dirname "$0" )" )" && pwd )" 10 | 11 | docker-compose build relay 12 | 13 | ( 14 | echo opening browser tab 15 | sleep 1 16 | cat </dev/null' % n) 152 | subprocess.Popen(cmd, shell=True, executable='bash').wait() 153 | 154 | 155 | def stop_if_mostly_diverging(errdata): 156 | """This is an example stop condition that asks Relay to quit if 157 | the error difference between consecutive samples is increasing more than 158 | half of the time. 159 | 160 | It's quite sensitive and designed for the demo, so you probably shouldn't 161 | use this is a production setting 162 | """ 163 | n_increases = sum([ 164 | abs(y) - abs(x) > 0 for x, y in zip(errdata, errdata[1:])]) 165 | if len(errdata) * 0.5 < n_increases: 166 | # most of the time, the next sample is worse than the previous sample 167 | # relay is not healthy 168 | return 0 169 | else: 170 | # most of the time, the next sample is better than the previous sample 171 | # realy is in a healthy state 172 | return -1 173 | -------------------------------------------------------------------------------- /relay/relay_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | from colorlog import ColoredFormatter 4 | from relay import log 5 | 6 | 7 | def configure_logging(add_handler, log=log): 8 | """ 9 | Configure log records. If adding a handler, make the formatter print all 10 | passed in key:value data. 11 | ie log.extra('msg', extra=dict(a=1)) 12 | generates 'msg a=1' 13 | 14 | `add_handler` (True, False, None, or Handler instance) 15 | if True, add a logging.StreamHandler() instance 16 | if False, do not add any handlers. 17 | if given a handler instance, add that the the logger 18 | """ 19 | _ignore_log_keys = set(logging.makeLogRecord({}).__dict__) 20 | 21 | def _json_format(record): 22 | extras = ' '.join( 23 | "%s=%s" % (k, record.__dict__[k]) 24 | for k in set(record.__dict__).difference(_ignore_log_keys)) 25 | if extras: 26 | record.msg = "%s %s" % (record.msg, extras) 27 | return record 28 | 29 | class ColoredJsonFormatter(ColoredFormatter): 30 | def format(self, record): 31 | record = _json_format(record) 32 | return super(ColoredJsonFormatter, self).format(record) 33 | if isinstance(add_handler, logging.Handler): 34 | log.addHandler(add_handler) 35 | elif add_handler is True: 36 | if not any(isinstance(h, logging.StreamHandler) for h in log.handlers): 37 | _h = logging.StreamHandler() 38 | _h.setFormatter(ColoredJsonFormatter( 39 | "%(log_color)s%(levelname)-8s %(message)s %(reset)s %(cyan)s", 40 | reset=True 41 | )) 42 | log.addHandler(_h) 43 | elif not log.handlers: 44 | log.addHandler(logging.NullHandler()) 45 | log.setLevel(logging.DEBUG) 46 | log.propagate = False 47 | return log 48 | 49 | 50 | def add_zmq_log_handler(address): 51 | import zmq.log.handlers 52 | 53 | class JSONPubHandler(zmq.log.handlers.PUBHandler): 54 | def format(self, record): 55 | return json.dumps(record.__dict__) 56 | 57 | sock = zmq.Context().socket(zmq.PUB) 58 | sock.connect(address) 59 | handler = JSONPubHandler(sock) 60 | return configure_logging(handler) 61 | -------------------------------------------------------------------------------- /relay/runner.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from collections import deque 4 | import numpy as np 5 | import os 6 | from os.path import abspath, dirname, join 7 | import subprocess 8 | import sys 9 | import time 10 | import threading 11 | 12 | from relay import log, configure_logging, add_zmq_log_handler 13 | from relay import util 14 | from relay import argparse_shared as at 15 | 16 | 17 | def start_webui(): 18 | cwd = join(dirname(dirname(abspath(__file__))), 'web/src') 19 | log.info("Starting node.js webui in a subshell") 20 | subprocess.Popen( 21 | 'cd %s ; node index.js' % cwd, shell=True, 22 | preexec_fn=os.setsid) # guarantee that the child process exits with me 23 | 24 | 25 | @util.coroutine 26 | def window(n, initial_data=()): 27 | win = deque(initial_data, n) 28 | while 1: 29 | win.append((yield win)) 30 | 31 | 32 | def calc_weight(errdata): 33 | sp = np.fft.fft(errdata)[1: len(errdata) // 2] 34 | if sp.sum() == 0: # there is no variation in the signal 35 | log.warn('no variation in the signal. fft cannot continue') 36 | return 1 37 | 38 | # get the phase in radians # -np.pi < phase <= +np.pi 39 | phase = np.angle(sp) # radians 40 | 41 | # find the amplitude integral of neighboring samples. 42 | # search <360 degrees to left of most recent sample's phase 43 | # p_k = phase - degrees_between_samples * k # kth phase 44 | amplitude_integrals = np.abs(np.sin(phase)) # iteratively updated 45 | # samples per cycle 46 | kth = len(errdata) / np.arange(1, len(errdata) // 2) 47 | num_degrees_between_samples = 2 * np.pi / kth 48 | p_k = phase.copy() 49 | while (kth > 0).any(): 50 | # find amplitude of a sign wave at specific phase 51 | p_k -= num_degrees_between_samples 52 | amplitude_integrals += np.abs(np.sin(p_k)) 53 | kth -= 1 54 | idxs = kth > 0 55 | not_idxs = ~idxs 56 | kth = kth[idxs] 57 | p_k[not_idxs] = 0 58 | num_degrees_between_samples[not_idxs] = 0 59 | 60 | # get the amplitude of each frequency in the fft spectrum 61 | amplitude = np.abs(sp) 62 | return ( 63 | # np.sin(phase) 64 | (np.sin(phase) / amplitude_integrals) 65 | * (amplitude / amplitude.sum()) 66 | ).sum() 67 | 68 | 69 | def create_ramp_plan(err, ramp): 70 | """ 71 | Formulate and execute on a plan to slowly add heat or cooling to the system 72 | 73 | `err` initial error (PV - SP) 74 | `ramp` the size of the ramp 75 | 76 | A ramp plan might yield MVs in this order at every timestep: 77 | [5, 0, 4, 0, 3, 0, 2, 0, 1] 78 | where err == 5 + 4 + 3 + 2 + 1 79 | """ 80 | if ramp == 1: # basecase 81 | yield int(err) 82 | while True: 83 | yield 0 84 | # np.arange(n).sum() == err 85 | # --> solve for n 86 | # err = (n - 1) * (n // 2) == .5 * n**2 - .5 * n 87 | # 0 = n**2 - n --> solve for n 88 | n = np.abs(np.roots([.5, -.5, 0]).max()) 89 | niter = int(ramp // (2 * n)) # 2 means add all MV in first half of ramp 90 | MV = n 91 | log.info('Initializing a ramp plan', extra=dict( 92 | ramp_size=ramp, err=err, niter=niter)) 93 | for x in range(int(n)): 94 | budget = MV 95 | for x in range(niter): 96 | budget -= MV // niter 97 | yield int(np.sign(err) * (MV // niter)) 98 | yield int(budget * np.sign(err)) 99 | MV -= 1 100 | while True: 101 | yield 0 102 | 103 | 104 | def validate_ns_or_sysexit(ns): 105 | ex = 0 106 | if None in [ns.target, ns.metric]: 107 | log.error("you must define a --metric and --target!") 108 | ex = 1 109 | if ns.warmer is None and ns.cooler is None: 110 | log.error("you must define either a --warmer or a --cooler!") 111 | ex = 1 112 | if ex: 113 | build_arg_parser().print_usage() 114 | sys.exit(1) 115 | 116 | 117 | def evaluate_stop_condition(errdata, stop_condition): 118 | """ 119 | Call the user-defined function: stop_condition(errdata) 120 | If the function returns -1, do nothing. Otherwise, sys.exit. 121 | """ 122 | if stop_condition: 123 | return_code = stop_condition(list(errdata)) 124 | if return_code != -1: 125 | log.info( 126 | 'Stop condition triggered! Relay is terminating.', 127 | extra=dict(return_code=return_code)) 128 | sys.exit(return_code) 129 | 130 | 131 | def main(ns): 132 | validate_ns_or_sysexit(ns) 133 | configure_logging(True) 134 | if ns.sendstats: 135 | if ns.sendstats == 'webui': 136 | add_zmq_log_handler('ipc:///tmp/relaylog') 137 | start_webui() 138 | else: 139 | add_zmq_log_handler(ns.sendstats) 140 | log.info( 141 | "Starting relay!", extra={k: str(v) for k, v in ns.__dict__.items()}) 142 | 143 | metric = ns.metric() 144 | target = ns.target() 145 | errhist = window(ns.lookback) 146 | ramp_index = 0 147 | 148 | while True: 149 | SP = next(target) # set point 150 | PV = next(metric) # process variable 151 | err = (SP - PV) 152 | log.debug('got metric value', extra=dict(PV=PV, SP=SP)) 153 | if ramp_index < ns.ramp: 154 | if ramp_index == 0: 155 | plan = create_ramp_plan(err, ns.ramp) 156 | ramp_index += 1 157 | MV = next(plan) 158 | errdata = errhist.send(0) 159 | else: 160 | errdata = errhist.send(err) 161 | weight = calc_weight(errdata) 162 | MV = int(round(err - weight * sum(errdata) / len(errdata))) 163 | log.info('data', extra=dict(data=[ 164 | err, weight, 165 | sum(errdata) / len(errdata)])) 166 | 167 | if MV > 0: 168 | if ns.warmer: 169 | log.debug('adding heat', extra=dict(MV=MV, err=err)) 170 | threading.Thread(target=ns.warmer, args=(MV,)).start() 171 | else: 172 | log.warn('too cold') 173 | elif MV < 0: 174 | if ns.cooler: 175 | log.debug('removing heat', extra=dict(MV=MV, err=err)) 176 | threading.Thread(target=ns.cooler, args=(MV,)).start() 177 | else: 178 | log.warn('too hot') 179 | else: 180 | log.debug( 181 | 'stabilized PV at setpoint', extra=dict(MV=MV, PV=PV, SP=SP)) 182 | time.sleep(ns.delay) 183 | evaluate_stop_condition(list(errdata), ns.stop_condition) 184 | 185 | 186 | build_arg_parser = at.build_arg_parser([ 187 | at.group( 188 | "What is Relay optimizing?", 189 | at.metric, at.target), 190 | at.group( 191 | "Instruct Relay how to heat or cool your metric", 192 | at.warmer, at.cooler), 193 | at.group( 194 | "Some optional Relay parameters", 195 | at.delay, at.lookback, at.ramp, at.sendstats, at.stop_condition), 196 | ]) 197 | -------------------------------------------------------------------------------- /relay/util.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import logging 3 | log = logging.getLogger('relay.util') 4 | 5 | 6 | def log_raise(msg, extra={}, err_kls=Exception): 7 | log.error(msg, extra=extra) 8 | raise err_kls("%s ||| %s" % ( 9 | msg, ' '.join('='.join([str(k), str(v)]) for k, v in extra.items()))) 10 | 11 | 12 | class InvalidImportPath(Exception): 13 | pass 14 | 15 | 16 | def load_obj_from_path(import_path, prefix=None, ld=dict()): 17 | """ 18 | import a python object from an import path 19 | 20 | `import_path` - a python import path. For instance: 21 | mypackage.module.func 22 | or 23 | mypackage.module.class 24 | `prefix` (str) - a value to prepend to the import path 25 | if it isn't already there. For instance: 26 | load_obj_from_path('module.func', prefix='mypackage') 27 | is the same as 28 | load_obj_from_path('mypackage.module.func') 29 | `ld` (dict) key:value data to pass to the logger if an error occurs 30 | """ 31 | if prefix and not import_path.startswith(prefix): 32 | import_path = '.'.join([prefix, import_path]) 33 | 34 | log.debug( 35 | 'attempting to load a python object from an import path', 36 | extra=dict(import_path=import_path, **ld)) 37 | try: 38 | mod = importlib.import_module(import_path) 39 | return mod # yay, we found a module. return it 40 | except: 41 | pass # try to extract an object from a module 42 | try: 43 | path, obj_name = import_path.rsplit('.', 1) 44 | except ValueError: 45 | log_raise( 46 | ("import path needs at least 1 period in your import path." 47 | " An example import path is something like: module.obj"), 48 | dict(import_path=import_path, **ld), InvalidImportPath) 49 | try: 50 | mod = importlib.import_module(path) 51 | except ImportError: 52 | newpath = path.replace(prefix, '', 1).lstrip('.') 53 | log.debug( 54 | "Could not load import path. Trying a different one", 55 | extra=dict(oldpath=path, newpath=newpath)) 56 | path = newpath 57 | mod = importlib.import_module(path) 58 | try: 59 | obj = getattr(mod, obj_name) 60 | except AttributeError: 61 | log_raise( 62 | ("object does not exist in given module." 63 | " Your import path is not" 64 | " properly defined because the given `obj_name` does not exist"), 65 | dict(import_path=path, obj_name=obj_name, **ld), 66 | InvalidImportPath) 67 | return obj 68 | 69 | 70 | def coroutine(func): 71 | def f(*args, **kwargs): 72 | g = func(*args, **kwargs) 73 | next(g) 74 | return g 75 | return f 76 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | try: 3 | from setuptools import find_packages 4 | except ImportError: 5 | print ("Please install Distutils and setuptools" 6 | " before installing this package") 7 | raise 8 | 9 | setup( 10 | name='relay.runner', 11 | version='0.1.10.dev0', 12 | description=( 13 | 'A smart thermostat. Given a metric, or some timeseries that should' 14 | ' approach a given target, add heat or coolant as necessary' 15 | ' You can use Relay to auto-scale workers in large' 16 | ' distributed systems or do anything a thermostat might do.' 17 | ), 18 | long_description="Check the project homepage for details", 19 | keywords=[ 20 | 'relay', 'pid', 'pid controller', 'thermostat', 'tuning', 21 | 'oscilloscope', 'auto-scale'], 22 | 23 | author='Alex Gaudio', 24 | author_email='adgaudio@gmail.com', 25 | url='http://github.com/sailthru/relay', 26 | 27 | packages=find_packages(), 28 | include_package_data=True, 29 | install_requires=['argparse_tools>=1.0.6', 'colorlog', 'numpy'], 30 | 31 | extras_require={ 32 | 'webui': ['pyzmq'], 33 | }, 34 | tests_require=['nose'], 35 | test_suite="nose.main", 36 | zip_safe=True, 37 | 38 | entry_points = { 39 | 'console_scripts': [ 40 | 'relay = relay.__main__:go', 41 | ], 42 | 'setuptools.installation': [ 43 | 'eggsecutable = relay.__main__:go', 44 | ], 45 | }, 46 | ) 47 | -------------------------------------------------------------------------------- /web/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dockerfile/nodejs 2 | EXPOSE 8080 3 | EXPOSE 5673 4 | RUN apt-get update && apt-get install -y -f libzmq3 libzmq3-dev 5 | WORKDIR /relay/web 6 | COPY ./package.json /relay/web/ 7 | RUN npm install 8 | COPY ./src /relay/web/src/ 9 | COPY ./vendor /relay/web/vendor/ 10 | CMD node src/index.js tcp://0.0.0.0:5673 11 | -------------------------------------------------------------------------------- /web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "relay-visualizer", 3 | "version": "0.0.1", 4 | "description": "Visualize relay over time", 5 | "dependencies": { 6 | "ejs": "^1.0.0", 7 | "express": "^4.10.1", 8 | "socket.io": "^1.2.0", 9 | "zmq": "~2.8.0" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /web/src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 75 | 76 | 77 |
78 |
79 | 80 | 81 | -------------------------------------------------------------------------------- /web/src/index.js: -------------------------------------------------------------------------------- 1 | var express = require('express'); 2 | var app = express(); 3 | var server = require('http').Server(app); 4 | var io = require('socket.io')(server); 5 | var path = require('path'); 6 | var util = require('util'); 7 | 8 | // receive zmq messages 9 | var zmq = require('zmq'); 10 | var subscriber = zmq.socket('sub'); 11 | if (process.argv[2]) { 12 | console.log('zmq binding to address ' + process.argv[2]); 13 | subscriber.bindSync(process.argv[2]); 14 | } else { 15 | console.log('zmq binding to address ' + 'ipc:///tmp/relaylog'); 16 | subscriber.bindSync('ipc:///tmp/relaylog'); 17 | } 18 | subscriber.subscribe(''); 19 | console.log('zmq initialized') 20 | 21 | // when a client connects via websockets, 22 | // forward relevant zmq message data to the client 23 | io.on('connection', function (socket) { 24 | 25 | subscriber.on('message', function () { 26 | var payload = JSON.parse(arguments[1].toString()); 27 | if (payload.PV) { 28 | socket.emit('pvdata', {y: payload.PV}); 29 | } 30 | if (payload.MV) { 31 | socket.emit('mvdata', {y: payload.MV}); 32 | } 33 | if (payload.SP) { 34 | socket.emit('spdata', {y: payload.SP}); 35 | } 36 | }); 37 | }); 38 | 39 | // configure webserver 40 | app.engine('.html', require('ejs').__express); 41 | var port = 8080; 42 | server.listen(port); 43 | app.use('/vendor', express.static(path.join(__dirname, '..', '/vendor'))); 44 | app.get('/', function (req, res) { 45 | res.render( 46 | path.join(__dirname, 'index.html'), 47 | {webserver_url: util.format('http://%s:%s', req.hostname, port)}); 48 | }); 49 | 50 | --------------------------------------------------------------------------------