├── .gitignore ├── Berksfile ├── Berksfile.lock ├── Gemfile.lock ├── LICENSE.txt ├── README.md ├── Vagrantfile ├── attributes ├── default.rb ├── nerve.rb ├── ports.rb ├── services.rb ├── synapse.rb └── test.rb ├── environments └── test.rb ├── files └── default │ └── tests │ └── minitest │ ├── support │ └── helpers.rb │ └── test_test.rb ├── libraries └── deep_to_hash.rb ├── metadata.rb ├── recipes ├── default.rb ├── nerve.rb ├── nerve_disable.rb ├── synapse.rb ├── synapse_disable.rb └── test.rb └── templates └── default ├── sv-helloworld-run.erb ├── sv-nerve-run.erb ├── sv-synapse-run.erb ├── sv-zookeeper-run.erb └── zookeeper.cfg.erb /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant 2 | .*sw? 3 | -------------------------------------------------------------------------------- /Berksfile: -------------------------------------------------------------------------------- 1 | site :opscode 2 | 3 | metadata 4 | cookbook 'apt' 5 | cookbook 'minitest-handler' 6 | -------------------------------------------------------------------------------- /Berksfile.lock: -------------------------------------------------------------------------------- 1 | DEPENDENCIES 2 | apt 3 | minitest-handler 4 | smartstack 5 | path: . 
6 | metadata: true 7 | 8 | GRAPH 9 | apt (2.3.8) 10 | build-essential (2.0.0) 11 | chef_handler (1.1.6) 12 | java (1.22.0) 13 | minitest-handler (1.1.4) 14 | chef_handler (>= 0.0.0) 15 | ruby (0.9.2) 16 | runit (1.5.10) 17 | build-essential (>= 0.0.0) 18 | yum (~> 3.0) 19 | yum-epel (>= 0.0.0) 20 | smartstack (0.5.0) 21 | java (>= 0.0.0) 22 | ruby (~> 0.9.2) 23 | runit (>= 1.1.0) 24 | yum (3.2.0) 25 | yum-epel (0.3.6) 26 | yum (~> 3.0) 27 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | activesupport (3.2.15) 5 | i18n (~> 0.6, >= 0.6.4) 6 | multi_json (~> 1.0) 7 | addressable (2.3.5) 8 | akami (1.2.0) 9 | gyoku (>= 0.4.0) 10 | nokogiri (>= 1.4.0) 11 | berkshelf (2.0.10) 12 | activesupport (~> 3.2.0) 13 | addressable (~> 2.3.4) 14 | buff-shell_out (~> 0.1) 15 | chozo (>= 0.6.1) 16 | faraday (>= 0.8.5) 17 | hashie (>= 2.0.2) 18 | minitar (~> 0.5.4) 19 | rbzip2 (~> 0.2.0) 20 | retryable (~> 1.3.3) 21 | ridley (~> 1.5.0) 22 | solve (>= 0.5.0) 23 | thor (~> 0.18.0) 24 | buff-config (0.4.0) 25 | buff-extensions (~> 0.3) 26 | varia_model (~> 0.1) 27 | buff-extensions (0.5.0) 28 | buff-ignore (1.1.0) 29 | buff-ruby_engine (0.1.0) 30 | buff-shell_out (0.1.0) 31 | buff-ruby_engine (~> 0.1.0) 32 | builder (3.2.2) 33 | celluloid (0.14.1) 34 | timers (>= 1.0.0) 35 | celluloid-io (0.14.1) 36 | celluloid (>= 0.14.1) 37 | nio4r (>= 0.4.5) 38 | chozo (0.6.1) 39 | activesupport (>= 3.2.0) 40 | hashie (>= 2.0.2) 41 | multi_json (>= 1.3.0) 42 | erubis (2.7.0) 43 | faraday (0.8.8) 44 | multipart-post (~> 1.2.0) 45 | ffi (1.9.0) 46 | gssapi (1.0.3) 47 | ffi (>= 1.0.1) 48 | gyoku (1.1.0) 49 | builder (>= 2.1.2) 50 | hashie (2.0.5) 51 | httpclient (2.3.4.1) 52 | httpi (0.9.7) 53 | rack 54 | i18n (0.6.5) 55 | json (1.8.1) 56 | little-plugger (1.1.3) 57 | logging (1.8.1) 58 | little-plugger (>= 1.1.3) 59 | 
multi_json (>= 1.3.6) 60 | mini_portile (0.5.1) 61 | minitar (0.5.4) 62 | mixlib-authentication (1.3.0) 63 | mixlib-log 64 | mixlib-log (1.6.0) 65 | multi_json (1.8.2) 66 | multipart-post (1.2.0) 67 | net-http-persistent (2.9) 68 | net-ssh (2.7.0) 69 | nio4r (0.5.0) 70 | nokogiri (1.6.0) 71 | mini_portile (~> 0.5.0) 72 | nori (1.1.5) 73 | rack (1.5.2) 74 | rbzip2 (0.2.0) 75 | retryable (1.3.3) 76 | ridley (1.5.3) 77 | addressable 78 | buff-config (~> 0.2) 79 | buff-extensions (~> 0.3) 80 | buff-ignore (~> 1.1) 81 | buff-shell_out (~> 0.1) 82 | celluloid (~> 0.14.0) 83 | celluloid-io (~> 0.14.0) 84 | erubis 85 | faraday (>= 0.8.4) 86 | hashie (>= 2.0.2) 87 | json (>= 1.7.7) 88 | mixlib-authentication (>= 1.3.0) 89 | net-http-persistent (>= 2.8) 90 | net-ssh 91 | nio4r (>= 0.5.0) 92 | retryable 93 | solve (>= 0.4.4) 94 | varia_model (~> 0.1) 95 | winrm (~> 1.1.0) 96 | rubyntlm (0.1.1) 97 | savon (0.9.5) 98 | akami (~> 1.0) 99 | builder (>= 2.1.2) 100 | gyoku (>= 0.4.0) 101 | httpi (~> 0.9) 102 | nokogiri (>= 1.4.0) 103 | nori (~> 1.0) 104 | wasabi (~> 1.0) 105 | solve (0.8.1) 106 | thor (0.18.1) 107 | timers (1.1.0) 108 | uuidtools (2.1.4) 109 | varia_model (0.2.0) 110 | buff-extensions (~> 0.2) 111 | hashie (>= 2.0.2) 112 | wasabi (1.0.0) 113 | nokogiri (>= 1.4.0) 114 | winrm (1.1.3) 115 | gssapi (~> 1.0.0) 116 | httpclient (~> 2.2, >= 2.2.0.2) 117 | logging (~> 1.6, >= 1.6.1) 118 | nokogiri (~> 1.5) 119 | rubyntlm (~> 0.1.1) 120 | savon (= 0.9.5) 121 | uuidtools (~> 2.1.2) 122 | 123 | PLATFORMS 124 | ruby 125 | 126 | DEPENDENCIES 127 | berkshelf 128 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Martin Rhoads 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software 
without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Description # 2 | 3 | This cookbook configures Airbnb's SmartStack. 4 | SmartStack is our service registration, discovery and monitoring platform. 5 | It allows you to quickly and reliably connect to other services that you need, and for others to connect to your service. 6 | 7 | # Getting started with this cookbook # 8 | 9 | This cookbook contains everything you need to get SmartStack up and running, both in development and in production. 10 | 11 | ## Production Use ## 12 | 13 | ### Set up zookeeper ### 14 | 15 | If you are ready to install SmartStack on your machines, you will first need to do a bit of prep. 16 | First, you will need [Zookeeper](https://cwiki.apache.org/confluence/display/ZOOKEEPER/ProjectDescription) running in your infrastructure. 17 | We recommend using an [existing cookbook](https://github.com/SimpleFinance/chef-zookeeper). 
18 | For now, you can just set up a single machine, but for production use we recommend an [ensemble](http://zookeeper.apache.org/doc/r3.1.2/zookeeperAdmin.html#sc_zkMulitServerSetup) of at least 3 nodes managed with [exhibitor](https://github.com/Netflix/exhibitor/wiki). 19 | 20 | ### Configure chef ### 21 | 22 | In your role, environment file, or infrastructure repo: 23 | 24 | * set `node.zookeeper.smartstack_cluster` to a list of the zookeeper machines you'll be using for smartstack. 25 | * create a services hash in `smartstack/attributes/services.rb` and `ports.rb` describing how you want your services configured. more information is [below](#configuring-smartstack) 26 | * enable the services you want: 27 | * where the service is running, add it to `node.nerve.enabled_services` 28 | * where it is being consumed, add it to `node.synapse.enabled_services` 29 | 30 | That's all! 31 | See the more extensive documentation below if you need additional help. 32 | 33 | ## Dev and Testing ## 34 | 35 | This cookbook is configured to be easy to run in dev using [vagrant](http://www.vagrantup.com/). 36 | To get started: 37 | 38 | * Install [Virtualbox](https://www.virtualbox.org/wiki/Downloads); it's free! 39 | * Install [Vagrant](http://downloads.vagrantup.com/); this cookbook has been tested with v1.3.5 40 | * Install the [berkshelf](http://berkshelf.com/) plugin for vagrant: `vagrant plugin install vagrant-berkshelf` 41 | * Bring up SmartStack in a VM: `vagrant up` 42 | 43 | This will bring up an Ubuntu VM configured with Zookeeper, SmartStack, and a few sample services. 44 | The SmartStack integration tests will automatically run inside the Vagrant VM. 45 | 46 | # How SmartStack Works # 47 | 48 | ## Synapse ## 49 | 50 | [Synapse](https://github.com/airbnb/synapse) is a service discovery platform. 51 | It lets you reliably connect to an available worker for a given service. 
52 | You don't have to worry about discovery within your application, and you can easily do the same thing in dev as in prod. 53 | 54 | ### How to use synapse ### 55 | 56 | Using synapse to talk to a service is easy. 57 | Just specify that you would like to do so in your role file. 58 | You'll need to add a `'synapse' => {'enabled_services' => ['desired_service']}` section to your `default_attributes` section: 59 | 60 | ```ruby 61 | name 'myrole' 62 | description 'my role file' 63 | 64 | default_attributes({ 65 | 'synapse' => { 'enabled_services' => [ 'service1', 'service2' ] } 66 | }) 67 | 68 | run_list( 69 | 'recipe[smartstack]', 70 | 'recipe[myrole]' 71 | ) 72 | ``` 73 | 74 | Once you've done this and reconverged your boxes, the service will be available to you on `localhost` at its synapse port. 75 | If you are writing out a config file in chef and need to specify the port to use, just use `node.smartstack.services.desired_service.local_port` in your config. 76 | You can manually look up your synapse port in `attributes/ports.rb` in this cookbook. 77 | 78 | ### How synapse works ### 79 | 80 | For every enabled service, synapse looks up a list of available servers which run the service in Zookeeper. 81 | It then configures a local haproxy to forward requests for `localhost`:`synapse_port` to one of those backends (by default, in a round-robin fashion). 82 | Whenever the list of servers for the service changes in zookeeper, synapse reconfigures haproxy to reflect the latest information. 83 | 84 | If synapse is not running, haproxy is still running, containing the latest set of servers. 85 | So, even with synapse or zookeeper broken, the list of servers remains reasonably current unless there's massive change. 86 | 87 | ### How to troubleshoot synapse ### 88 | 89 | The immediate course of action is to visit the haproxy stats page. 90 | This is accessible at `your.box:3212` -- just hit it in your web browser. 
91 | The stats page will show you all of your enabled services and the backends for those services. 92 | You'll be able to see many per-service and per-backend stats, including the current status and insight into processed requests and how they are doing. 93 | 94 | You can restart synapse via the usual way with runit: `sv restart synapse`. 95 | You can also safely reload haproxy if you suspect issues there -- existing connections will be unaffected. 96 | 97 | ## Nerve ## 98 | 99 | [Nerve](https://github.com/airbnb/nerve) is the registration component for synapse. 100 | It takes care of creating entries for your services in Zookeeper. 101 | Your service will be published in zookeeper only when it passes the configured health checks. 102 | When your service stops passing health checks, it will be removed, and placed in maintenance mode in all of its synapse consumers. 103 | 104 | ### Using Nerve ### 105 | 106 | Using nerve is as simple as [using synapse](#how-to-use-synapse). 107 | You just add a `'nerve' => {'enabled_services' => ['your_service']}` section to your `default_attributes` in your role file: 108 | 109 | ```ruby 110 | name 'myservice' 111 | description 'sets up myservice' 112 | 113 | default_attributes({ 114 | 'nerve' => { 'enabled_services' => [ 'myservice' ] } 115 | }) 116 | 117 | run_list( 118 | 'recipe[smartstack]', 119 | 'recipe[myservice]' 120 | ) 121 | ``` 122 | 123 | However, you would normally do this if you are writing a role file for your service. 124 | This probably means that you wrote the service as well. 125 | In this case, you'll need to write the [nerve/synapse configuration](#configuring-smartstack) for the service. 126 | You'll also want to make sure that your service has the correct endpoints for [health](#health-checks) and [connectivity](#connectivity-checks) checks. 127 | 128 | Once nerve is configured to check your service on your boxes, it will start making health checks.
129 | You can see the health checks being made in nerve's log, in `/etc/service/nerve/log`. 130 | 131 | ### Configuring Smartstack ### 132 | 133 | Smartstack configuration lives in two files in this cookbook. 134 | The first file is `attributes/ports.rb`. 135 | This just contains a port reservation for your service. 136 | 137 | The second, more important file is `attributes/services.rb`. 138 | Let's take a look at an example: 139 | 140 | ```ruby 141 | 'ssspy' => { 142 | 'synapse' => { 143 | 'server_options' => 'check inter 30s downinter 2s fastinter 2s rise 3 fall 1', 144 | 'discovery' => { 'method' => 'zookeeper', }, 145 | 'listen' => [ 146 | 'mode http', 147 | 'option httpchk GET /ping', 148 | ], 149 | }, 150 | 'nerve' => { 151 | 'port' => 3260, 152 | 'check_interval' => 2, 153 | 'checks' => [ 154 | { 'type' => 'http', 'uri' => '/health', 'timeout' => 0.5, 'rise' => 2, 'fall' => 1 }, 155 | ] 156 | }, 157 | }, 158 | ``` 159 | 160 | As you can see, there are several sections here. 161 | Let's start with the nerve config: 162 | 163 | ```ruby 164 | 'nerve' => { 165 | 'port' => 3260, 166 | 'check_interval' => 2, 167 | 'checks' => [ 168 | { 'type' => 'http', 'uri' => '/health', 'timeout' => 0.5, 'rise' => 2, 'fall' => 1 }, 169 | ] 170 | }, 171 | ``` 172 | 173 | Nerve here is configured to make its health checks on port 3260. 174 | This means that `ssspy` is properly running on its own synapse port locally. 175 | The checks happen every 2 seconds, and there's only one check -- an http check to the `/health` endpoint. 176 | 177 | This is the most usual configuration. 178 | However, sometimes you might see multiple checks defined per service.
179 | For instance, here is the config for `flog_thrift`: 180 | 181 | ```ruby 182 | 'nerve' => { 183 | 'port' => 4567, 184 | 'check_interval' => 1, 185 | 'checks' => [ 186 | { 'type' => 'tcp', 'timeout' => 1, 'rise' => 5, 'fall' => 2 }, 187 | { 'type' => 'http', 'port' => 8422, 'uri' => '/health', 'timeout' => 1, 'rise' => 5, 'fall' => 2 }, 188 | ] 189 | }, 190 | ``` 191 | 192 | For `flog_thrift` to be up, it has to both be listening on its thrift port via TCP and also pass its http health check. 193 | 194 | Let's look at ssspy's synapse config: 195 | 196 | ```ruby 197 | 'synapse' => { 198 | 'server_options' => 'check inter 30s downinter 2s fastinter 2s rise 3 fall 1', 199 | 'discovery' => { 200 | 'method' => 'zookeeper', 201 | 'hosts' => [] 202 | }, 203 | 'listen' => [ 204 | 'mode http', 205 | 'option httpchk GET /ping', 206 | ], 207 | }, 208 | ``` 209 | 210 | The `server_options` directive tells haproxy to run checks on each backend with proper check intervals. 211 | You can read more about the [haproxy check options](https://code.google.com/p/haproxy-docs/wiki/ServerOptions). 212 | The `discovery` section tells us how synapse will find ssspy; in this case, via zookeeper. 213 | 214 | Finally, the `listen` section contains additional haproxy configuration. 215 | It specifies how haproxy will conduct its own health checks. 216 | SSSPy is following convention by properly implementing a `/ping` endpoint for [connectivity checks](#connectivity-checks). 217 | 218 | ### Health Checks ### 219 | 220 | Nobody wants your service to receive traffic when it's not actually functional. 221 | Your consumers do not want that, because they want their service calls to work. 222 | And you don't want that, because you also want your service to work. 223 | 224 | You can make sure that a broken service instance won't receive traffic by making your `/health` checks fail when your service is broken. 225 | Simply return a non-`200` status code.
226 | Here is an example from [optica](https://github.com/airbnb/optica), a simple Sinatra service: 227 | 228 | ```ruby 229 | get '/health' do 230 | if settings.store.healthy? 231 | content_type 'text/plain', :charset => 'utf-8' 232 | return "OK" 233 | else 234 | halt(503) 235 | end 236 | end 237 | ``` 238 | 239 | The `healthy?` function does [real work](https://github.com/airbnb/optica/blob/164ee747425eb823994345203fd40089751724f5/store.rb#L94) to make sure the service actually functions. 240 | Only nerve will ever hit that endpoint, so you can and should feel free to make it take some time. 241 | 242 | ### Connectivity Checks ### 243 | 244 | If a particular backend for your service passes its [health checks](#health-checks), it might still be unavailable to consumers. 245 | One example is a network partition -- synapse has discovered your service, but can't actually reach it. 246 | To prevent such problems, we configure the haproxy on the consumer end to do connectivity checks when possible. 247 | 248 | We do this by utilizing [haproxy's built-in checking mechanism](http://cbonte.github.io/haproxy-dconv/configuration-1.4.html#5-check). 249 | To distinguish between health checks made by nerve and connectivity checks made by haproxy on the synapse end, we define a `/ping` endpoint. 250 | This endpoint should *always* return `200` with a conventional text body of `PONG`. 251 | 252 | Because the number of machines making connectivity checks may be large, you should strive to make the `/ping` check as lightweight as possible. 253 | 254 | ## Zookeeper and Smartstack ## 255 | 256 | Smartstack cannot function without [zookeeper](https://cwiki.apache.org/confluence/display/ZOOKEEPER/ProjectDescription). 257 | This shared file-like store provides the correct semantics for ensuring that service information is correct and distributed across our infrastructure.
258 | We use zookeeper because it provides the [ephemeral nodes](http://zookeeper.apache.org/doc/r3.2.1/zookeeperProgrammers.html#Ephemeral+Nodes) nerve uses to register services. 259 | Its distributed nature prevents it from becoming a scaling choke point or a single point of failure in our infrastructure. 260 | 261 | ### Debugging Smartstack ### 262 | 263 | You would like to use your service from another service, but something is not working. 264 | These instructions will tell you how to debug the situation. 265 | 266 | First, on a consumer box (a box which has `the_service` in its `'synapse' => { 'enabled_services'`) go to port 3212 in your browser. 267 | You'll see the haproxy stats page. 268 | There should be a section for `the_service` containing the boxes providing `the_service` 269 | 270 | If the section exists and contains some boxes, but they are all in red, those boxes are failing connectivity checks. 271 | You should double-check your security group settings with SRE. 272 | If the section is not there at all, or is missing some boxes, then there could be two reasons: 273 | 1. the service is not properly discovered 274 | 2. the service is not properly registered 275 | 276 | To check if it's (1), check `synapse` on the consumer box. 277 | 1. It should be running; check with `sv s synapse` 278 | 2. Try restarting it with `sv restart synapse` 279 | 3. Check the synapse logs in `/etc/service/synapse/log/current` for anything unusual 280 | 281 | If it looks like synapse is working, then the problem is probably (2) -- no registration. 282 | To debug, follow these steps: 283 | 284 | 1. Check the service on one of its instances 285 | * Is it running? Is it insta-crashing? watch `sv s the_service` 286 | 2. If it's insta-crashing, figure out why 287 | * Check `/etc/service/the_service/logs/current` 288 | * Run it live; `sv down the_service; cd /etc/service/the_service; ./run` 289 | 3. If it's running, is it passing health checks?
290 | * `curl -D - localhost:32xx/health` and ensure you get a 200 291 | 4. Is it passing health checks from a remote box? 292 | * this happens if you accidentally only bind to `lo` in your service 293 | * run the health check `curl` from another box 294 | 5. Is nerve running? 295 | * `sv s nerve`; if something is wrong with nerve, alert SRE 296 | 297 | 298 | You can also debug smartstack by directly looking in zookeeper for registered services, and watching how that list changes over time. 299 | You can do this via an exhibitor UI. 300 | Another way is to use a zkCli client and connect directly to one of the machines in the cluster. 301 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # this is going to be necessary with chef 11.6 environment support 5 | #smartstack_dir = File.dirname(File.expand_path __FILE__) 6 | 7 | Vagrant.configure("2") do |master_config| 8 | 9 | # Enabling the Berkshelf plugin. To enable this globally, add this configuration 10 | # option to your ~/.vagrant.d/Vagrantfile file 11 | master_config.berkshelf.enabled = true 12 | 13 | # An array of symbols representing groups of cookbook described in the Vagrantfile 14 | # to exclusively install and copy to Vagrant's shelf. 15 | # master_config.berkshelf.only = [] 16 | 17 | # An array of symbols representing groups of cookbook described in the Vagrantfile 18 | # to skip installing and copying to Vagrant's shelf. 19 | # master_config.berkshelf.except = [] 20 | 21 | master_config.vm.define "smartstack" do |config| 22 | config.vm.box = "smartstack" 23 | 24 | # The url from where the 'config.vm.box' box will be fetched if it 25 | # doesn't already exist on the user's system.
26 | # config.vm.box_url = "http://domain.com/path/to/above.box" 27 | 28 | config.vm.network :private_network, ip: '172.16.1.3' 29 | 30 | config.vm.provider "virtualbox" do |v| 31 | v.customize ['modifyvm', :id, 32 | '--memory', '512', 33 | '--cpus', '2', 34 | ] 35 | end 36 | 37 | config.vm.provision :chef_solo do |chef| 38 | chef.json = { 39 | :smartstack => { 40 | :development => true 41 | }, 42 | :env => 'test', 43 | :languages => { :ruby => { :default_version => '1.9.1' } }, 44 | } 45 | 46 | # uncomment to use with chef 11.6 47 | #chef.environments_path = File.join(smartstack_dir, 'environments') 48 | #chef.environment = 'test' 49 | 50 | chef.run_list = [ 51 | "recipe[apt]", 52 | "recipe[smartstack::synapse]", 53 | "recipe[smartstack::nerve]", 54 | "recipe[smartstack::test]", 55 | "recipe[minitest-handler]" 56 | ] 57 | end 58 | end 59 | end 60 | -------------------------------------------------------------------------------- /attributes/default.rb: -------------------------------------------------------------------------------- 1 | include_attribute 'smartstack::ports' 2 | include_attribute 'smartstack::services' 3 | 4 | default.smartstack.user = 'smartstack' 5 | default.smartstack.home = '/opt/smartstack' 6 | default.smartstack.gem_home = File.join(node.smartstack.home, '.gem') 7 | default.smartstack.jar_source = nil 8 | 9 | # you should override this in your environment with the real cluster 10 | default.zookeeper.smartstack_cluster = [ 'localhost:2181' ] 11 | -------------------------------------------------------------------------------- /attributes/nerve.rb: -------------------------------------------------------------------------------- 1 | include_attribute "smartstack::default" 2 | 3 | default.nerve.home = File.join(node.smartstack.home, 'nerve') 4 | default.nerve.install_dir = File.join(node.nerve.home,'src') 5 | default.nerve.config_file = File.join(node.nerve.home,'config.json') 6 | 7 | default.nerve.repository = 'https://github.com/airbnb/nerve.git' 
8 | default.nerve.reference = 'v0.5.3' 9 | default.nerve.jarname = nil 10 | default.nerve.jvmopts = '-Xmx64m -XX:PermSize=64m' 11 | 12 | # a list of keys from node.smartstack.services (services cookbook) 13 | default.nerve.enabled_services = [] 14 | 15 | default.nerve.local.host = "127.0.0.1" 16 | default.nerve.local.port = 1025 17 | 18 | # everything below is used to configure nerve at runtime 19 | instance_id = node.hostname 20 | instance_id = node.ec2.instance_id if node.has_key? 'ec2' 21 | default.nerve.config = { 22 | 'instance_id' => instance_id, 23 | 'listen_port' => node.nerve.local.port, 24 | 'services' => {}, 25 | } 26 | -------------------------------------------------------------------------------- /attributes/ports.rb: -------------------------------------------------------------------------------- 1 | default.smartstack.ports = { 2 | # reserved for health checks on synapse itself 3 | # TODO: implement health checks on synapse 4 | 3210 => 'synapse', 5 | # reserved for a possible UI for nerve 6 | 3211 => 'nerve', 7 | # reserved for the haproxy stats socket 8 | 3212 => 'haproxy', 9 | 10 | # moar services 11 | 3333 => 'helloworld', 12 | 3334 => 'helloworld-leader', 13 | } 14 | 15 | # also create a mapping going the other way 16 | default.smartstack.service_ports = Hash[node.smartstack.ports.collect {|k, v| [v, k]}] 17 | -------------------------------------------------------------------------------- /attributes/services.rb: -------------------------------------------------------------------------------- 1 | include_attribute 'smartstack::ports' 2 | 3 | # on chef-solo < 11.6, we hack around lack of environment support 4 | # by using node.env because node.environment cannot be set 5 | default.smartstack.env = (node.has_key?('env') ? 
node.env : node.environment) 6 | 7 | default.smartstack.services = { 8 | 'synapse' => {}, 9 | 'nerve' => {}, 10 | 'haproxy' => {}, 11 | 12 | 'helloworld' => { 13 | 'synapse' => { 14 | 'discovery' => { 'method' => 'zookeeper' }, 15 | 'haproxy' => { 16 | 'server_options' => 'check inter 1s rise 1 fall 1', 17 | 'listen' => [ 18 | 'mode http', 19 | 'option httpchk GET /ping', 20 | ], 21 | }, 22 | }, 23 | 'nerve' => { 24 | 'port' => 9494, 25 | 'check_interval' => 1, 26 | 'checks' => [ 27 | { 'type' => 'http', 'uri' => '/health', 'timeout' => 1, 'rise' => 1, 'fall' => 2 }, 28 | ], 29 | }, 30 | }, 31 | 32 | 'helloworld-leader' => { 33 | 'zk_path' => "/#{node.smartstack.env}/services/helloworld/services", 34 | 'synapse' => { 35 | 'discovery' => { 'method' => 'zookeeper' }, 36 | 'leader_election' => true, 37 | 'haproxy' => { 38 | 'server_options' => 'check inter 1s rise 1 fall 1', 39 | 'listen' => [ 40 | 'mode http', 41 | 'option httpchk GET /ping', 42 | ], 43 | }, 44 | }, 45 | } 46 | } 47 | 48 | # make sure each service has a smartstack config 49 | default.smartstack.services.each do |name, service| 50 | # populate zk paths for all services 51 | unless service.has_key? 
'zk_path' 52 | default.smartstack.services[name]['zk_path'] = "/#{node.smartstack.env}/services/#{name}/services" 53 | end 54 | 55 | # populate the local_port for all services 56 | port = node.smartstack.service_ports[name] 57 | if Integer === port 58 | service['local_port'] = port 59 | else 60 | Chef::Log.error "Service #{name} has no synapse port allocated; please see services/attributes/ports.rb" 61 | raise "Synapse port missing for #{name}" 62 | end 63 | end 64 | -------------------------------------------------------------------------------- /attributes/synapse.rb: -------------------------------------------------------------------------------- 1 | include_attribute "smartstack::default" 2 | 3 | default.synapse.home = File.join(node.smartstack.home, 'synapse') 4 | default.synapse.install_dir = File.join(node.synapse.home,'src') 5 | default.synapse.config_file = File.join(node.synapse.home,'config.json') 6 | 7 | default.synapse.repository = 'https://github.com/airbnb/synapse.git' 8 | default.synapse.reference = 'v0.10.0' 9 | default.synapse.jarname = nil 10 | default.synapse.jvmopts = '-Xmx64m -XX:PermSize=64m' 11 | 12 | # override this in your role file or wrapper cookbook 13 | default.synapse.enabled_services = [] 14 | 15 | default.synapse.haproxy.sock_dir = '/var/haproxy' 16 | default.synapse.haproxy.sock_file = File.join(node.synapse.haproxy.sock_dir, 'stats.sock') 17 | default.synapse.haproxy.channel = 'local1' 18 | 19 | default.synapse.config = { 20 | 'services' => {}, 21 | 'haproxy' => { 22 | 'reload_command' => "sudo service haproxy reload", 23 | 'config_file_path' => '/etc/haproxy/haproxy.cfg', 24 | 'socket_file_path' => node.synapse.haproxy.sock_file, 25 | 'do_writes' => true, 26 | 'do_reloads'=> true, 27 | 'do_socket' => true, 28 | 'global' => [ 29 | 'daemon', 30 | 'spread-checks 2', 31 | 'user haproxy', 32 | 'group haproxy', 33 | 'maxconn 8192', 34 | "log 127.0.0.1 #{node.synapse.haproxy.channel}", 35 | "stats socket 
#{node.synapse.haproxy.sock_file} group #{node.smartstack.user} mode 660 level admin", 36 | ], 37 | 'defaults' => [ 38 | # we log all services by default 39 | # services that are too high-volume should get an 40 | # option dontlog-normal 41 | # to avoid logging normal successful connections 42 | 'log global', 43 | 'option dontlognull', 44 | 'option log-separate-errors', 45 | 46 | # default timeouts; these should be overriden per service 47 | 'maxconn 2000', 48 | 'timeout connect 5s', 49 | 'timeout check 5s', 50 | 'timeout client 50s', 51 | 'timeout server 50s', 52 | 53 | # we re-try the request if a backend dies mid-connection 54 | 'option redispatch', 55 | 'retries 3', 56 | 57 | # default sane balancing between backends 58 | 'balance roundrobin', 59 | ], 60 | 'extra_sections' => { 61 | 'listen stats :3212' => [ 62 | 'mode http', 63 | 'stats enable', 64 | 'stats uri /', 65 | ], 66 | }, 67 | } 68 | } 69 | 70 | # add localhost aliases for each enabled service 71 | # at airbnb, this is handled by our infrastructure common cookbook, 72 | # which owns generating /etc/hosts from a template 73 | node.synapse.enabled_services.each do |service_name| 74 | default.common.localhost_aliases << "#{service_name}.synapse" 75 | end 76 | -------------------------------------------------------------------------------- /attributes/test.rb: -------------------------------------------------------------------------------- 1 | include_attribute "smartstack::services" 2 | include_attribute "smartstack::ports" 3 | include_attribute "smartstack::nerve" 4 | include_attribute "smartstack::synapse" 5 | 6 | # attributes loaded only during testing 7 | if node.smartstack.env == 'test' 8 | # which ports to run helloworld on? 
9 | ports = [9494, 9495] 10 | default.smartstack.helloworld.ports = ports 11 | default.smartstack.services.helloworld.nerve.ports = ports 12 | 13 | # enable helloworld 14 | default.nerve.enabled_services << 'helloworld' 15 | default.synapse.enabled_services << 'helloworld' 16 | default.synapse.enabled_services << 'helloworld-leader' 17 | 18 | # zk settings 19 | default.smartstack.zk_version = '3.4.5' 20 | default.smartstack.zk_home = '/srv/zookeeper' 21 | default.zookeeper.smartstack_cluster = ['localhost:2181', 'localhost:3181', 'localhost:4181'] 22 | end 23 | -------------------------------------------------------------------------------- /environments/test.rb: -------------------------------------------------------------------------------- 1 | name "test" 2 | default_attributes({ 3 | "env" => "test" 4 | }) 5 | 6 | -------------------------------------------------------------------------------- /files/default/tests/minitest/support/helpers.rb: -------------------------------------------------------------------------------- 1 | require 'net/http' 2 | require 'json' 3 | 4 | module Helpers 5 | module SmartStack 6 | include MiniTest::Chef::Assertions 7 | include MiniTest::Chef::Context 8 | include MiniTest::Chef::Resources 9 | 10 | # supports the shell_out function 11 | require 'chef/mixin/shell_out' 12 | include Chef::Mixin::ShellOut 13 | 14 | # for querying zookeeper 15 | def zk_cli(command) 16 | script = File.join( 17 | node.smartstack.zk_home, 18 | "zookeeper-#{node.smartstack.zk_version}", 19 | 'bin/zkCli.sh') 20 | shell_out("#{script} #{command}") 21 | end 22 | 23 | # returns parsed json data from all nodes at the given path 24 | def zk_nodes(path) 25 | namelist = zk_cli("ls #{path}").stdout.split("\n").last 26 | node_names = namelist[1...-1].split(',').map{|n| n.strip} 27 | 28 | nodes = [] 29 | node_names.each do |name| 30 | lines = zk_cli("get #{path}/#{name}").stdout.split("\n") 31 | lines.select{|l| l.start_with? 
# Polls an HTTP endpoint until the service behind it stops accepting
# connections.
#
# port - TCP port to probe.
# opts - Hash of string-keyed options:
#        'host'       - host to probe (default 'localhost')
#        'uri'        - request path (default '/health')
#        'max_wait'   - how many seconds to keep polling (default 10)
#        'sleep_time' - pause between probes, in seconds (default 0.2)
#
# Returns true as soon as a probe is refused (Errno::ECONNREFUSED), or false
# if that has not happened once max_wait seconds have elapsed.
def http_wait_for_down(port, opts = {})
  host = opts['host'] || 'localhost'
  uri = opts['uri'] || '/health'
  max_wait = opts['max_wait'] || 10
  sleep_time = opts['sleep_time'] || 0.2

  deadline = Time.now + max_wait
  while Time.now < deadline
    begin
      Net::HTTP.get_response(host, uri, port)
    rescue Errno::ECONNREFUSED
      # nothing is listening any more: the service is down
      return true
    rescue StandardError
      # any other failure is inconclusive; keep polling
    end

    # Pause after every inconclusive probe, failed ones included. Previously
    # only a successful response led to a sleep, so a persistent non-refused
    # error turned this loop into a busy spin for the whole max_wait window.
    sleep sleep_time
  end

  false
end
# Reads the haproxy config file named by
# node.synapse.config.haproxy.config_file_path and parses it into a Hash.
#
# This is a deliberately naive parser; it is only meant to be good enough
# for the assertions made in this cookbook's tests.
#
# Returns a Hash shaped like:
#   'global'   => [config lines]
#   'defaults' => [config lines]
#   'frontend' / 'backend' / 'listen' =>
#     { section name => { 'config' => [config lines], 'address' => String } }
# 'address' is only set for 'listen' and 'frontend' sections and holds
# whatever follows the section name on the header line (possibly nil).
def parsed_haproxy_config
  path = node.synapse.config.haproxy.config_file_path
  config = {
    'global' => [],
    'defaults' => [],

    'frontend' => {},
    'backend' => {},
    'listen' => {},
  }
  unnamed_sections = %w{global defaults}
  addressed_sections = %w{listen frontend}

  # state machine: which section (and which named instance) we are inside
  section = nil
  name = nil
  IO.readlines(path).each do |line|
    line.strip!
    next if line.start_with?('#') || line.empty?

    first, second, rest = line.split(nil, 3)
    if config.key?(first)
      # a section header: enter that section
      section = first
      name = second
      next if unnamed_sections.include?(section)

      if name.nil?
        # a named section without a name cannot be stored; ignore its body
        # instead of crashing on config[section][nil] further down
        section = nil
        next
      end

      entry = (config[section][name] ||= {'config' => []})
      entry['address'] = rest if addressed_sections.include?(section)
    elsif section.nil?
      # a directive outside of any section has nowhere to go; skip it
      next
    elsif unnamed_sections.include?(section)
      config[section] << line
    else
      config[section][name]['config'] << line
    end
  end

  config
end
# Bounces nerve several times in a row and checks that every `sv down` and
# `sv up` exits successfully.
describe 'nerve shutdown handling' do
  it 'restarts cleanly' do
    # Iterate the Range itself. `[0..5].each` walks a one-element Array that
    # merely contains the Range, so the old loop ran a single trial.
    (0..5).each do |_trial|
      down = shell_out!('sv down nerve')
      down.status.exitstatus.must_equal 0

      up = shell_out!('sv up nerve')
      up.status.exitstatus.must_equal 0
    end
  end

  # placeholder: no assertions have been written for signal handling yet
  it 'properly handles signals' do
  end
end
79 | 80 | let(:ports) { helloworld_ports } 81 | 82 | it 'responds to /health' do 83 | ports.each do |port| 84 | response = Net::HTTP.get_response('localhost', '/health', port) 85 | response.must_be_kind_of Net::HTTPOK 86 | end 87 | end 88 | 89 | it 'responds to /ping' do 90 | ports.each do |port| 91 | response = Net::HTTP.get_response('localhost', '/ping', port) 92 | response.must_be_kind_of Net::HTTPOK 93 | end 94 | end 95 | end 96 | 97 | describe 'proper discovery' do 98 | context 'when the service is up' do 99 | it 'is properly registered in zookeeper' do 100 | nerve_config['services'].each do |name, service| 101 | service_entry = { 102 | 'host'=>service['host'], 'port'=>service['port'], 'name'=>nerve_config['instance_id']} 103 | nodes = zk_nodes(service['zk_path']) 104 | 105 | nodes.must_include service_entry 106 | end 107 | end 108 | 109 | it 'is available via synapse' do 110 | synapse_port = synapse_config['services']['helloworld']['haproxy']['port'] 111 | 112 | response = Net::HTTP.get_response('localhost', '/health', synapse_port) 113 | response.must_be_kind_of Net::HTTPOK 114 | end 115 | 116 | it 'is only available on localhost' do 117 | synapse_port = synapse_config['services']['helloworld']['haproxy']['port'] 118 | 119 | assert_raises(Errno::ECONNREFUSED) { 120 | Net::HTTP.get_response(node.ipaddress, '/health', synapse_port) 121 | } 122 | end 123 | end 124 | 125 | context 'when the service is down' do 126 | before do 127 | @pid = IO.read('/var/run/haproxy.pid') 128 | stop_all('helloworld', helloworld_ports) 129 | end 130 | 131 | after do 132 | start_all('helloworld', helloworld_ports) 133 | end 134 | 135 | it 'is unavailable in zookeeper' do 136 | nerve_config['services'].each do |name, service| 137 | service_entry = { 138 | 'host'=>service['host'], 'port'=>service['port'], 'name'=>nerve_config['instance_id']} 139 | nodes = zk_nodes(service['zk_path']) 140 | 141 | nodes.wont_include service_entry 142 | end 143 | end 144 | 145 | it 'is unreachable via 
synapse' do 146 | synapse_port = synapse_config['services']['helloworld']['haproxy']['port'] 147 | 148 | response = Net::HTTP.get_response('localhost', '/health', synapse_port) 149 | response.must_be_kind_of Net::HTTPServiceUnavailable 150 | end 151 | 152 | it "hasn't caused haproxy to restart" do 153 | IO.read('/var/run/haproxy.pid').must_equal @pid 154 | end 155 | end 156 | 157 | context 'when the service has been restarted' do 158 | before do 159 | stop_all('helloworld', helloworld_ports) 160 | start_all('helloworld', helloworld_ports) 161 | end 162 | 163 | it 'is again available in zookeeper' do 164 | nerve_config['services'].each do |name, service| 165 | service_entry = { 166 | 'host'=>service['host'], 'port'=>service['port'], 'name'=>nerve_config['instance_id']} 167 | nodes = zk_nodes(service['zk_path']) 168 | 169 | nodes.must_include service_entry 170 | end 171 | end 172 | end 173 | end 174 | 175 | describe 'synapse haproxy handling' do 176 | let(:haproxy_config) { parsed_haproxy_config } 177 | 178 | it %{generates the correct frontend and backend stanzas} do 179 | haproxy_config['frontend'].must_include 'helloworld' 180 | haproxy_config['backend'].must_include 'helloworld' 181 | 182 | haproxy_config['frontend']['helloworld']['config'].must_include 'default_backend helloworld' 183 | haproxy_config['frontend']['helloworld']['config'].must_include( 184 | "bind localhost:#{node.smartstack.service_ports['helloworld']}") 185 | 186 | helloworld_ports.each do |port| 187 | haproxy_config['backend']['helloworld']['config'].to_s.must_match( 188 | /"server[^"]*#{node.ipaddress}:#{port}/) 189 | end 190 | end 191 | 192 | it %{puts only one of multiple backends into haproxy config when leader-election is enabled} do 193 | nodes = zk_nodes(synapse_config['services']['helloworld-leader']['discovery']['path']) 194 | nodes.count.must_be :>, 1 195 | 196 | backend_lines = haproxy_config['backend']['helloworld-leader']['config'] 197 | server_lines = backend_lines.select{|l| 
class Chef
  class Node
    class ImmutableMash
      # Builds a plain Hash from this mash, recursing into every value that
      # itself offers deep_to_hash or deep_to_a; other values are stored
      # unchanged.
      def deep_to_hash
        result = {}
        each do |key, value|
          result[key] =
            if value.respond_to?('deep_to_hash')
              value.deep_to_hash
            elsif value.respond_to?('deep_to_a')
              value.deep_to_a
            else
              value
            end
        end
        result
      end
    end

    class ImmutableArray
      # Builds a plain Array from this array, recursing into every element
      # that itself offers deep_to_hash or deep_to_a; other elements are
      # appended unchanged.
      def deep_to_a
        result = []
        each do |element|
          result <<
            if element.respond_to?('deep_to_hash')
              element.deep_to_hash
            elsif element.respond_to?('deep_to_a')
              element.deep_to_a
            else
              element
            end
        end
        result
      end
    end
  end
end
14 | depends 'ruby', '~> 0.9.2' 15 | depends 'java' 16 | 17 | supports 'ubuntu', '= 12.04' 18 | -------------------------------------------------------------------------------- /recipes/default.rb: -------------------------------------------------------------------------------- 1 | # set up common smartstack stuff 2 | user node.smartstack.user do 3 | home node.smartstack.home 4 | shell '/sbin/nologin' 5 | system true 6 | end 7 | 8 | directory node.smartstack.home do 9 | owner node.smartstack.user 10 | group node.smartstack.user 11 | recursive true 12 | end 13 | 14 | # we need git to install smartstack 15 | package 'git' 16 | 17 | # we use runit to set up the services 18 | include_recipe 'runit' 19 | 20 | # we're going to need ruby too! 21 | include_recipe 'ruby' 22 | gem_package 'bundler' 23 | 24 | # clean up old crap 25 | # TODO: remove eventually 26 | %w{/opt/nerve /opt/synapse}.each do |old_dir| 27 | directory old_dir do 28 | action :delete 29 | recursive true 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /recipes/nerve.rb: -------------------------------------------------------------------------------- 1 | # set up common stuff first 2 | include_recipe 'smartstack::default' 3 | 4 | # set up nerve 5 | directory node.nerve.home do 6 | owner node.smartstack.user 7 | group node.smartstack.user 8 | recursive true 9 | end 10 | 11 | if node.smartstack.jar_source && node.nerve.jarname 12 | include_recipe 'java' 13 | 14 | url = "#{node.smartstack.jar_source}/nerve/#{node.nerve.jarname}" 15 | remote_file File.join(node.nerve.home, node.nerve.jarname) do 16 | source url 17 | mode 00644 18 | end 19 | else 20 | git node.nerve.install_dir do 21 | user node.smartstack.user 22 | group node.smartstack.user 23 | repository node.nerve.repository 24 | reference node.nerve.reference 25 | enable_submodules true 26 | action :sync 27 | notifies :run, 'execute[nerve_install]', :immediately 28 | notifies :restart, 
'runit_service[nerve]' 29 | end 30 | 31 | # do the actual install of nerve and dependencies 32 | execute "nerve_install" do 33 | cwd node.nerve.install_dir 34 | user node.smartstack.user 35 | group node.smartstack.user 36 | action :nothing 37 | 38 | environment ({'GEM_HOME' => node.smartstack.gem_home}) 39 | command "bundle install --without development" 40 | end 41 | end 42 | 43 | # add all checks from all the enabled services 44 | # we do this in the recipe to avoid wierdness with attribute load order 45 | node.nerve.enabled_services.each do |service_name| 46 | unless node.smartstack.services.include? service_name 47 | Chef::Log.warn "[nerve] skipping non-existent service #{service_name}" 48 | next 49 | end 50 | 51 | service = node.smartstack.services[service_name].deep_to_hash 52 | 53 | unless service.include? 'nerve' 54 | Chef::Log.warn "[nerve] skipping unconfigured service #{service_name}" 55 | next 56 | end 57 | 58 | check = service['nerve'] 59 | check['zk_hosts'] = node.zookeeper.smartstack_cluster 60 | check['zk_path'] = service['zk_path'] 61 | check['host'] = node.ipaddress 62 | 63 | # support multiple copies of the service on one machine with multiple ports in services 64 | check['ports'] ||= [] 65 | check['ports'] << check['port'] if check['port'] 66 | Chef::Log.warn "[nerve] service #{service_name} has no check ports configured" if check['ports'].empty? 
67 | 68 | # add the checks to the nerve config 69 | check['ports'].each do |port| 70 | check['port'] = port 71 | node.default.nerve.config.services["#{service_name}_#{port}"] = check 72 | end 73 | end 74 | 75 | # write the config to the config file for nerve 76 | file node.nerve.config_file do 77 | user node.smartstack.user 78 | group node.smartstack.user 79 | content JSON::pretty_generate(node.nerve.config.deep_to_hash) 80 | notifies :restart, 'runit_service[nerve]' 81 | end 82 | 83 | # set up runit service 84 | # we don't want a converge to randomly start nerve if someone is debugging 85 | # so, we only enable nerve; setting it up initially causes it to start, 86 | runit_service 'nerve' do 87 | action :enable 88 | default_logger true 89 | end 90 | -------------------------------------------------------------------------------- /recipes/nerve_disable.rb: -------------------------------------------------------------------------------- 1 | # uninstall nerve 2 | 3 | # disable runit servie 4 | include_recipe 'runit' 5 | runit_service 'nerve' do 6 | action :disable 7 | end 8 | 9 | # remove nerve home 10 | directory node.nerve.home do 11 | action :delete 12 | recursive true 13 | end 14 | -------------------------------------------------------------------------------- /recipes/synapse.rb: -------------------------------------------------------------------------------- 1 | # set up common stuff first 2 | include_recipe 'smartstack::default' 3 | 4 | # set up haproxy 5 | package 'haproxy' do 6 | action :upgrade 7 | end 8 | 9 | file '/etc/default/haproxy' do 10 | mode 00444 11 | content 'ENABLED=1' 12 | end 13 | 14 | directory node.synapse.haproxy.sock_dir do 15 | owner 'haproxy' 16 | group 'haproxy' 17 | end 18 | 19 | # allow synapse to write the haproxy config 20 | file node.synapse.config.haproxy.config_file_path do 21 | owner 'haproxy' 22 | group node.smartstack.user 23 | mode 00664 24 | end 25 | 26 | # allow synapse to restart haproxy 27 | file 
File.join("/etc/sudoers.d", node.smartstack.user) do 28 | owner "root" 29 | group "root" 30 | mode 0440 31 | content "#{node.smartstack.user} ALL= NOPASSWD: /usr/sbin/service haproxy reload\n" 32 | end 33 | 34 | # get the synapse code 35 | directory node.synapse.home do 36 | owner node.smartstack.user 37 | group node.smartstack.user 38 | recursive true 39 | end 40 | 41 | if node.smartstack.jar_source && node.synapse.jarname 42 | include_recipe 'java' 43 | 44 | url = "#{node.smartstack.jar_source}/synapse/#{node.synapse.jarname}" 45 | remote_file File.join(node.synapse.home, node.synapse.jarname) do 46 | source url 47 | mode 00644 48 | end 49 | else 50 | git node.synapse.install_dir do 51 | user node.smartstack.user 52 | group node.smartstack.user 53 | repository node.synapse.repository 54 | reference node.synapse.reference 55 | enable_submodules true 56 | action :sync 57 | notifies :run, 'execute[synapse_install]', :immediately 58 | notifies :restart, 'runit_service[synapse]' 59 | end 60 | 61 | # do the actual install of synapse and dependencies 62 | execute "synapse_install" do 63 | cwd node.synapse.install_dir 64 | user node.smartstack.user 65 | group node.smartstack.user 66 | action :nothing 67 | 68 | environment ({'GEM_HOME' => node.smartstack.gem_home}) 69 | command "bundle install --without development" 70 | end 71 | end 72 | 73 | # add the enabled services to the synapse config 74 | # we do this here to avoid wierdness with attribute load order 75 | node.synapse.enabled_services.each do |service_name| 76 | service = node.smartstack.services[service_name] 77 | unless service && service.include?('synapse') 78 | Chef::Log.warn "[synapse] skipping service #{service_name} -- it has no synapse config" 79 | next 80 | end 81 | 82 | # build the synapse config hash 83 | synapse_config = service['synapse'].deep_to_hash 84 | 85 | # set the haproxy port 86 | synapse_config['haproxy']['port'] = service['local_port'] 87 | 88 | # enable proper logging 89 | if 
# uninstall synapse

# disable runit service
include_recipe 'runit'
runit_service "synapse" do
  action :disable
end

# clean up haproxy
package 'haproxy' do
  action :remove
end

# The synapse recipe writes its ENABLED=1 flag to /etc/default/haproxy
# (singular "default"). The old path, /etc/defaults/haproxy, never matched
# it, so the flag file was left behind.
file '/etc/default/haproxy' do
  action :delete
end

directory node.synapse.haproxy.sock_dir do
  action :delete
  recursive true
end

# remove synapse home
directory node.synapse.home do
  action :delete
  recursive true
end
# set up the hello world service: one runit-managed sinatra instance per
# configured port
gem_package 'sinatra'

include_recipe 'runit'
node.smartstack.helloworld.ports.each do |port|
  runit_service "helloworld#{port}" do
    run_template_name 'helloworld'
    action [:enable, :start]
    default_logger true

    options({
      :port => port
    })
  end
end

# set up a zookeeper cluster
include_recipe 'java'
include_recipe 'runit'

user 'zookeeper' do
  action :create
  home node.smartstack.zk_home
end

directory node.smartstack.zk_home do
  recursive true
  owner 'zookeeper'
  group 'zookeeper'
end

# NOTE: the URL must not carry trailing whitespace; the previous literal
# ended in ".tar.gz " (with a space), which is not the tarball's address
zk_source = "http://mirror.cogentco.com/pub/apache/zookeeper/" +
  "zookeeper-#{node.smartstack.zk_version}/zookeeper-#{node.smartstack.zk_version}.tar.gz"
remote_file File.join(node.smartstack.zk_home, "#{node.smartstack.zk_version}.tar.gz") do
  source zk_source
  owner 'zookeeper'
  group 'zookeeper'
  mode 00644
  action :create_if_missing
end

# unpack the tarball once; `creates` skips the command when zk_dir exists
zk_dir = File.join(node.smartstack.zk_home, "zookeeper-#{node.smartstack.zk_version}")
execute 'extract_zookeeper' do
  cwd node.smartstack.zk_home
  user 'zookeeper'
  command "tar zxf #{node.smartstack.zk_version}.tar.gz"
  creates zk_dir
end
do 70 | owner 'zookeeper' 71 | group 'zookeeper' 72 | mode 00644 73 | notifies :restart, "runit_service[zookeeper#{zk_id}]" 74 | variables({ 75 | :dir => dir, 76 | :port => port, 77 | }) 78 | end 79 | 80 | runit_service "zookeeper#{zk_id}" do 81 | action [:enable, :start] 82 | default_logger true 83 | run_template_name 'zookeeper' 84 | 85 | options({ 86 | :dir => dir 87 | }) 88 | end 89 | end 90 | 91 | # make sure that nerve and synapse are running 92 | runit_service 'nerve' do 93 | action :start 94 | end 95 | 96 | runit_service 'synapse' do 97 | action :start 98 | end 99 | 100 | # we use this in our tests in this cookbook 101 | chef_gem 'minitest-spec-context' 102 | -------------------------------------------------------------------------------- /templates/default/sv-helloworld-run.erb: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | exec ruby -r sinatra -e " 4 | set :port, <%= @options[:port] %> 5 | set :bind, '0.0.0.0' 6 | 7 | get '/' do 8 | 'hello' 9 | end 10 | 11 | get '/health' do 12 | 'OK' 13 | end 14 | 15 | get '/ping' do 16 | 'PONG' 17 | end 18 | " 19 | -------------------------------------------------------------------------------- /templates/default/sv-nerve-run.erb: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | <% if node.nerve.jarname %> 4 | # run the java version of nerve 5 | cd <%= node.nerve.home %> 6 | exec chpst -u <%= node.smartstack.user %> java -Xmx64m -XX:PermSize=64m -jar <%= node.nerve.jarname %> --config <%= node.nerve.config_file %> 2>&1 7 | 8 | <% else %> 9 | # run the ruby version of nerve 10 | cd <%= node.nerve.install_dir %> 11 | export GEM_HOME=<%= node.smartstack.gem_home %> 12 | exec chpst -u <%= node.smartstack.user %> bundle exec ./bin/nerve --config <%= node.nerve.config_file %> 2>&1 13 | 14 | <% end %> 15 | -------------------------------------------------------------------------------- 
/templates/default/sv-synapse-run.erb: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | <% if node.synapse.jarname %> 4 | # run the java version of synapse 5 | cd <%= node.synapse.home %> 6 | exec chpst -u <%= node.smartstack.user %> java <%= node.synapse.jvmopts %> -jar <%= node.synapse.jarname %> --config <%= node.synapse.config_file %> 2>&1 7 | 8 | <% else %> 9 | # run the ruby version of synapse 10 | cd <%= node.synapse.install_dir %> 11 | export GEM_HOME=<%= node.smartstack.gem_home %> 12 | exec chpst -u <%= node.smartstack.user %> bundle exec ./bin/synapse --config <%= node.synapse.config_file %> 2>&1 13 | 14 | <% end %> 15 | -------------------------------------------------------------------------------- /templates/default/sv-zookeeper-run.erb: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit -o nounset -o pipefail 3 | 4 | zk_root=<%= File.join(node.smartstack.zk_home, "zookeeper-#{node.smartstack.zk_version}") %> 5 | zk_cfg=<%= File.join(@options[:dir], 'zookeeper.cfg') %> 6 | zk_main=org.apache.zookeeper.server.quorum.QuorumPeerMain 7 | 8 | java_opts=( -Xms64m -Xmx128m "$zk_main" "$zk_cfg" ) 9 | 10 | function set_classpath { 11 | for zk in $(find "$zk_root"/ -name 'zookeeper-*.jar') 12 | do 13 | echo 'zk.jar:' $zk 14 | CLASSPATH="$zk${CLASSPATH:+:$CLASSPATH}" 15 | done 16 | 17 | for lib in $(find "$zk_root/lib" -name '*.jar') 18 | do 19 | CLASSPATH="$lib${CLASSPATH:+:$CLASSPATH}" 20 | done 21 | export CLASSPATH 22 | } 23 | 24 | set_classpath 25 | echo "classpath is $CLASSPATH" 26 | 27 | ulimit -n 65536 28 | exec 2>&1 chpst -u zookeeper:zookeeper java "${java_opts[@]}" 29 | -------------------------------------------------------------------------------- /templates/default/zookeeper.cfg.erb: -------------------------------------------------------------------------------- 1 | # The number of milliseconds of each tick 2 | tickTime=2000 3 | 
# The number of ticks that the initial
# synchronization phase can take
initLimit=20
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=10
# the directory where the snapshot is stored.
dataDir=<%= @dir %>/data
# the port at which the clients will connect
clientPort=<%= @port %>
# limit on queued clients - default: 1000
globalOutstandingLimit=1000
# number of transactions before snapshots are taken - default: 10000
# (the ZooKeeper setting is spelled snapCount; "spanCount" is not a
# ZooKeeper setting, so it had no effect)
snapCount=10000
# max # of clients - 0==unlimited
maxClientCnxns=0
# Election implementation to use. A value of "0" corresponds to the original
# UDP-based version, "1" corresponds to the non-authenticated UDP-based
# version of fast leader election, "2" corresponds to the authenticated
# UDP-based version of fast leader election, and "3" corresponds to TCP-based
# version of fast leader election. Currently, only 0 and 3 are supported,
# 3 being the default
electionAlg=3
# Leader accepts client connections. Default value is "yes". The leader
# machine coordinates updates. For higher update throughput at the slight
# expense of read throughput the leader can be configured to not accept
# clients and focus on coordination.
leaderServes=yes
# Skips ACL checks. This results in a boost in throughput, but opens up full
# access to the data tree to everyone.
skipACL=yes

server.0=127.0.0.1:2182:2183
server.1=127.0.0.1:3182:3183
server.2=127.0.0.1:4182:4183