├── .gitignore ├── API.md ├── Changelog.md ├── Example.scala ├── LICENSE ├── Readme.md ├── pom.xml └── src ├── main └── scala │ └── com │ └── boundary │ └── ordasity │ ├── Claimer.scala │ ├── Cluster.scala │ ├── ClusterConfig.scala │ ├── ClusterListener.scala │ ├── Deserializers.scala │ ├── TokenQueue.scala │ ├── ZKUtils.scala │ ├── balancing │ ├── BalancingPolicy.scala │ ├── CountBalancingPolicy.scala │ └── MeteredBalancingPolicy.scala │ └── listeners │ ├── ClusterNodesChangedListener.scala │ ├── HandoffResultsListener.scala │ └── VerifyIntegrityListener.scala └── test └── scala └── com └── boundary └── ordasity ├── ClusterConfigSpec.scala ├── ClusterSpec.scala ├── Deserializers.scala ├── ZKUtilsSpec.scala ├── balancing ├── BalancingPolicySpec.scala ├── CountBalancingPolicySpec.scala └── MeteredBalancingPolicySpec.scala └── listeners ├── ClusterNodesChangedListenerSpec.scala ├── HandoffResultsListenerSpec.scala └── VerifyIntegrityListenerSpec.scala /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | target/ 3 | build/ 4 | .settings 5 | .project 6 | .classpath 7 | *.class 8 | *.ipr 9 | *.iml 10 | *.iws 11 | *~ 12 | ~* 13 | #* 14 | *# 15 | .idea -------------------------------------------------------------------------------- /API.md: -------------------------------------------------------------------------------- 1 | # Ordasity API Documentation 2 | 3 | ## Table of Contents. 4 | 1. Overview 5 | 2. Cluster 6 | 3. ClusterConfig 7 | 4. ClusterListener and SmartListener 8 | 5. JMX 9 | 10 | ## Overview 11 | 12 | Ordasity's public API is small, consisting of the "Cluster" class, "ClusterConfig" (its configuration), and a listener ("ClusterListener" or "SmartListener"). The library also exposes runtime metrics / instrumentation and a management interface via JMX. 13 | 14 | --- 15 | 16 | ## Cluster 17 | #### Class Name: com.boundary.ordasity.Cluster 18 | 19 | The "Cluster" class is the main point of interaction with Ordasity. It is used to initialize, join, and exit a cluster of nodes. 20 | 21 | To initialize a new cluster, use its constructor: 22 | 23 | new Cluster(name: String, listener: Listener, config: ClusterConfig) 24 | 25 | – The "Name" field denotes the name of the clustered service you're launching, as it should appear in Zookeeper. 26 | 27 | – The Listener is either a ClusterListener or SmartListener (see #4), which directs your application to either start or shut down work. 28 | 29 | – Finally, the ClusterConfig is a simple configuration class which defines the options and behavior of your cluster. 30 | 31 | The Cluster class exposes two public methods intended to be used as the public API: 32 | 33 | #### **join()** 34 | Calling *cluster.join()* initializes the node's connection to Zookeeper, joins the cluster, claims work based on the policy specified, and begins operation. 35 | 36 | #### **shutdown()** 37 | Calling *cluster.shutdown() drains all work claimed by this node over the time period provided in the ClusterConfig (default: 60 seconds), prevents it from claiming new work, and exits the cluster. 38 | 39 | --- 40 | 41 | ## ClusterConfig 42 | #### Class Name: com.boundary.ordasity.ClusterConfig 43 | 44 | ClusterConfig defines your node's configuration. It is important that each node in your cluster be launched with the same configuration; behavior is otherwise undefined and could result in work units not being claimed. 
45 | 46 | A ClusterConfig is initialized by calling its constructor, which takes a Zookeeper connection string: 47 | 48 | new ClusterConfig("zookeeper-0:2181,zookeeper-1:2181,zookeeper-2:2181") 49 | 50 | ClusterConfig uses a builder pattern (with defaults), allowing you to specify a configuration like so: 51 | 52 | val config = new ClusterConfig("localhost:2181"). 53 | setAutoRebalance(true). 54 | setRebalanceInterval(60). 55 | useSmartBalancing(true). 56 | setDrainTime(60). 57 | setZKTimeout(3000) 58 | 59 | ClusterConfig exposes 10 configuration options. Here are these options and their defaults: 60 | 61 | #### enableAutoRebalance 62 | *Default:* true 63 | 64 | *Setter method:* setAutoRebalance(to: Boolean) 65 | 66 | *Description*: The "enableAutoRebalance" parameter determines whether or not Ordasity should schedule periodic rebalancing automatically. If enabled, this will occur at the autoRebalanceInterval (see below). 67 | 68 | #### autoRebalanceInterval 69 | *Default:* 60 (seconds) 70 | 71 | *Setter method:* setAutoRebalanceInterval(to: Boolean) 72 | 73 | *Description*: The "autoRebalanceInterval" parameter determines how frequently Ordasity should schedule rebalancing of this node's workload, if auto-rebalancing is enabled. If auto-rebalancing is not enabled, this parameter has no effect. 74 | 75 | #### drainTime 76 | *Default:* 60 (seconds) 77 | 78 | *Setter method:* setDrainTime(to: Int) 79 | 80 | *Description*: The "drainTime" parameter determines the period over which Ordasity should release work units to the rest of the cluster during a rebalance or upon shutdown. For example, if 15 work units were to be released over a 60-second period (drainTime == 60), the library would release one work unit every four seconds. 81 | 82 | 83 | #### useSmartBalancing 84 | *Default*: false 85 | 86 | *Setter method:* useSmartBalancing(to: Boolean) 87 | 88 | *Description*: The "useSmartBalancing" parameter determines whether Ordasity should use "load-based" (smart) balancing, or "count-based" (simple) balancing. Load-based balancing attempts to distribute load by the amount of "work" required for each work unit, ideally resulting in an even CPU/IO load throughout the cluster. See *Section 5: Rebalancing* in the primary Readme for more information on smart balancing. 89 | 90 | **Note:** If you enable smart balancing, be sure to initialize your Cluster with a SmartListener rather than a ClusterListener. 91 | 92 | #### zkTimeout 93 | *Default*: 3000 (ms) 94 | 95 | *Setter method:* setZKTimeout(to: Int) 96 | 97 | *Description*: The "zkTimeout" parameter determines the timeout to be passed to the Zookeeper client library. If the connection to Zookeeper times out, the node will consider itself disconnected from the cluster. Ordasity passes this parameter directly to the Zookeeper client; it is not used otherwise. 98 | 99 | #### workUnitName and workUnitShortName 100 | *Default*: "work-units" and "work," respectively 101 | 102 | *Setter methods*: setWorkUnitName(to: String) and setWorkUnitShortName(to: String) 103 | 104 | *Description*: The workUnitName and workUnitShortName parameters allow you to specify a specific name to be given to the type of work being performed by the cluster. These parameters determine the paths to be used in Zookeeper for fetching a list of work units. By default, Ordasity will look for work units at "/work-units". If workUnitName is set to something else (such as "shards"), Ordasity will look for them at "/shards". 
Both of these methods can be considered "sugar" in that they're primarily used to tie configuration in Zookeeper to your specific application more closely, and for friendlier logging.
105 | 
106 | #### nodeId
107 | *Default*: InetAddress.getLocalHost().getHostName() (i.e., the system's hostname)
108 | 
109 | *Setter method*: setNodeId(to: String)
110 | 
111 | *Description*: The "nodeId" parameter determines how a node in an Ordasity cluster should identify itself to others in the cluster. This defaults to the system's hostname, but you are welcome to set a custom name if you wish.
112 | 
113 | #### useSoftHandoff
114 | *Default*: false
115 | 
116 | *Setter method*: setUseSoftHandoff(to: Boolean)
117 | 
118 | *Description*: The "useSoftHandoff" parameter determines whether or not a node in the cluster should attempt to hand off work to other nodes in the cluster before shutting down a work unit when it is being drained for shutdown or during a rebalance. If enabled, when releasing a work unit to another node in the cluster, Ordasity will initiate a handoff and ensure a period of overlap (see handoffShutdownDelay below) before calling shutdownWork on your listener.
119 | 
120 | #### handoffShutdownDelay
121 | *Default*: 10 (seconds)
122 | 
123 | *Setter method:* setHandoffShutdownDelay(to: Int)
124 | 
125 | *Description*: The "handoffShutdownDelay" parameter determines the overlap period for a handoff operation. More specifically, when one node hands off a work unit to another, this parameter controls the amount of time the original node should continue serving a work unit before calling "shutdownWork" on your listener.
126 | 
127 | ---
128 | 
129 | ## ClusterListener and SmartListener
130 | #### Class Name: com.boundary.ordasity.{ClusterListener, SmartListener}
131 | 
132 | When you initialize a new Cluster, you must supply a "listener" which Ordasity uses to direct your application's workload. ClusterListener and SmartListener are nearly identical, with the sole difference being that ClusterListener is for use with Ordasity's "simple" or count-based load balancing strategy, and SmartListener is for use with Ordasity's "smart" or load-based balancing strategy. In terms of implementation, the only difference is that SmartListener will hand you a Meter to mark as your application performs work.
133 | 
134 | In general, we recommend enabling Smart Balancing and initializing your cluster with SmartListeners.
135 | 
136 | Here's how to implement a **ClusterListener**:
137 | 
138 | val listener = new ClusterListener {
139 | // Called when the node has joined the cluster
140 | def onJoin(client: ZooKeeperClient) = { }
141 | 
142 | // Called when this node should begin serving a work unit
143 | def startWork(workUnit: String) { }
144 | 
145 | // Called when this node should stop serving a work unit
146 | def shutdownWork(workUnit: String) = { }
147 | 
148 | // Called when this node has left the cluster.
149 | def onLeave() = { }
150 | }
151 | 
152 | Here's how to implement a **SmartListener**:
153 | 
154 | val listener = new SmartListener {
155 | // Called when the node has joined the cluster
156 | def onJoin(client: ZooKeeperClient) = { }
157 | 
158 | // Called when this node should begin serving a work unit
159 | def startWork(workUnit: String, meter: Meter) { }
160 | 
161 | // Called when this node should stop serving a work unit
162 | def shutdownWork(workUnit: String) = { }
163 | 
164 | // Called when this node has left the cluster.
165 | def onLeave() = { } 166 | } 167 | 168 | As your application performs work (be it processing an event, serving a query, or handling a request), just call meter.mark() (or meter.mark(someAmount: Int)) to indicate to Ordasity how much "work" is actually being done in service of each work item. 169 | 170 | --- 171 | 172 | ## JMX 173 | 174 | Ordasity exposes several metrics via JMX for runtime instrumentation. These metrics include the share of the cluster's load this node is serving (if smart balancing is enabled), the number of work units being served by this node, and a list of these work units. They'll be located in JConsole under "com.boundary.ordasity.Cluster". 175 | 176 | Finally, Ordasity exposes the "join()" and "shutdown()" methods of Cluster via JMX to allow for remote management of your application. These methods are located at *serviceName.Cluster* in JConsole. 177 | 178 | These methods are useful for removing a node from your cluster without exiting the process for maintenance, and for "draining" a node before restarting it during a deploy. 179 | 180 | **Here is an example that triggers a drain and shutdown:** 181 | 182 | import java.util.Hashtable 183 | import javax.management.ObjectName 184 | import javax.management.remote.JMXServiceURL 185 | import javax.management.remote.JMXConnectorFactory 186 | 187 | val jmxPort = "8083" 188 | val jmxHost = "localhost" 189 | val serviceName = "example" 190 | val workUnitShortName = "work" 191 | 192 | val jmxUrl = new JMXServiceURL("service:jmx:rmi:///jndi/rmi://" + jmxHost + ":" + jmxPort + "/jmxrmi") 193 | val remote = JMXConnectorFactory.connect(jmxUrl).getMBeanServerConnection 194 | 195 | println("Invoking shutdown...") 196 | remote.invoke(new ObjectName(serviceName + ":name=Cluster"), "shutdown", Array[Object](), Array[String]()) 197 | 198 | val hash = new Hashtable[String, String] 199 | hash.put("type", "Cluster") 200 | hash.put("name", "my_" + workUnitShortName + "_count") 201 | 202 | var workUnitCount = remote.getAttribute(new ObjectName("com.boundary.ordasity", hash), "Value").asInstanceOf[Int] 203 | while (workUnitCount > 0) { 204 | workUnitCount = remote.getAttribute(new ObjectName("com.boundary.ordasity", hash), "Value").asInstanceOf[Int] 205 | println("Waiting for drain to complete. Remaining work units: " + workUnitCount) 206 | Thread.sleep(2000) 207 | } 208 | 209 | println("Graceful handoff complete. Node shut down.") -------------------------------------------------------------------------------- /Changelog.md: -------------------------------------------------------------------------------- 1 | # Ordasity Changelog 2 | ### Version History 3 | 4 | #### Current: Ordasity 0.2.0 5 | **Changes Since Previous Release:** 6 | 7 | – Resolved a distributed race condition which can occur when using Ordasity's "handoff" capability at the very end of the process. 8 | 9 | – Resolved a case in which a node may fail to recognize that it has completed the claiming process after accepting handoff of a work unit from another node. 10 | 11 | #### Previous Releases: 12 | ###### Ordasity 0.1.9 :: 13 | – Resolved a case in which a node in an Ordasity cluster could find itself in a confused state after losing connection to a Zookeeper node, but successfully re-establishing the connection prior to the expiration of the Zookeeper session timeout. 14 | 15 | – Migrated node state information stored in Zookeeper from a flat string to structured JSON. 
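For a rough illustration of what that structured state looks like, here's a sketch built on the NodeInfo helper from the current Cluster.scala source (the exact field layout written by 0.1.9 may differ, and the session ID below is a made-up placeholder):

```scala
import com.boundary.ordasity.{JsonUtils, NodeInfo, NodeState}

// NodeInfo pairs the node's state with the ZooKeeper session ID that owns its ephemeral znode.
val info = new NodeInfo(NodeState.Started.toString, 123456789L)

// Serializing with the project's Jackson mapper yields JSON along the lines of:
// {"state":"Started","connectionID":123456789}
println(JsonUtils.OBJECT_MAPPER.writeValueAsString(info))
```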
16 | 17 | Nodes running a previous version of Ordasity will see nodes running Ordasity 0.1.9+, but will view them in a "fallback" mode and operate under the safe assumption that the unrecognized nodes might not attempt to claim work. This safeguard ensures that all work units will remain claimed during the upgrade, but may result in 0.1.8 nodes claiming more than their fair share while it's in progress. As such, this release is safe for a rolling upgrade with no special treatment required. 18 | 19 | 20 | ###### Ordasity 0.1.8 :: 21 | Initial release -------------------------------------------------------------------------------- /Example.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | import java.util.Random 18 | import java.util.concurrent.CountDownLatch 19 | import com.boundary.ordasity.{Cluster, ClusterConfig, SmartListener} 20 | import com.yammer.metrics.scala.Meter 21 | import com.twitter.common.zookeeper.ZooKeeperClient 22 | import java.util.concurrent.{ScheduledThreadPoolExecutor, TimeUnit, ScheduledFuture} 23 | import java.util.{HashMap, TimerTask} 24 | 25 | val random = new Random() 26 | val latch = new CountDownLatch(1) 27 | val pool = new ScheduledThreadPoolExecutor(1) 28 | 29 | val futures = new HashMap[String, ScheduledFuture[_]] 30 | 31 | val config = ClusterConfig.builder(). 32 | setHosts("localhost:2181"). 33 | setEnableAutoRebalance(true). 34 | setAutoRebalanceInterval(15). 35 | setUseSmartBalancing(true). 36 | setDrainTime(3). 37 | setZkTimeout(3). 38 | setUseSoftHandoff(true). 39 | setNodeId(java.util.UUID.randomUUID().toString).build() 40 | 41 | val listener = new SmartListener { 42 | def onJoin(client: ZooKeeperClient) = {} 43 | def onLeave() = {} 44 | 45 | // Do yer thang, mark dat meter. 46 | def startWork(workUnit: String, meter: Meter) = { 47 | val task = new TimerTask { 48 | def run() = meter.mark(random.nextInt(1000)) 49 | } 50 | val future = pool.scheduleAtFixedRate(task, 0, 1, TimeUnit.SECONDS) 51 | futures.put(workUnit, future) 52 | } 53 | 54 | // Stop doin' that thang 55 | def shutdownWork(workUnit: String) { 56 | futures.get(workUnit).cancel(true) 57 | } 58 | } 59 | 60 | val clustar = new Cluster("example_service", listener, config) 61 | 62 | clustar.join() 63 | latch.await() 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # Ordasity 2 | 3 | ## Table of Contents 4 | 1. Overview, Use Cases, and Features 5 | 2. A Clustered Service in 30 Seconds 6 | 3. In Action at Boundary 7 | 4. Distribution / Coordination Strategy 8 | 5. Rebalancing 9 | 6. Draining and Handoff 10 | 7. Wrapping Up 11 | 8. [API Documentation](https://github.com/boundary/ordasity/wiki/Ordasity-API-Documentation) 12 | 13 | 14 | ## Building Stateful Clustered Services on the JVM 15 | 16 | Ordasity is a library designed to make building and deploying reliable clustered services on the JVM as straightforward as possible. It's written in Scala and uses Zookeeper for coordination. 17 | 18 | Ordasity's simplicity and flexibility allows us to quickly write, deploy, and (most importantly) operate distributed systems on the JVM without duplicating distributed "glue" code or revisiting complex reasoning about distribution strategies. 19 | 20 | --- 21 | 22 | ### Primary Use Cases 23 | 24 | Ordasity is designed to spread persistent or long-lived workloads across several machines. It's a toolkit for building systems which can be described in terms of individual nodes serving a partition or shard of a cluster's total load. Ordasity is not designed to express a "token range" (though it may be possible to implement one); the focus is on discrete work units. 25 | 26 | --- 27 | 28 | ### Features 29 | - Cluster membership (joining / leaving / mutual awareness) 30 | - Work claiming and distribution 31 | - Load-based workload balancing 32 | - Count-based workload balancing 33 | - Automatic periodic rebalancing 34 | - Graceful cluster exiting ("draining") 35 | - Graceful handoff of work units between nodes 36 | - Pegging of work units to a specific node 37 | 38 | --- 39 | 40 | ### A Clustered Service in 30 Seconds 41 | 42 | Let's get started with an example. Here's how to build a clustered service in 25 lines of code with Ordasity: 43 | 44 | ```scala 45 | import com.yammer.metrics.scala.Meter 46 | import com.twitter.common.zookeeper.ZooKeeperClient 47 | import com.boundary.ordasity.{Cluster, ClusterConfig, SmartListener} 48 | 49 | class MyService { 50 | val listener = new SmartListener { 51 | 52 | // Called after successfully joining the cluster. 53 | def onJoin(client: ZooKeeperClient) { } 54 | 55 | // Do yer thang, mark that meter. 56 | def startWork(workUnit: String, meter: Meter) { } 57 | 58 | // Stop doin' that thang. 59 | def shutdownWork(workUnit: String) { } 60 | 61 | // Called after leaving the cluster. 
62 | def onLeave() { } 63 | } 64 | 65 | val config = ClusterConfig.builder().setHosts("localhost:2181").build() 66 | val cluster = new Cluster("ServiceName", listener, config) 67 | 68 | cluster.join() 69 | } 70 | ``` 71 | 72 | **Maven** folks and friends with compatible packaging systems, here's the info for your pom.xml: 73 | 74 | ```xml 75 | 76 | 77 | com.boundary 78 | ordasity-scala_2.9.1 79 | 0.4.5 80 | 81 | 82 | 83 | 84 | boundary-public 85 | Boundary Public 86 | http://maven.boundary.com/artifactory/external 87 | 88 | ``` 89 | 90 | --- 91 | 92 | ### In Action at Boundary 93 | 94 | At Boundary, the library holds together our pubsub and event stream processing systems. It's a critical part of ensuring that at any moment, we're consuming and aggregating data from our network of collectors at one tier, and processing this data at hundreds of megabits a second in another. Ordasity also helps keep track of the mappings between these services, wiring everything together for us behind the scenes. 95 | 96 | Ordasity's distribution enables us to spread the work of our pubsub aggregation and event stream processing systems across any number of nodes. Automatic load balancing keeps the cluster's workload evenly distributed, with nodes handing off work to others as workload changes. Graceful draining and handoff allows us to iterate rapidly on these systems, continously deploying updates without disrupting operation of the cluster. Ordasity's membership and work claiming approach ensures transparent failover within a couple seconds if a node becomes unavailable due to a network partition or system failure. 97 | 98 | --- 99 | 100 | ### Distribution / Coordination Strategy 101 | 102 | Ordasity's architecture is masterless, relying on Zookeeper only for coordination between individual nodes. The service is designed around the principle that many nodes acting together under a common set of rules can cooperatively form a self-organizing, self-regulating system. 103 | 104 | Ordasity supports two work claiming strategies: "simple" (count-based), and "smart" (load-based). 105 | 106 | #### Count-Based Distribution 107 | The count-based distribution strategy is simple. When in effect, each node in the cluster will attempt to claim its fair share of available work units according to the following formula: 108 | 109 | ```scala 110 | val maxToClaim = { 111 | if (allWorkUnits.size <= 1) allWorkUnits.size 112 | else (allWorkUnits.size / nodeCount.toDouble).ceil 113 | } 114 | ``` 115 | 116 | If zero or one work units are present, the node will attempt to claim up to one work unit. Otherwise, the node will attempt to claim up to the number of work units divided by the number of active nodes. 117 | 118 | #### Load-Based Distribution 119 | Ordasity's load-based distribution strategy assumes that all work units are not equal. It's unlikely that balancing simply by count will result in an even load distribution -- some nodes would probably end up much busier than others. The load-based strategy is smarter. It divides up work based on the amount of actual "work" done. 120 | 121 | 122 | ##### Meters Measure Load 123 | When you enable smart balancing and initialize Ordasity with a SmartListener, you get back a "meter" to mark when work occurs. Here's a simple, contrived example: 124 | 125 | ```scala 126 | val listener = new SmartListener { 127 | ... 
128 | def startWork(workUnit: String, meter: Meter) = {
129 | 
130 | val somethingOrOther = new Runnable() {
131 | def run() {
132 | while (true) {
133 | val processingAmount = process(workUnit)
134 | meter.mark(processingAmount)
135 | Thread.sleep(100)
136 | }
137 | }
138 | }
139 | 
140 | new Thread(somethingOrOther).start()
141 | }
142 | 
143 | ...
144 | }
145 | ```
146 | 
147 | Ordasity uses this meter to determine how much "work" each work unit in the cluster represents. If the application were a database or frontend to a data service, you might mark the meter each time a query is performed. In a messaging system, you'd mark it each time a message is sent or received. In an event stream processing system, you'd mark it each time an event is processed. You get the idea.
148 | 
149 | *(Bonus: Each of these meters exposes its metrics via JMX, providing you and your operations team with insight into what's happening when your service is in production).*
150 | 
151 | ##### Knowing the Load Lets us Balance
152 | Ordasity checks the meters once per minute (configurable) and updates this information in Zookeeper. This "load map" records the actual load represented by each work unit. All nodes watch the cluster's "load map" and are notified via Zookeeper's Atomic Broadcast mechanism when it changes. Each node in the cluster will attempt to claim its fair share of available work units according to the following formula:
153 | 
154 | ```scala
155 | def evenDistribution() : Double = {
156 | loadMap.values.sum / activeNodeSize().toDouble
157 | }
158 | ```
159 | 
160 | As the number of nodes or the load of individual work units changes, each node's idea of an "even distribution" changes as well. Using this "even distribution" value, each node will choose to claim additional work, or in the event of a rebalance, drain its workload to other nodes if it's processing more than its fair share.
161 | 
162 | ---
163 | 
164 | ### Rebalancing
165 | 
166 | Ordasity supports automatic and manual rebalancing to even out the cluster's load distribution as workloads change.
167 | 
168 | To trigger a manual rebalance on all nodes, touch "/service-name/meta/rebalance" in Zookeeper. However, automatic rebalancing is preferred. To enable it, just turn it on in your cluster config:
169 | 
170 | ```scala
171 | val config = ClusterConfig.builder().
172 | setHosts("localhost:2181").
173 | setEnableAutoRebalance(true).
174 | setAutoRebalanceInterval(60 * 60).build() // One hour
175 | ```
176 | 
177 | Because Ordasity is masterless, each node handles the rebalance process itself; no coordinator is involved. The rebalancing logic is very simple. If a node has more than its fair share of work when a rebalance is triggered, it will drain or release this work to other nodes in the cluster. As the cluster sees this work become available, lighter-loaded nodes will claim it (or receive handoff) and begin processing.
178 | 
179 | If you're using **count-based distribution**, it looks like this:
180 | 
181 | ```scala
182 | def simpleRebalance() {
183 | val target = fairShare()
184 | 
185 | if (myWorkUnits.size > target) {
186 | log.info("Simple Rebalance triggered. Load: %s. Target: %s".format(myWorkUnits.size, target))
187 | drainToCount(target)
188 | }
189 | }
190 | ```
191 | 
192 | If you're using **load-based distribution**, it looks like this:
193 | 
194 | ```scala
195 | def smartRebalance() {
196 | val target = evenDistribution()
197 | 
198 | if (myLoad() > target) {
199 | log.info("Smart Rebalance triggered. Load: %s.
Target: %s".format(myLoad(), target)) 200 | drainToLoad(target.longValue) 201 | } 202 | } 203 | ``` 204 | 205 | --- 206 | 207 | ### Draining and Handoff 208 | 209 | To avoid dumping a bucket of work on an already-loaded cluster at once, Ordasity supports "draining." Draining is a process by which a node can gradually release work to other nodes in the cluster. In addition to draining, Ordasity also supports graceful handoff, allowing for a period of overlap during which a new node can begin serving a work unit before the previous owner shuts it down. 210 | 211 | #### Draining 212 | 213 | Ordasity's work claiming strategies (count-based and load-based) have internal counterparts for releasing work: *drainToLoad* and *drainToCount*. 214 | 215 | The *drainToCount* and *drainToLoad* strategies invoked by a rebalance will release work units until the node's load is just greater than its fair share. That is to say, each node is "generous" in that it will strive to maintain slightly greater than a mathematically even distribution of work to guard against a scenario where work units are caught in a cycle of being claimed, released, and reclaimed continually. (Similarly, both claiming strategies will attempt to claim one unit beyond their fair share to avoid a scenario in which a work unit is claimed by no one). 216 | 217 | Ordasity allows you to configure the period of time for a drain to complete: 218 | 219 | ```scala 220 | val config = ClusterConfig.builder().setHosts("localhost:2181").setDrainTime(60).build() // 60 Seconds 221 | ``` 222 | 223 | When a drain is initiated, Ordasity will pace the release of work units over the time specified. If 15 work units were to be released over a 60-second period, the library would release one every four seconds. 224 | 225 | Whether you're using count-based or load-based distribution, the drain process is the same. Ordasity makes a list of work units to unclaim, then paces their release over the configured drain time. 226 | 227 | Draining is especially useful for scheduled maintenance and deploys. Ordasity exposes a "shutdown" method via JMX. When invoked, the node will set its status to "Draining," cease claiming new work, and release all existing work to other nodes in the cluster over the configured interval before exiting the cluster. 228 | 229 | #### Handoff 230 | When Handoff is enabled, Ordasity will allow another node to begin processing for a work unit before the former owner shuts it down. This eliminates the very brief gap between one node releasing and another node claiming a work unit. Handoff ensures that at any point, a work unit is being served. 231 | 232 | To enable it, just turn it on in your ClusterConfig: 233 | 234 | ```scala 235 | val clusterConfig = ClusterConfig.builder(). 236 | setHosts("localhost:2181"). 237 | setUseSoftHandoff(true). 238 | setHandoffShutdownDelay(10).build() // Seconds 239 | ``` 240 | 241 | The handoff process is fairly straightforward. When a node has decided to release a work unit (either due to a rebalance or because it is being drained for shutdown), it creates an entry in Zookeeper at /service-name/handoff-requests. Following their count-based or load-based claiming policies, other nodes will claim the work being handed off by creating an entry at /service-name/handoff-results. 242 | 243 | When a node has successfully accepted handoff by creating this entry, the new owner will begin work. 
The successful "handoff-results" entry signals to the original owner that handoff has occurred and that it is free to cease processing after a configurable overlap (default: 10 seconds). After this time, Ordasity will call the "shutdownWork" method on your listener. 244 | 245 | --- 246 | 247 | ### Registering work units 248 | 249 | Work units are registered by creating ZooKeeper nodes under `/work-units`. (If you have set `Cluster.workUnitName` to a custom value then this ZooKeeper path will change accordingly.) 250 | 251 | The name of the work unit is the same as the name of the ZooKeeper node. So, for example to create 3 work units called "a", "b", and "c", your ZK directory should look like this: 252 | 253 | /work-units 254 | /a 255 | /b 256 | /c 257 | 258 | Any String that is a valid ZK node name can be used as a work unit name. This is the string that is passed to your `ClusterListener` methods. 259 | 260 | The ZK node data must be a JSON-encoded `Map[String, String]`. This may be simply an empty map (`{}`), or you may want to include information about the work unit, for use by your cluster nodes. 261 | 262 | Note that Ordasity does not pass the ZK node data to your `ClusterListener`, so you will have to retrieve it yourself using the ZK client. It also does not provide a helper to deserialize the JSON string. 263 | 264 | #### Pegging 265 | 266 | The ZK node data can also be used for pegging work units to specific nodes. 267 | 268 | To do this, include a key-value pair of the form `"servicename": "nodeId"` in the JSON map. 269 | 270 | Here `servicename` is the name of the cluster, as specified in `Cluster`'s constructor, and `nodeId` is the unique ID of a node, as set in `ClusterConfig`. 271 | 272 | For example to peg a work unit to Node `node123` in cluster `mycluster`, set the ZK node's data to `{"mycluster": "node123"}`. 273 | 274 | --- 275 | 276 | ### Wrapping Up 277 | 278 | So, that's Ordasity! We hope you enjoy using it to build reliable distributed services quickly. 279 | 280 | #### Questions 281 | If you have any questions, please feel free to shoot us an e-mail or get in touch on Twitter. 282 | 283 | #### Bug Reports and Contributions 284 | Think you've found a bug? Sorry about that. Please open an issue on GitHub and we'll check it out as soon as possible. 285 | 286 | Want to contribute to Ordasity? Awesome! Fork the repo, make your changes, and issue a pull request. Please make effort to keep commits small, clean, and confined to specific changes. If you'd like to propose a new feature, give us a heads-up by getting in touch beforehand. We'd like to talk with you. 
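#### Appendix: Registering work units from code

As a companion to the "Registering work units" and "Pegging" sections above, here's a minimal sketch that creates work units directly with the plain ZooKeeper client. The connect string, work unit names ("a", "b"), cluster name ("mycluster"), and node ID ("node123") are placeholders; adapt them to your deployment.

```scala
import org.apache.zookeeper.{CreateMode, WatchedEvent, Watcher, ZooDefs, ZooKeeper}

object RegisterWorkUnits {
  def main(args: Array[String]) {
    // Connect to the same ZooKeeper ensemble your Ordasity cluster uses.
    val zk = new ZooKeeper("localhost:2181", 3000, new Watcher {
      def process(event: WatchedEvent) { }
    })

    // Create the parent path if it doesn't exist yet. If you've changed
    // workUnitName in your ClusterConfig, adjust "/work-units" accordingly.
    if (zk.exists("/work-units", false) == null)
      zk.create("/work-units", Array[Byte](), ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT)

    // A plain work unit: the znode data is an empty JSON map.
    zk.create("/work-units/a", "{}".getBytes("UTF-8"),
      ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT)

    // A work unit pegged to node "node123" in the cluster named "mycluster".
    zk.create("/work-units/b", """{"mycluster": "node123"}""".getBytes("UTF-8"),
      ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT)

    // Ordasity passes only the work unit name to your listener, so read the
    // JSON payload back yourself if your application needs it.
    println(new String(zk.getData("/work-units/b", false, null), "UTF-8"))

    zk.close()
  }
}
```

Any other ZooKeeper client or tool works just as well; Ordasity only cares that the znodes exist under the work unit path and hold a JSON-encoded map.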
287 | 288 | 289 | 290 | 291 | 292 | 293 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | com.boundary 5 | ordasity-scala_2.9.1 6 | 0.5.22-SNAPSHOT 7 | Ordasity 8 | http://www.boundary.com 9 | jar 10 | 11 | 12 | 2.9.1 13 | UTF-8 14 | 15 | 16 | 17 | 18 | 19 | com.boundary 20 | overlock-scala_${scala.version} 21 | 0.8.6 22 | 23 | 24 | 25 | org.apache.zookeeper 26 | zookeeper 27 | 3.3.6 28 | 29 | 30 | log4j 31 | log4j 32 | 33 | 34 | jline 35 | jline 36 | 37 | 38 | 39 | 40 | 41 | com.twitter.common.zookeeper 42 | client 43 | 0.0.46 44 | 45 | 46 | log4j 47 | log4j 48 | 49 | 50 | jline 51 | jline 52 | 53 | 54 | org.apache.zookeeper 55 | zookeeper 56 | 57 | 58 | 59 | 60 | 61 | com.twitter.common.zookeeper 62 | map 63 | 0.0.39 64 | 65 | 66 | jline 67 | jline 68 | 69 | 70 | org.apache.zookeeper 71 | zookeeper 72 | 73 | 74 | 75 | 76 | 77 | com.fasterxml.jackson.core 78 | jackson-core 79 | 2.1.4 80 | 81 | 82 | 83 | com.fasterxml.jackson.core 84 | jackson-databind 85 | 2.1.4 86 | 87 | 88 | 89 | com.fasterxml.jackson.module 90 | jackson-module-scala 91 | 2.1.2 92 | 93 | 94 | 95 | com.simple 96 | simplespec_${scala.version} 97 | 0.6.0 98 | test 99 | 100 | 101 | 102 | org.mockito 103 | mockito-all 104 | 1.9.5 105 | test 106 | 107 | 108 | 109 | org.slf4j 110 | slf4j-simple 111 | 1.7.7 112 | test 113 | 114 | 115 | 116 | org.slf4j 117 | log4j-over-slf4j 118 | 1.7.7 119 | 120 | 121 | 122 | 123 | 124 | src/main/scala 125 | src/test/scala 126 | 127 | 128 | 129 | org.scala-tools 130 | maven-scala-plugin 131 | 2.15.2 132 | 133 | 134 | 135 | compile 136 | testCompile 137 | 138 | 139 | 140 | 141 | 142 | -unchecked 143 | -deprecation 144 | 145 | UTF-8 146 | ${scala.version} 147 | 148 | 149 | 150 | org.apache.maven.plugins 151 | maven-compiler-plugin 152 | 2.5.1 153 | 154 | 1.7 155 | 1.7 156 | 157 | 158 | 159 | 160 | org.apache.maven.plugins 161 | maven-surefire-plugin 162 | 2.8.1 163 | 164 | false 165 | false 166 | 167 | **/*Spec.java 168 | 169 | 170 | **/*Test.java 171 | 172 | 173 | 174 | 175 | 176 | org.apache.maven.plugins 177 | maven-source-plugin 178 | 2.2.1 179 | 180 | 181 | attach-sources 182 | package 183 | 184 | jar-no-fork 185 | 186 | 187 | 188 | 189 | 190 | 191 | org.codehaus.mojo 192 | cobertura-maven-plugin 193 | 2.6 194 | 195 | 196 | xml 197 | html 198 | 199 | 200 | 201 | 202 | 203 | clean 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | twitter 213 | http://maven.twttr.com/ 214 | 215 | 216 | boundary-public 217 | Boundary Public 218 | http://maven.boundary.com/artifactory/repo 219 | 220 | 221 | 222 | 223 | 224 | maven.boundary.com 225 | maven.boundary.com-releases 226 | http://maven.boundary.com/artifactory/external 227 | 228 | 229 | maven.boundary.com 230 | maven.boundary.com-snapshots 231 | http://maven.boundary.com/artifactory/external 232 | 233 | 234 | 235 | 236 | scm:git:git@github.com:boundary/ordasity.git 237 | HEAD 238 | 239 | 240 | 241 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/Claimer.scala: -------------------------------------------------------------------------------- 1 | package com.boundary.ordasity 2 | 3 | import java.util.concurrent.BlockingQueue 4 | 5 | import org.slf4j.LoggerFactory 6 | 7 | /** 8 | * Thread responsible for claiming work in Ordasity. 
This thread waits for a 9 | * claim token to arrive (see TokenQueue for explanation), then performs an 10 | * Ordasity claim cycle for unclaimed work units. 11 | */ 12 | class Claimer(cluster: Cluster, name: String = "ordasity-claimer") extends Thread(name) { 13 | 14 | val log = LoggerFactory.getLogger(getClass) 15 | private val claimQueue : BlockingQueue[ClaimToken] = new TokenQueue 16 | def requestClaim() : Boolean = claimQueue.offer(ClaimToken.token) 17 | 18 | override def run() { 19 | log.info("Claimer started.") 20 | try { 21 | while (cluster.getState() != NodeState.Shutdown) { 22 | claimQueue.take() 23 | try { 24 | cluster.claimWork() 25 | } catch { 26 | case e: InterruptedException => 27 | // Don't swallow these 28 | throw e 29 | case e: Exception => 30 | log.error("Claimer failed to claim work", e) 31 | } 32 | } 33 | } catch { 34 | case e: Throwable => 35 | log.error("Claimer failed unexpectedly", e) 36 | throw e 37 | } finally { 38 | log.info("Claimer shutting down.") 39 | } 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/Cluster.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | package com.boundary.ordasity 18 | 19 | import com.yammer.metrics.scala.{Meter, Instrumented} 20 | import java.lang.management.ManagementFactory 21 | import javax.management.ObjectName 22 | 23 | import java.util.{Collections, HashMap, Map} 24 | import org.slf4j.LoggerFactory 25 | import org.apache.zookeeper.data.Stat 26 | 27 | import scala.collection.JavaConversions._ 28 | import org.cliffc.high_scale_lib.NonBlockingHashSet 29 | import java.util.concurrent.atomic.{AtomicBoolean, AtomicReference} 30 | 31 | import java.net.InetSocketAddress 32 | import org.apache.zookeeper.KeeperException.NoNodeException 33 | import com.twitter.common.quantity.{Time, Amount} 34 | import com.twitter.common.zookeeper.{ZooKeeperMap => ZKMap, ZooKeeperClient} 35 | 36 | import listeners._ 37 | import balancing.{CountBalancingPolicy, MeteredBalancingPolicy} 38 | import org.apache.zookeeper.{WatchedEvent, Watcher} 39 | import org.apache.zookeeper.Watcher.Event.KeeperState 40 | import java.util.concurrent._ 41 | import overlock.threadpool.NamedThreadFactory 42 | import com.fasterxml.jackson.databind.node.ObjectNode 43 | import scala.Some 44 | import com.boundary.ordasity.NodeInfo 45 | 46 | trait ClusterMBean { 47 | def join() : String 48 | def shutdown() 49 | def rebalance() 50 | } 51 | 52 | class Cluster(val name: String, val listener: Listener, config: ClusterConfig) 53 | extends ClusterMBean with Instrumented { 54 | 55 | val log = LoggerFactory.getLogger(getClass) 56 | var myNodeID = config.nodeId 57 | val watchesRegistered = new AtomicBoolean(false) 58 | val initialized = new AtomicBoolean(false) 59 | val initializedLatch = new CountDownLatch(1) 60 | val connected = new AtomicBoolean(false) 61 | 62 | // Register Ordasity with JMX for management / instrumentation. 
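// Note: the MBean is registered under the ObjectName "<serviceName>:name=Cluster"
// (e.g. "example_service:name=Cluster"); this is the name the JMX drain/shutdown
// example in API.md connects to.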
63 | ManagementFactory.getPlatformMBeanServer.registerMBean( 64 | this, new ObjectName(name + ":" + "name=Cluster")) 65 | 66 | // Cluster, node, and work unit state 67 | var nodes : Map[String, NodeInfo] = null 68 | val myWorkUnits = new NonBlockingHashSet[String] 69 | var allWorkUnits : Map[String, ObjectNode] = null 70 | var workUnitMap : Map[String, String] = null 71 | var handoffRequests : Map[String, String] = null 72 | var handoffResults : Map[String, String] = null 73 | val claimedForHandoff = new NonBlockingHashSet[String] 74 | var loadMap : Map[String, Double] = Collections.emptyMap() 75 | val workUnitsPeggedToMe = new NonBlockingHashSet[String] 76 | val claimer = new Claimer(this, "ordasity-claimer-" + name) 77 | val handoffResultsListener = new HandoffResultsListener(this, config) 78 | 79 | var balancingPolicy = { 80 | if (config.useSmartBalancing) 81 | new MeteredBalancingPolicy(this, config).init() 82 | else 83 | new CountBalancingPolicy(this, config).init() 84 | } 85 | 86 | // Scheduled executions 87 | val pool = new AtomicReference[ScheduledThreadPoolExecutor](createScheduledThreadExecutor()) 88 | var autoRebalanceFuture : Option[ScheduledFuture[_]] = None 89 | 90 | // Metrics 91 | val shortName = config.workUnitShortName 92 | val listGauge = metrics.gauge[String]("my_" + shortName) { myWorkUnits.mkString(", ") } 93 | val countGauge = metrics.gauge[Int]("my_" + shortName + "_count") { myWorkUnits.size } 94 | val connStateGauge = metrics.gauge[String]("zk_connection_state") { connected.get().toString } 95 | val nodeStateGauge = metrics.gauge[String]("node_state") { getState().toString } 96 | 97 | val state = new AtomicReference[NodeState.Value](NodeState.Fresh) 98 | def getState() : NodeState.Value = state.get() 99 | 100 | var zk : ZooKeeperClient = null 101 | 102 | private[this] def createScheduledThreadExecutor() : ScheduledThreadPoolExecutor = { 103 | new ScheduledThreadPoolExecutor(1, new NamedThreadFactory("ordasity-scheduler")) 104 | } 105 | 106 | /** 107 | * Joins the cluster, claims work, and begins operation. 108 | */ 109 | def join() : String = { 110 | join(None) 111 | } 112 | 113 | /** 114 | * Joins the cluster using a custom zk client, claims work, and begins operation. 115 | */ 116 | def join(injectedClient: Option[ZooKeeperClient]) : String = { 117 | state.get() match { 118 | case NodeState.Fresh => connect(injectedClient) 119 | case NodeState.Shutdown => connect(injectedClient) 120 | case NodeState.Draining => log.warn("'join' called while draining; ignoring.") 121 | case NodeState.Started => log.warn("'join' called after started; ignoring.") 122 | } 123 | 124 | state.get().toString 125 | } 126 | 127 | /** 128 | * registers a shutdown hook which causes cleanup of ephemeral state in zookeeper 129 | * when the JVM exits normally (via Ctrl+C or SIGTERM for example) 130 | * 131 | * this alerts other applications which have discovered this instance that it is 132 | * down so they may avoid remitting requests. 
otherwise this will not happen until 133 | * the default zookeeper timeout of 10s during which requests will fail until 134 | * the application is up and accepting requests again 135 | */ 136 | def addShutdownHook() { 137 | Runtime.getRuntime().addShutdownHook( 138 | new Thread() { 139 | override def run() { 140 | log.info("Cleaning up ephemeral ZooKeeper state") 141 | completeShutdown() 142 | } 143 | } 144 | ) 145 | } 146 | 147 | val connectionWatcher = new Watcher { 148 | def process(event: WatchedEvent) { 149 | event.getState match { 150 | case KeeperState.SyncConnected => { 151 | log.info("ZooKeeper session established.") 152 | connected.set(true) 153 | try { 154 | if (state.get() != NodeState.Shutdown) 155 | onConnect() 156 | else 157 | log.info("This node is shut down. ZK connection re-established, but not relaunching.") 158 | } catch { 159 | case e:Exception => 160 | log.error("Exception during zookeeper connection established callback", e) 161 | } 162 | } 163 | case KeeperState.Expired => 164 | log.info("ZooKeeper session expired.") 165 | connected.set(false) 166 | forceShutdown() 167 | awaitReconnect() 168 | case KeeperState.Disconnected => 169 | log.info("ZooKeeper session disconnected. Awaiting reconnect...") 170 | connected.set(false) 171 | awaitReconnect() 172 | case x: Any => 173 | log.info("ZooKeeper session interrupted. Shutting down due to %s".format(x)) 174 | connected.set(false) 175 | awaitReconnect() 176 | } 177 | } 178 | 179 | def awaitReconnect() { 180 | while (true) { 181 | try { 182 | log.info("Awaiting reconnection to ZooKeeper...") 183 | zk.get(Amount.of(1L, Time.SECONDS)) 184 | return 185 | } catch { 186 | case e: TimeoutException => log.warn("Timed out reconnecting to ZooKeeper.", e) 187 | case e: Exception => log.error("Error reconnecting to ZooKeeper", e) 188 | } 189 | } 190 | 191 | } 192 | 193 | } 194 | 195 | /** 196 | * Directs the ZooKeeperClient to connect to the ZooKeeper ensemble and wait for 197 | * the connection to be established before continuing. 198 | */ 199 | def connect(injectedClient: Option[ZooKeeperClient] = None) { 200 | if (!initialized.get) { 201 | val hosts = config.hosts.split(",").map { server => 202 | val host = server.split(":")(0) 203 | val port = Integer.parseInt(server.split(":")(1)) 204 | new InetSocketAddress(host, port) 205 | }.toList 206 | 207 | claimer.start() 208 | log.info("Connecting to hosts: %s".format(hosts.toString)) 209 | zk = injectedClient.getOrElse( 210 | new ZooKeeperClient(Amount.of(config.zkTimeout, Time.MILLISECONDS), hosts)) 211 | log.info("Registering connection watcher.") 212 | zk.register(connectionWatcher) 213 | } 214 | 215 | zk.get() 216 | } 217 | 218 | /** 219 | * Drains all work claimed by this node over the time period provided in the config 220 | * (default: 60 seconds), prevents it from claiming new work, and exits the cluster. 
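   * Note: the drain is paced by the configured balancing policy (via the
   * drainToCount(0, true) call below), and completeShutdown() finalizes the
   * exit once the drain has finished.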
221 | */ 222 | def shutdown() { 223 | if (state.get() == NodeState.Shutdown) return 224 | balancingPolicy.shutdown() 225 | if (autoRebalanceFuture.isDefined) autoRebalanceFuture.get.cancel(true) 226 | log.info("Shutdown initiated; beginning drain...") 227 | setState(NodeState.Draining) 228 | balancingPolicy.drainToCount(0, true) 229 | } 230 | 231 | def forceShutdown() { 232 | balancingPolicy.shutdown() 233 | if (autoRebalanceFuture.isDefined) autoRebalanceFuture.get.cancel(true) 234 | log.warn("Forcible shutdown initiated due to connection loss...") 235 | myWorkUnits.map(w => shutdownWork(w)) 236 | myWorkUnits.clear() 237 | listener.onLeave() 238 | } 239 | 240 | /** 241 | * Finalizes the shutdown sequence. Called once the drain operation completes. 242 | */ 243 | def completeShutdown() { 244 | setState(NodeState.Shutdown) 245 | myWorkUnits.map(w => shutdownWork(w)) 246 | myWorkUnits.clear() 247 | deleteFromZk() 248 | if (claimer != null) { 249 | claimer.interrupt() 250 | claimer.join() 251 | } 252 | // The connection watcher will attempt to reconnect - unregister it 253 | if (connectionWatcher != null) { 254 | zk.unregister(connectionWatcher) 255 | } 256 | try { 257 | zk.close() 258 | } catch { 259 | case e: Exception => log.warn("Zookeeper reported exception on shutdown.", e) 260 | } 261 | listener.onLeave() 262 | } 263 | 264 | /** 265 | * remove this worker's ephemeral node from zk 266 | */ 267 | def deleteFromZk() { 268 | ZKUtils.delete(zk, "/" + name + "/nodes/" + myNodeID) 269 | } 270 | 271 | /** 272 | * Primary callback which is triggered upon successful Zookeeper connection. 273 | */ 274 | def onConnect() { 275 | if (state.get() != NodeState.Fresh) { 276 | if (previousZKSessionStillActive()) { 277 | log.info("ZooKeeper session re-established before timeout.") 278 | return 279 | } 280 | log.warn("Rejoined after session timeout. Forcing shutdown and clean startup.") 281 | ensureCleanStartup() 282 | } 283 | 284 | log.info("Connected to Zookeeper (ID: %s).".format(myNodeID)) 285 | ZKUtils.ensureOrdasityPaths(zk, name, config) 286 | 287 | joinCluster() 288 | 289 | listener.onJoin(zk) 290 | 291 | if (watchesRegistered.compareAndSet(false, true)) 292 | registerWatchers() 293 | initialized.set(true) 294 | initializedLatch.countDown() 295 | 296 | setState(NodeState.Started) 297 | claimer.requestClaim() 298 | verifyIntegrity() 299 | 300 | balancingPolicy.onConnect() 301 | 302 | 303 | if (config.enableAutoRebalance) 304 | scheduleRebalancing() 305 | } 306 | 307 | /** 308 | * In the event that the node has been evicted and is reconnecting, this method 309 | * clears out all existing state before relaunching to ensure a clean launch. 310 | */ 311 | def ensureCleanStartup() { 312 | forceShutdown() 313 | val oldPool = pool.getAndSet(createScheduledThreadExecutor()) 314 | oldPool.shutdownNow() 315 | myWorkUnits.map(w => shutdownWork(w)) 316 | myWorkUnits.clear() 317 | claimedForHandoff.clear() 318 | workUnitsPeggedToMe.clear() 319 | state.set(NodeState.Fresh) 320 | } 321 | 322 | /** 323 | * Schedules auto-rebalancing if auto-rebalancing is enabled. The task is 324 | * scheduled to run every 60 seconds by default, or according to the config. 
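 * For example (illustrative values), a node configured with {{{ ClusterConfig.builder().setHosts("localhost:2181").setAutoRebalanceInterval(30).build() }}} schedules the task below with a 30-second initial delay and a 30-second period.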
325 | */ 326 | def scheduleRebalancing() { 327 | val interval = config.autoRebalanceInterval 328 | val runRebalance = new Runnable { 329 | def run() { 330 | try { 331 | rebalance() 332 | } catch { 333 | case e: Exception => log.error("Error running auto-rebalance.", e) 334 | } 335 | } 336 | } 337 | 338 | autoRebalanceFuture = Some( 339 | pool.get.scheduleAtFixedRate(runRebalance, interval, interval, TimeUnit.SECONDS)) 340 | } 341 | 342 | 343 | /** 344 | * Registers this node with Zookeeper on startup, retrying until it succeeds. 345 | * This retry logic is important in that a node which restarts before Zookeeper 346 | * detects the previous disconnect could prohibit the node from properly launching. 347 | */ 348 | def joinCluster() { 349 | while (true) { 350 | val myInfo = new NodeInfo(NodeState.Fresh.toString, zk.get().getSessionId) 351 | val encoded = JsonUtils.OBJECT_MAPPER.writeValueAsString(myInfo) 352 | if (ZKUtils.createEphemeral(zk, "/" + name + "/nodes/" + myNodeID, encoded)) { 353 | return 354 | } else { 355 | val stat = new Stat() 356 | val bytes = zk.get().getData("/" + name + "/nodes/" + myNodeID, false, stat) 357 | val nodeInfo = JsonUtils.OBJECT_MAPPER.readValue(bytes, classOf[NodeInfo]) 358 | if (nodeInfo.connectionID == zk.get().getSessionId) { 359 | return 360 | } 361 | log.warn("Unable to register with Zookeeper on launch. " + 362 | "Is %s already running on this host? Retrying in 1 second...", name) 363 | Thread.sleep(1000) 364 | } 365 | log.warn("Unable to register with Zookeeper on launch. " + 366 | "Is %s already running on this host? Retrying in 1 second...".format(name)) 367 | Thread.sleep(1000) 368 | } 369 | } 370 | 371 | /** 372 | * Registers each of the watchers that we're interested in in Zookeeper, and callbacks. 373 | * This includes watchers for changes to cluster topology (/nodes), work units 374 | * (/work-units), and claimed work (//claimed-work). We also register 375 | * watchers for calls to "/meta/rebalance", and if smart balancing is enabled, we'll 376 | * watch "/meta/workload" for changes to the cluster's workload. 377 | */ 378 | def registerWatchers() { 379 | 380 | val nodesChangedListener = new ClusterNodesChangedListener(this) 381 | val verifyIntegrityListener = new VerifyIntegrityListener[String](this, config) 382 | val stringDeser = new StringDeserializer() 383 | 384 | nodes = ZKMap.create(zk, "/%s/nodes".format(name), 385 | new NodeInfoDeserializer(), nodesChangedListener) 386 | 387 | allWorkUnits = ZKMap.create(zk, "%s/%s".format(config.workUnitZkChRoot.getOrElse(""), config.workUnitName), 388 | new ObjectNodeDeserializer, new VerifyIntegrityListener[ObjectNode](this, config)) 389 | 390 | workUnitMap = ZKMap.create(zk, "/%s/claimed-%s".format(name, config.workUnitShortName), 391 | stringDeser, verifyIntegrityListener) 392 | 393 | // Watch handoff requests and results. 394 | if (config.useSoftHandoff) { 395 | handoffRequests = ZKMap.create(zk, "/%s/handoff-requests".format(name), 396 | stringDeser, verifyIntegrityListener) 397 | 398 | handoffResults = ZKMap.create(zk, "/%s/handoff-result".format(name), 399 | stringDeser, handoffResultsListener) 400 | } else { 401 | handoffRequests = new HashMap[String, String] 402 | handoffResults = new HashMap[String, String] 403 | } 404 | 405 | // If smart balancing is enabled, watch for changes to the cluster's workload. 
406 | if (config.useSmartBalancing) 407 | loadMap = ZKMap.create[Double](zk, "/%s/meta/workload".format(name), new DoubleDeserializer) 408 | } 409 | 410 | 411 | /** 412 | * Triggers a work-claiming cycle. If smart balancing is enabled, claim work based 413 | * on node and cluster load. If simple balancing is in effect, claim by count. 414 | */ 415 | def claimWork() { 416 | if (state.get != NodeState.Started || !connected.get) return 417 | balancingPolicy.claimWork() 418 | } 419 | 420 | 421 | /** 422 | * Requests that another node take over for a work unit by creating a ZNode 423 | * at handoff-requests. This will trigger a claim cycle and adoption. 424 | */ 425 | def requestHandoff(workUnit: String) { 426 | log.info("Requesting handoff for %s.".format(workUnit)) 427 | ZKUtils.createEphemeral(zk, "/" + name + "/handoff-requests/" + workUnit) 428 | } 429 | 430 | 431 | /** 432 | * Verifies that all nodes are hooked up properly. Shuts down any work units 433 | * which have been removed from the cluster or have been assigned to another node. 434 | */ 435 | def verifyIntegrity() { 436 | val noLongerActive = myWorkUnits -- allWorkUnits.keys.toSet 437 | for (workUnit <- noLongerActive) 438 | shutdownWork(workUnit) 439 | 440 | // Check the status of pegged work units to ensure that this node is not serving 441 | // a work unit that is pegged to another node in the cluster. 442 | myWorkUnits.map { workUnit => 443 | val claimPath = workUnitClaimPath(workUnit) 444 | if (!balancingPolicy.isFairGame(workUnit) && !balancingPolicy.isPeggedToMe(workUnit)) { 445 | log.info("Discovered I'm serving a work unit that's now " + 446 | "pegged to someone else. Shutting down %s".format(workUnit)) 447 | shutdownWork(workUnit) 448 | 449 | } else if (workUnitMap.contains(workUnit) && !workUnitMap.get(workUnit).equals(myNodeID) && 450 | !claimedForHandoff.contains(workUnit) && !znodeIsMe(claimPath)) { 451 | log.info("Discovered I'm serving a work unit that's now " + 452 | "claimed by %s according to ZooKeeper. Shutting down %s".format(workUnitMap.get(workUnit), workUnit)) 453 | shutdownWork(workUnit) 454 | } 455 | } 456 | } 457 | 458 | def workUnitClaimPath(workUnit: String) = { 459 | "/%s/claimed-%s/%s".format(name, config.workUnitShortName, workUnit) 460 | } 461 | 462 | 463 | /** 464 | * Starts up a work unit that this node has claimed. 465 | * If "smart rebalancing" is enabled, hand the listener a meter to mark load. 466 | * Otherwise, just call "startWork" on the listener and let the client have at it. 467 | * TODO: Refactor to remove check and cast. 468 | */ 469 | def startWork(workUnit: String, meter: Option[Meter] = None) { 470 | log.info("Successfully claimed %s: %s. Starting...".format(config.workUnitName, workUnit)) 471 | val added = myWorkUnits.add(workUnit) 472 | 473 | if (added) { 474 | if (balancingPolicy.isInstanceOf[MeteredBalancingPolicy]) { 475 | val mbp = balancingPolicy.asInstanceOf[MeteredBalancingPolicy] 476 | val meter = mbp.persistentMeterCache.getOrElseUpdate( 477 | workUnit, metrics.meter(workUnit, "processing")) 478 | mbp.meters.put(workUnit, meter) 479 | listener.asInstanceOf[SmartListener].startWork(workUnit, meter) 480 | } else { 481 | listener.asInstanceOf[ClusterListener].startWork(workUnit) 482 | } 483 | } else { 484 | log.warn("Detected that %s is already a member of my work units; not starting twice!".format(workUnit)) 485 | } 486 | } 487 | 488 | 489 | /** 490 | * Shuts down a work unit by removing the claim in ZK and calling the listener. 
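 * Note that the claim ZNode is removed via deleteAtomic, so it is only deleted if it still holds this node's ID; a claim that now belongs to another node (for example, one that verifyIntegrity found to be claimed elsewhere) is left untouched.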
491 | */ 492 | def shutdownWork(workUnit: String, doLog: Boolean = true) { 493 | if (doLog) log.info("Shutting down %s: %s...".format(config.workUnitName, workUnit)) 494 | myWorkUnits.remove(workUnit) 495 | claimedForHandoff.remove(workUnit) 496 | balancingPolicy.onShutdownWork(workUnit) 497 | try { 498 | listener.shutdownWork(workUnit) 499 | } finally { 500 | ZKUtils.deleteAtomic(zk, workUnitClaimPath(workUnit), myNodeID) 501 | } 502 | } 503 | 504 | 505 | /** 506 | * Initiates a cluster rebalance. If smart balancing is enabled, the target load 507 | * is set to (total cluster load / node count), where "load" is determined by the 508 | * sum of all work unit meters in the cluster. If smart balancing is disabled, 509 | * the target load is set to (# of work items / node count). 510 | */ 511 | def rebalance() { 512 | if (state.get() == NodeState.Fresh) return 513 | balancingPolicy.rebalance() 514 | } 515 | 516 | 517 | /** 518 | * Given a path, determines whether or not the value of a ZNode is my node ID. 519 | */ 520 | def znodeIsMe(path: String) : Boolean = { 521 | val value = ZKUtils.get(zk, path) 522 | (value != null && value == myNodeID) 523 | } 524 | 525 | /** 526 | * Sets the state of the current Ordasity node and notifies others via ZooKeeper. 527 | */ 528 | def setState(to: NodeState.Value) { 529 | val myInfo = new NodeInfo(to.toString, zk.get().getSessionId) 530 | val encoded = JsonUtils.OBJECT_MAPPER.writeValueAsString(myInfo) 531 | ZKUtils.set(zk, "/" + name + "/nodes/" + myNodeID, encoded) 532 | state.set(to) 533 | } 534 | 535 | 536 | /** 537 | * Determines if another ZooKeeper session is currently active for the current node 538 | * by comparing the ZooKeeper session ID of the connection stored in NodeState. 539 | */ 540 | def previousZKSessionStillActive() : Boolean = { 541 | try { 542 | val data = zk.get().getData("/%s/nodes/%s".format(name, myNodeID), false, null) 543 | val nodeInfo = new NodeInfoDeserializer().apply(data) 544 | nodeInfo.connectionID == zk.get().getSessionId 545 | } catch { 546 | case e: NoNodeException => 547 | false 548 | case e: Exception => 549 | log.error("Encountered unexpected error in checking ZK session status.", e) 550 | false 551 | } 552 | } 553 | 554 | 555 | def getOrElse(map: Map[String, String], key: String, orElse: String) : String = { 556 | val result = map.get(key) 557 | if (result == null) orElse 558 | else result 559 | } 560 | 561 | 562 | def getOrElse(map: Map[String, Double], key: String, orElse: Double) : Double = { 563 | if (map.containsKey(key)) map.get(key) else orElse 564 | } 565 | 566 | def isMe(other: String) : Boolean = { 567 | myNodeID.equals(other) 568 | } 569 | 570 | } 571 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/ClusterConfig.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity 18 | 19 | import java.net.InetAddress 20 | import scala.reflect.BeanProperty 21 | 22 | class ClusterConfig { 23 | 24 | // Defaults 25 | @BeanProperty var hosts = "" 26 | @BeanProperty var enableAutoRebalance = true 27 | @BeanProperty var autoRebalanceInterval = 60 28 | @BeanProperty var drainTime = 60 29 | @BeanProperty var useSmartBalancing = false 30 | @BeanProperty var zkTimeout = 3000 31 | @BeanProperty var workUnitZkChRoot: Option[String] = None 32 | @BeanProperty var workUnitName = "work-units" 33 | @BeanProperty var workUnitShortName = "work" 34 | @BeanProperty var nodeId = InetAddress.getLocalHost.getHostName 35 | @BeanProperty var useSoftHandoff = false 36 | @BeanProperty var handoffShutdownDelay = 10 37 | 38 | } 39 | 40 | object ClusterConfig { 41 | def builder() = new ClusterConfigBuilder(new ClusterConfig) 42 | } 43 | 44 | class ClusterConfigBuilder(config: ClusterConfig) { 45 | def setHosts(hosts: String) : ClusterConfigBuilder = { 46 | config.hosts = hosts 47 | this 48 | } 49 | 50 | def setWorkUnitZkChRoot(root: Option[String]): ClusterConfigBuilder ={ 51 | config.workUnitZkChRoot = root 52 | this 53 | } 54 | 55 | def setEnableAutoRebalance(enableAutoRebalance: Boolean) : ClusterConfigBuilder = { 56 | config.enableAutoRebalance = enableAutoRebalance 57 | this 58 | } 59 | 60 | def setAutoRebalanceInterval(autoRebalanceInterval: Int) : ClusterConfigBuilder = { 61 | config.autoRebalanceInterval = autoRebalanceInterval 62 | this 63 | } 64 | 65 | def setZkTimeout(zkTimeout: Int) : ClusterConfigBuilder = { 66 | config.zkTimeout = zkTimeout 67 | this 68 | } 69 | 70 | def setUseSmartBalancing(useSmartBalancing: Boolean) : ClusterConfigBuilder = { 71 | config.useSmartBalancing = useSmartBalancing 72 | this 73 | } 74 | 75 | def setDrainTime(drainTime: Int) : ClusterConfigBuilder = { 76 | config.drainTime = drainTime 77 | this 78 | } 79 | 80 | def setWorkUnitName(workUnitName: String) : ClusterConfigBuilder = { 81 | config.workUnitName = workUnitName 82 | this 83 | } 84 | 85 | def setWorkUnitShortName(workUnitShortName: String) : ClusterConfigBuilder = { 86 | config.workUnitShortName = workUnitShortName 87 | this 88 | } 89 | 90 | def setNodeId(nodeId: String) : ClusterConfigBuilder = { 91 | config.nodeId = nodeId 92 | this 93 | } 94 | 95 | def setUseSoftHandoff(useSoftHandoff: Boolean) : ClusterConfigBuilder = { 96 | config.useSoftHandoff = useSoftHandoff 97 | this 98 | } 99 | 100 | def setHandoffShutdownDelay(handoffShutdownDelay: Int) : ClusterConfigBuilder = { 101 | config.handoffShutdownDelay = handoffShutdownDelay 102 | this 103 | } 104 | 105 | def build() : ClusterConfig = { 106 | config 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/ClusterListener.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity 18 | 19 | import com.yammer.metrics.scala.Meter 20 | import com.twitter.common.zookeeper.ZooKeeperClient 21 | 22 | abstract class Listener { 23 | def onJoin(client: ZooKeeperClient) 24 | def onLeave() 25 | def shutdownWork(workUnit: String) 26 | } 27 | 28 | abstract class SmartListener extends Listener { 29 | def startWork(workUnit: String, meter: Meter) 30 | } 31 | 32 | abstract class ClusterListener extends Listener { 33 | def startWork(workUnit: String) 34 | } 35 | 36 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/Deserializers.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity 18 | 19 | import com.google.common.base.Function 20 | import com.fasterxml.jackson.databind.node.ObjectNode 21 | import com.fasterxml.jackson.databind.ObjectMapper 22 | import com.fasterxml.jackson.module.scala.DefaultScalaModule 23 | import org.slf4j.LoggerFactory 24 | 25 | /** 26 | * Case class representing the state of a node and its ZooKeeper connection 27 | * ID. Used to differentiate nodes and connection states in the event that a 28 | * node is evicted and comes back up. 29 | */ 30 | case class NodeInfo(state: String, connectionID: Long) 31 | 32 | 33 | /** 34 | * Enum representing the state of an Ordasity node. 35 | * One of: {Fresh, Started, Draining, Shutdown} 36 | */ 37 | object NodeState extends Enumeration { 38 | type NodeState = Value 39 | val Fresh, Started, Draining, Shutdown = Value 40 | 41 | def valueOf(s: String) : Option[NodeState.Value] = { 42 | try { 43 | withName(s) match { 44 | case e: Value => Some(e) 45 | case _ => None 46 | } 47 | } catch { 48 | case e: NoSuchElementException => None 49 | } 50 | } 51 | } 52 | 53 | 54 | /** 55 | * Utility method for converting an array of bytes to a NodeInfo object. 
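 * For example, the bytes of the JSON document {"state":"Started","connectionID":101} deserialize to NodeInfo("Started", 101), while non-JSON data such as the bare string "Started" is interpreted as NodeInfo("Started", 0) by the fallback below.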
56 | */ 57 | class NodeInfoDeserializer extends Function[Array[Byte], NodeInfo] { 58 | private[this] val log = LoggerFactory.getLogger(getClass) 59 | def apply(bytes: Array[Byte]) : NodeInfo = { 60 | try { 61 | JsonUtils.OBJECT_MAPPER.readValue(bytes, classOf[NodeInfo]) 62 | } catch { 63 | case e: Exception => 64 | val data = if (bytes == null) "" else new String(bytes) 65 | val parsedState = NodeState.valueOf(data).getOrElse(NodeState.Shutdown) 66 | val info = new NodeInfo(parsedState.toString, 0) 67 | log.warn("Saw node data in non-JSON format. Interpreting %s as: %s".format(data, info), e) 68 | info 69 | } 70 | } 71 | } 72 | 73 | /** 74 | * Utility method for converting an array of bytes to a String. 75 | */ 76 | class StringDeserializer extends Function[Array[Byte], String] { 77 | def apply(a: Array[Byte]) : String = { 78 | try { 79 | new String(a) 80 | } catch { 81 | case e: Exception => "" 82 | } 83 | } 84 | } 85 | 86 | object JsonUtils { 87 | val OBJECT_MAPPER = new ObjectMapper() 88 | OBJECT_MAPPER.registerModule(new DefaultScalaModule) 89 | } 90 | 91 | class ObjectNodeDeserializer extends Function[Array[Byte], ObjectNode] { 92 | private[this] val log = LoggerFactory.getLogger(getClass) 93 | 94 | override def apply(input: Array[Byte]): ObjectNode = { 95 | if (input != null && input.length > 0) { 96 | try { 97 | return JsonUtils.OBJECT_MAPPER.readTree(input).asInstanceOf[ObjectNode] 98 | } catch { 99 | case e: Exception => 100 | log.error("Failed to de-serialize ZNode", e) 101 | } 102 | } 103 | JsonUtils.OBJECT_MAPPER.createObjectNode() 104 | } 105 | } 106 | 107 | /** 108 | * Utility method for converting an array of bytes to a Double. 109 | */ 110 | class DoubleDeserializer extends Function[Array[Byte], Double] { 111 | def apply(a: Array[Byte]) : Double = { 112 | try { 113 | new String(a).toDouble 114 | } catch { 115 | case e: Exception => 0d 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/TokenQueue.scala: -------------------------------------------------------------------------------- 1 | package com.boundary.ordasity 2 | 3 | import java.util.concurrent.LinkedBlockingQueue 4 | import com.yammer.metrics.scala.Instrumented 5 | 6 | class ClaimToken() 7 | object ClaimToken { val token = new ClaimToken } 8 | 9 | /** 10 | * Implementation of a BlockingQueue that operates via a "claim token." 11 | * The desired properties for this queue are one that works like a set, collapsing 12 | * duplicate events into a single one. While contains() is an O(N) operation, N will 13 | * equal at most one. An alternative would be an Object.wait() / Object.notify() impl, 14 | * but it seems desirable to stick with j/u/c classes. This implementation will 15 | * also allow for other types of events aside from a ClaimToken, if necessary. 
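 * For example, two consecutive calls to offer(ClaimToken.token) against an empty queue enqueue a single token: the first offer succeeds and the second is suppressed (and counted on the suppressedClaimCycles meter) because an identical token is already queued.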
16 | */ 17 | class TokenQueue[ClaimToken] extends LinkedBlockingQueue[ClaimToken] with Instrumented { 18 | val suppressedMeter = metrics.meter("ordasity", "suppressedClaimCycles") 19 | val requestedMeter = metrics.meter("ordasity", "claimCycles") 20 | 21 | override def offer(obj: ClaimToken) : Boolean = { 22 | if (contains(obj)) { 23 | suppressedMeter.mark() 24 | false 25 | } else { 26 | requestedMeter.mark() 27 | super.offer(obj) 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/ZKUtils.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity 18 | 19 | import com.twitter.common.zookeeper.{ZooKeeperUtils, ZooKeeperClient} 20 | 21 | import org.apache.zookeeper.ZooDefs.Ids 22 | import org.apache.zookeeper.KeeperException.{NoNodeException, NodeExistsException} 23 | import org.apache.zookeeper.{Watcher, CreateMode} 24 | import org.apache.zookeeper.data.Stat 25 | import org.slf4j.LoggerFactory 26 | 27 | object ZKUtils { 28 | 29 | val log = LoggerFactory.getLogger(getClass) 30 | 31 | def ensureOrdasityPaths(zk: ZooKeeperClient, name: String, config: ClusterConfig) { 32 | val acl = Ids.OPEN_ACL_UNSAFE 33 | ZooKeeperUtils.ensurePath(zk, acl, "/%s/nodes".format(name)) 34 | ZooKeeperUtils.ensurePath(zk, acl, "%s/%s".format(config.workUnitZkChRoot.getOrElse(""), config.workUnitName)) 35 | ZooKeeperUtils.ensurePath(zk, acl, "/%s/meta/rebalance".format(name)) 36 | ZooKeeperUtils.ensurePath(zk, acl, "/%s/meta/workload".format(name)) 37 | ZooKeeperUtils.ensurePath(zk, acl, "/%s/claimed-%s".format(name, config.workUnitShortName)) 38 | ZooKeeperUtils.ensurePath(zk, acl, "/%s/handoff-requests".format(name)) 39 | ZooKeeperUtils.ensurePath(zk, acl, "/%s/handoff-result".format(name)) 40 | } 41 | 42 | def createEphemeral(zk: ZooKeeperClient, path: String, value: String = "") : Boolean = { 43 | val created = { 44 | try { 45 | zk.get().create(path, value.getBytes, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL) 46 | true 47 | } catch { 48 | case e: NodeExistsException => false 49 | } 50 | } 51 | 52 | created 53 | } 54 | 55 | def delete(zk: ZooKeeperClient, path: String) : Boolean = { 56 | try { 57 | zk.get().delete(path, -1) 58 | true 59 | } catch { 60 | case e: NoNodeException => 61 | log.warn("No ZNode to delete for %s".format(path)) 62 | false 63 | case e: Exception => 64 | log.error("Unexpected error deleting ZK node %s".format(path), e) 65 | false 66 | } 67 | } 68 | 69 | /** 70 | * Attempts to atomically delete the ZNode with the specified path and value. Should be preferred over calling 71 | * delete() if the value is known. 72 | * 73 | * @param zk ZooKeeper client. 74 | * @param path Path to be deleted. 75 | * @param expectedValue The expected value of the ZNode at the specified path. 
76 | * @return True if the path was deleted, false otherwise. 77 | */ 78 | def deleteAtomic(zk: ZooKeeperClient, path: String, expectedValue: String) : Boolean = { 79 | val stat = new Stat() 80 | val value = getWithStat(zk, path, Some(stat)) 81 | if (!expectedValue.equals(value)) { 82 | return false 83 | } 84 | try { 85 | zk.get().delete(path, stat.getVersion) 86 | true 87 | } catch { 88 | case e: Exception => 89 | log.error("Failed to delete path %s with expected value %s".format(path, expectedValue), e) 90 | false 91 | } 92 | } 93 | 94 | def set(zk: ZooKeeperClient, path: String, data: String) : Boolean = { 95 | try { 96 | zk.get().setData(path, data.getBytes, -1) 97 | true 98 | } catch { 99 | case e: Exception => 100 | log.error("Error setting %s to %s.".format(path, data), e) 101 | false 102 | } 103 | } 104 | 105 | 106 | def setOrCreate(zk: ZooKeeperClient, path: String, 107 | data: String, mode: CreateMode = CreateMode.EPHEMERAL) { 108 | try { 109 | zk.get().setData(path, data.getBytes, -1) 110 | } catch { 111 | case e: NoNodeException => 112 | zk.get().create(path, data.getBytes, Ids.OPEN_ACL_UNSAFE, mode) 113 | } 114 | } 115 | 116 | def get(zk: ZooKeeperClient, path: String) : String = { 117 | getWithStat(zk, path, None) 118 | } 119 | 120 | def getWithStat(zk: ZooKeeperClient, path: String, stat: Option[Stat]) : String = { 121 | try { 122 | val value = zk.get.getData(path, false, stat.orNull) 123 | new String(value) 124 | } catch { 125 | case e: NoNodeException => 126 | null 127 | case e: Exception => 128 | log.error("Error getting data for ZNode at path %s".format(path), e) 129 | null 130 | } 131 | } 132 | 133 | def exists(zk: ZooKeeperClient, path: String, watcher: Watcher = null) : Option[Stat] = { 134 | try { 135 | Option(zk.get().exists(path, watcher)) 136 | } catch { 137 | case e: InterruptedException => 138 | throw e 139 | case e: Exception => 140 | log.error("Failed to get stat for ZNode at path %s".format(path)) 141 | None 142 | } 143 | } 144 | 145 | } 146 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/balancing/BalancingPolicy.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity.balancing 18 | 19 | import org.slf4j.LoggerFactory 20 | 21 | import collection.JavaConversions._ 22 | import com.boundary.ordasity.{ZKUtils, NodeState, ClusterConfig, Cluster} 23 | import com.yammer.metrics.scala.Instrumented 24 | import java.util.{TimerTask, LinkedList} 25 | import java.util.concurrent.{CountDownLatch, TimeUnit} 26 | 27 | /** 28 | * A balancing policy determines how a node in an Ordasity cluster should claim / 29 | * unclaim work and rebalance load about the cluster. Currently, there are two 30 | * implementations: CountBalancingPolicy and MeteredBalancingPolicy. 
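 * Which implementation a node uses is driven by configuration: when ClusterConfig.useSmartBalancing is true, Cluster constructs a MeteredBalancingPolicy; otherwise it uses a CountBalancingPolicy.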
31 | */ 32 | abstract class BalancingPolicy(cluster: Cluster, config: ClusterConfig) 33 | extends Instrumented { 34 | 35 | val log = LoggerFactory.getLogger(getClass) 36 | 37 | // Implementation required 38 | def claimWork() 39 | def rebalance() 40 | 41 | // Implementation optional 42 | def init() : BalancingPolicy = this 43 | def shutdown() { } 44 | def onConnect() { } 45 | def onShutdownWork(workUnit: String) { } 46 | 47 | def activeNodeSize() : Int = { 48 | cluster.nodes.filter { n => 49 | val (nodeName, nodeInfo) = n 50 | nodeInfo != null && nodeInfo.state == NodeState.Started.toString 51 | }.size 52 | } 53 | 54 | /** 55 | * Returns a set of work units which are unclaimed throughout the cluster. 56 | */ 57 | def getUnclaimed() : Set[String] = cluster.allWorkUnits.synchronized { 58 | cluster.allWorkUnits.keys.toSet -- 59 | cluster.workUnitMap.keys.toSet ++ 60 | cluster.handoffRequests.keySet -- 61 | cluster.handoffResults.keys -- 62 | cluster.myWorkUnits 63 | } 64 | 65 | /** 66 | * Determines whether or not a given work unit is designated "claimable" by this node. 67 | * If the ZNode for this work unit is empty, or contains JSON mapping this node to that 68 | * work unit, it's considered "claimable." 69 | */ 70 | def isFairGame(workUnit: String) : Boolean = { 71 | val workUnitData = cluster.allWorkUnits.get(workUnit) 72 | if (workUnitData == null || workUnitData.size() == 0) 73 | return true 74 | 75 | try { 76 | val pegged = workUnitData.get(cluster.name) 77 | if (pegged == null) { 78 | return true 79 | } 80 | log.debug("Pegged status for %s: %s.".format(workUnit, pegged)) 81 | pegged.asText().equals(cluster.myNodeID) 82 | } catch { 83 | case e: Exception => 84 | log.error("Error parsing mapping for %s: %s".format(workUnit, workUnitData), e) 85 | true 86 | } 87 | } 88 | 89 | 90 | /** 91 | * Determines whether or not a given work unit is pegged to this instance. 92 | */ 93 | def isPeggedToMe(workUnitId: String) : Boolean = { 94 | val zkWorkData = cluster.allWorkUnits.get(workUnitId) 95 | if (zkWorkData == null || zkWorkData.size() == 0) { 96 | cluster.workUnitsPeggedToMe.remove(workUnitId) 97 | return false 98 | } 99 | 100 | try { 101 | val pegged = zkWorkData.get(cluster.name) 102 | val isPegged = pegged != null && pegged.asText().equals(cluster.myNodeID) 103 | 104 | if (isPegged) { 105 | cluster.workUnitsPeggedToMe.add(workUnitId) 106 | } else { 107 | cluster.workUnitsPeggedToMe.remove(workUnitId) 108 | } 109 | 110 | isPegged 111 | } catch { 112 | case e: Exception => 113 | log.error("Error parsing mapping for %s: %s".format(workUnitId, zkWorkData), e) 114 | false 115 | } 116 | } 117 | 118 | /** 119 | * Attempts to claim a given work unit by creating an ephemeral node in ZooKeeper 120 | * with this node's ID. If the claim succeeds, start work. If not, move on. 121 | */ 122 | def attemptToClaim(workUnit: String, claimForHandoff: Boolean = false) : Boolean = { 123 | log.debug("Attempting to claim %s. For handoff? 
%s".format(workUnit, claimForHandoff)) 124 | 125 | val path = { 126 | if (claimForHandoff) "/%s/handoff-result/%s".format(cluster.name, workUnit) 127 | else cluster.workUnitClaimPath(workUnit) 128 | } 129 | 130 | val created = ZKUtils.createEphemeral(cluster.zk, path, cluster.myNodeID) 131 | 132 | if (created) { 133 | if (claimForHandoff) cluster.claimedForHandoff.add(workUnit) 134 | cluster.startWork(workUnit) 135 | true 136 | } else if (isPeggedToMe(workUnit)) { 137 | claimWorkPeggedToMe(workUnit) 138 | true 139 | } else { 140 | false 141 | } 142 | } 143 | 144 | /** 145 | * Claims a work unit pegged to this node, waiting for the ZNode to become available 146 | * (i.e., deleted by the node which previously owned it). 147 | */ 148 | protected def claimWorkPeggedToMe(workUnit: String) { 149 | val path = cluster.workUnitClaimPath(workUnit) 150 | 151 | while (true) { 152 | if (ZKUtils.createEphemeral(cluster.zk, path, cluster.myNodeID) || cluster.znodeIsMe(path)) { 153 | cluster.startWork(workUnit) 154 | return 155 | } 156 | log.warn("Attempting to establish ownership of %s. Retrying in one second...".format(workUnit)) 157 | Thread.sleep(1000) 158 | } 159 | } 160 | 161 | /** 162 | * Drains this node's share of the cluster workload down to a specific number 163 | * of work units over a period of time specified in the configuration with 164 | * soft handoff if enabled.. 165 | */ 166 | def drainToCount(targetCount: Int, doShutdown: Boolean = false, 167 | useHandoff: Boolean = config.useSoftHandoff, 168 | latch: Option[CountDownLatch] = None) { 169 | val msg = if (useHandoff) " with handoff" else "" 170 | log.info("Draining %s%s. Target count: %s, Current: %s".format( 171 | config.workUnitName, msg, targetCount, cluster.myWorkUnits.size)) 172 | 173 | if (targetCount >= cluster.myWorkUnits.size) { 174 | if (!doShutdown) 175 | return 176 | else if (targetCount == 0 && doShutdown) 177 | cluster.completeShutdown() 178 | } 179 | 180 | val amountToDrain = cluster.myWorkUnits.size - targetCount 181 | 182 | val msgPrefix = if (useHandoff) "Requesting handoff for" else "Shutting down" 183 | log.info("%s %s of %s %s over %s seconds".format( 184 | msgPrefix, amountToDrain, cluster.myWorkUnits.size, config.workUnitName, config.drainTime)) 185 | 186 | // Build a list of work units to hand off. 
187 | val toHandOff = new LinkedList[String] 188 | val wuList = new LinkedList[String](cluster.myWorkUnits -- cluster.workUnitsPeggedToMe) 189 | for (i <- (0 to amountToDrain - 1)) 190 | if (wuList.size - 1 >= i) toHandOff.add(wuList(i)) 191 | 192 | val drainInterval = ((config.drainTime.toDouble / toHandOff.size) * 1000).intValue() 193 | 194 | val handoffTask = new TimerTask { 195 | def run() { 196 | if (toHandOff.isEmpty) { 197 | if (targetCount == 0 && doShutdown) { 198 | cluster.completeShutdown() 199 | } 200 | latch.foreach(l => l.countDown()) 201 | return 202 | } else { 203 | val workUnit = toHandOff.poll() 204 | if (useHandoff && !isPeggedToMe(workUnit)) cluster.requestHandoff(workUnit) 205 | else cluster.shutdownWork(workUnit) 206 | } 207 | cluster.pool.get.schedule(this, drainInterval, TimeUnit.MILLISECONDS) 208 | } 209 | } 210 | 211 | log.info("Releasing %s / %s work units over %s seconds: %s".format( 212 | amountToDrain, cluster.myWorkUnits.size, config.drainTime, toHandOff.mkString(", "))) 213 | 214 | if (!cluster.myWorkUnits.isEmpty) 215 | cluster.pool.get.schedule(handoffTask, 0, TimeUnit.SECONDS) 216 | } 217 | 218 | } 219 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/balancing/CountBalancingPolicy.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity.balancing 18 | 19 | import collection.JavaConversions._ 20 | import com.boundary.ordasity.{ClusterConfig, Cluster} 21 | 22 | /** 23 | * Ordasity's count-based load balancing policy is simple. A node in the cluster 24 | * will attempt to claim ( work units / nodes + 1) work units. It may 25 | * be initialized with either a simple ClusterListener or a metered SmartListener. 26 | */ 27 | class CountBalancingPolicy(cluster: Cluster, config: ClusterConfig) extends BalancingPolicy(cluster, config) { 28 | 29 | /** 30 | * Claims work in Zookeeper. This method will attempt to divide work about the cluster 31 | * by claiming up to (( Work Unit Count / Nodes) + 1) work units. While 32 | * this doesn't necessarily represent an even load distribution based on work unit load, 33 | * it should result in a relatively even "work unit count" per node. This randomly-distributed 34 | * amount is in addition to any work units which are pegged to this node. 35 | */ 36 | def claimWork() { 37 | var claimed = cluster.myWorkUnits.size 38 | val nodeCount = activeNodeSize() 39 | 40 | cluster.allWorkUnits.synchronized { 41 | val maxToClaim = getMaxToClaim(nodeCount) 42 | 43 | log.debug("%s Nodes: %s. 
%s: %s.".format(cluster.name, nodeCount, config.workUnitName.capitalize, cluster.allWorkUnits.size)) 44 | log.debug("Claiming %s pegged to me, and up to %s more.".format(config.workUnitName, maxToClaim)) 45 | 46 | val unclaimed = getUnclaimed() 47 | log.debug("Handoff requests: %s, Handoff Results: %s, Unclaimed: %s".format( 48 | cluster.handoffRequests.mkString(", "), cluster.handoffResults.mkString(", "), unclaimed.mkString(", "))) 49 | 50 | for (workUnit <- unclaimed) { 51 | if ((isFairGame(workUnit) && claimed < maxToClaim) || isPeggedToMe(workUnit)) { 52 | if (config.useSoftHandoff && cluster.handoffRequests.contains(workUnit) && attemptToClaim(workUnit, true)) { 53 | log.info("Accepted handoff of %s.".format(workUnit)) 54 | cluster.handoffResultsListener.finishHandoff(workUnit) 55 | claimed += 1 56 | } else if (!cluster.handoffRequests.contains(workUnit) && attemptToClaim(workUnit)) { 57 | claimed += 1 58 | } 59 | } 60 | } 61 | } 62 | } 63 | 64 | /** 65 | * Determines the maximum number of work units the policy should attempt to claim. 66 | */ 67 | def getMaxToClaim(nodeCount: Int) : Int = cluster.allWorkUnits.synchronized { 68 | if (cluster.allWorkUnits.size <= 1) cluster.allWorkUnits.size 69 | else (cluster.allWorkUnits.size / nodeCount.toDouble).ceil.intValue() 70 | } 71 | 72 | 73 | /** 74 | * Performs a simple rebalance. Target load is set to (# of work items / node count). 75 | */ 76 | def rebalance() { 77 | val target = fairShare() 78 | 79 | if (cluster.myWorkUnits.size > target) { 80 | log.info("Simple Rebalance triggered. My Share: %s. Target: %s.".format(cluster.myWorkUnits.size, target)) 81 | super.drainToCount(target) 82 | } 83 | } 84 | 85 | /** 86 | * Determines the fair share of work units this node should claim. 87 | */ 88 | def fairShare() : Int = { 89 | (cluster.allWorkUnits.size.toDouble / activeNodeSize()).ceil.toInt 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/balancing/MeteredBalancingPolicy.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity.balancing 18 | 19 | import collection.JavaConversions._ 20 | import overlock.atomicmap.AtomicMap 21 | import com.boundary.ordasity._ 22 | import java.util.concurrent.{TimeUnit, ScheduledFuture} 23 | import com.yammer.metrics.scala.Meter 24 | import java.util.{TimerTask, LinkedList} 25 | import org.apache.zookeeper.CreateMode 26 | 27 | /** 28 | * Ordasity's count-based load balancing policy is simple. A node in the cluster 29 | * will attempt to claim ( work work units / nodes + 1) work units. It may 30 | * be initialized with either a simple ClusterListener or a metered SmartListener. 
31 | */ 32 | class MeteredBalancingPolicy(cluster: Cluster, config: ClusterConfig) 33 | extends BalancingPolicy(cluster, config) { 34 | 35 | val meters = AtomicMap.atomicNBHM[String, Meter] 36 | val persistentMeterCache = AtomicMap.atomicNBHM[String, Meter] 37 | val loadGauge = metrics.gauge[Double]("my_load") { myLoad() } 38 | var loadFuture : Option[ScheduledFuture[_]] = None 39 | 40 | override def init() : BalancingPolicy = { 41 | if (!cluster.listener.isInstanceOf[SmartListener]) { 42 | throw new RuntimeException("Ordasity's metered balancing policy must be initialized with " + 43 | "a SmartListener, but you provided a simple listener. Please flip that so we can tick " + 44 | "the meter as your application performs work!") 45 | } 46 | 47 | this 48 | } 49 | 50 | /** 51 | * Begins by claiming all work units that are pegged to this node. 52 | * Then, continues to claim work from the available pool until we've claimed 53 | * equal to or slightly more than the total desired load. 54 | */ 55 | def claimWork() { 56 | cluster.allWorkUnits.synchronized { 57 | for (workUnit <- getUnclaimed()) 58 | if (isPeggedToMe(workUnit)) 59 | claimWorkPeggedToMe(workUnit) 60 | 61 | val unclaimed = new LinkedList[String](getUnclaimed()) 62 | while (myLoad() <= evenDistribution && !unclaimed.isEmpty) { 63 | val workUnit = unclaimed.poll() 64 | 65 | if (config.useSoftHandoff && cluster.handoffRequests.contains(workUnit) 66 | && isFairGame(workUnit) && attemptToClaim(workUnit, claimForHandoff = true)) { 67 | log.info("Accepted handoff for %s.".format(workUnit)) 68 | cluster.handoffResultsListener.finishHandoff(workUnit) 69 | } 70 | 71 | else if (isFairGame(workUnit)) 72 | attemptToClaim(workUnit) 73 | } 74 | } 75 | } 76 | 77 | /** 78 | * Performs a "smart rebalance." The target load is set to (cluster load / node count), 79 | * where "load" is determined by the sum of all work unit meters in the cluster. 80 | */ 81 | def rebalance() { 82 | val target = evenDistribution() 83 | if (myLoad() > target) { 84 | log.info("Smart Rebalance triggered. Load: %s. Target: %s".format(myLoad(), target)) 85 | drainToLoad(target.longValue) 86 | } 87 | } 88 | 89 | 90 | /** 91 | * When smart balancing is enabled, calculates the even distribution of load about 92 | * the cluster. This is determined by the total load divided by the number of alive nodes. 93 | */ 94 | def evenDistribution() : Double = { 95 | cluster.loadMap.values.sum / activeNodeSize().doubleValue() 96 | } 97 | 98 | 99 | /** 100 | * Determines the current load on this instance when smart rebalancing is enabled. 101 | * This load is determined by the sum of all of this node's meters' one-minute rates. 102 | */ 103 | def myLoad() : Double = { 104 | var load = 0d 105 | log.debug(cluster.loadMap.toString) 106 | log.debug(cluster.myWorkUnits.toString) 107 | cluster.myWorkUnits.foreach(u => load += cluster.getOrElse(cluster.loadMap, u, 0)) 108 | load 109 | } 110 | 111 | /** 112 | * Once a minute, pass information about the amount of load generated per 113 | * work unit off to Zookeeper for use in the claiming and rebalancing process.
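 * Each work unit's one-minute rate is written to the persistent ZNode /<cluster-name>/meta/workload/<work-unit>; these values populate the loadMap that each node reads when computing evenDistribution() and draining via drainToLoad().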
114 | */ 115 | private def scheduleLoadTicks() { 116 | val sendLoadToZookeeper = new Runnable { 117 | def run() { 118 | try { 119 | meters.foreach { case(workUnit, meter) => 120 | val loadPath = "/%s/meta/workload/%s".format(cluster.name, workUnit) 121 | ZKUtils.setOrCreate(cluster.zk, loadPath, meter.oneMinuteRate.toString, CreateMode.PERSISTENT) 122 | } 123 | 124 | val myInfo = new NodeInfo(cluster.getState.toString, cluster.zk.get().getSessionId) 125 | val nodeLoadPath = "/%s/nodes/%s".format(cluster.name, cluster.myNodeID) 126 | val myInfoEncoded = JsonUtils.OBJECT_MAPPER.writeValueAsString(myInfo) 127 | ZKUtils.setOrCreate(cluster.zk, nodeLoadPath, myInfoEncoded, CreateMode.EPHEMERAL) 128 | 129 | log.info("My load: %s".format(myLoad())) 130 | } catch { 131 | case e: Exception => log.error("Error reporting load info to ZooKeeper.", e) 132 | } 133 | } 134 | } 135 | 136 | loadFuture = Some(cluster.pool.get.scheduleAtFixedRate( 137 | sendLoadToZookeeper, 0, 1, TimeUnit.MINUTES)) 138 | } 139 | 140 | 141 | /** 142 | * Drains excess load on this node down to a fraction distributed across the cluster. 143 | * The target load is set to (clusterLoad / # nodes). 144 | */ 145 | def drainToLoad(targetLoad: Long, time: Int = config.drainTime, 146 | useHandoff: Boolean = config.useSoftHandoff) { 147 | val startingLoad = myLoad() 148 | var currentLoad = myLoad() 149 | val drainList = new LinkedList[String] 150 | val eligibleToDrop = new LinkedList[String](cluster.myWorkUnits -- cluster.workUnitsPeggedToMe) 151 | 152 | while (currentLoad > targetLoad && !eligibleToDrop.isEmpty) { 153 | val workUnit = eligibleToDrop.poll() 154 | var workUnitLoad : Double = cluster.getOrElse(cluster.loadMap, workUnit, 0) 155 | 156 | if (workUnitLoad > 0 && (currentLoad - workUnitLoad) > targetLoad) { 157 | drainList.add(workUnit) 158 | currentLoad -= workUnitLoad 159 | } 160 | } 161 | 162 | val drainInterval = ((config.drainTime.toDouble / drainList.size) * 1000).intValue() 163 | val drainTask = buildDrainTask(drainList, drainInterval, useHandoff, currentLoad) 164 | 165 | if (!drainList.isEmpty) { 166 | log.info("Releasing work units over %s seconds. Current load: %s. Target: %s. Releasing: %s" 167 | .format(time, startingLoad, targetLoad, drainList.mkString(", "))) 168 | cluster.pool.get.schedule(drainTask, 0, TimeUnit.SECONDS) 169 | } 170 | } 171 | 172 | def buildDrainTask(drainList: LinkedList[String], drainInterval: Int, useHandoff: Boolean, 173 | currentLoad: Double) : TimerTask = { 174 | new TimerTask { 175 | def run() { 176 | if (drainList.isEmpty || myLoad <= evenDistribution) { 177 | log.info("Finished the drain list, or my load is now less than an even distribution. " + 178 | "Stopping rebalance. 
Remaining work units: %s".format(drainList.mkString(", "))) 179 | return 180 | } 181 | else if (useHandoff) 182 | cluster.requestHandoff(drainList.poll) 183 | else 184 | cluster.shutdownWork(drainList.poll) 185 | 186 | cluster.pool.get.schedule(this, drainInterval, TimeUnit.MILLISECONDS) 187 | } 188 | } 189 | } 190 | 191 | override def onConnect() = scheduleLoadTicks() 192 | 193 | override def shutdown() { 194 | if (loadFuture.isDefined) 195 | loadFuture.get.cancel(true) 196 | } 197 | 198 | override def onShutdownWork(workUnit: String) = 199 | meters.remove(workUnit) 200 | 201 | } 202 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/listeners/ClusterNodesChangedListener.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity.listeners 18 | 19 | import org.slf4j.LoggerFactory 20 | 21 | import collection.JavaConversions._ 22 | import com.boundary.ordasity.{Cluster, NodeInfo} 23 | import com.twitter.common.zookeeper.ZooKeeperMap 24 | 25 | /** 26 | * As the nodes in an Ordasity cluster come, go, or change state, we must update 27 | * our internal view of the cluster's topology, then claim work and verify the 28 | * integrity of existing mappings as appropriate. 29 | */ 30 | class ClusterNodesChangedListener(cluster: Cluster) 31 | extends ZooKeeperMap.Listener[NodeInfo] { 32 | 33 | val log = LoggerFactory.getLogger(getClass) 34 | def nodeChanged(nodeName: String, data: NodeInfo) { 35 | if (!cluster.initialized.get()) return 36 | 37 | log.info("Nodes: %s".format(cluster.nodes.map(n => n._1).mkString(", "))) 38 | cluster.claimer.requestClaim() 39 | cluster.verifyIntegrity() 40 | } 41 | 42 | def nodeRemoved(nodeName: String) { 43 | if (!cluster.initialized.get()) return 44 | log.info("%s has left the cluster.".format(nodeName)) 45 | cluster.claimer.requestClaim() 46 | cluster.verifyIntegrity() 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/listeners/HandoffResultsListener.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2013, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | package com.boundary.ordasity.listeners 18 | 19 | import com.boundary.ordasity._ 20 | import java.util.concurrent.TimeUnit 21 | import com.twitter.common.zookeeper.ZooKeeperMap 22 | import org.apache.zookeeper.{WatchedEvent, Watcher} 23 | import org.slf4j.LoggerFactory 24 | 25 | /* The HandoffResultsListener keeps track of the handoff state of work units 26 | * around the cluster. As events fire, this listener determines whether the 27 | * current node is offering handoff of a work unit or accepting it, and 28 | * manages that lifecycle as appropriate. 29 | */ 30 | class HandoffResultsListener(cluster: Cluster, config: ClusterConfig) 31 | extends ZooKeeperMap.Listener[String] { 32 | 33 | val log = LoggerFactory.getLogger(getClass) 34 | 35 | def nodeChanged(nodeName: String, data: String) = apply(nodeName) 36 | def nodeRemoved(nodeName: String) = apply(nodeName) 37 | 38 | /** 39 | * If I am the node which accepted this handoff, finish the job. 40 | * If I'm the node that requested to hand off this work unit to 41 | * another node, shut it down after the configured delay (handoffShutdownDelay seconds). 42 | */ 43 | def apply(workUnit: String) { 44 | if (!cluster.initialized.get()) return 45 | 46 | if (iRequestedHandoff(workUnit)) { 47 | log.info("Handoff of %s to %s completed. Shutting down %s in %s seconds.".format(workUnit, 48 | cluster.getOrElse(cluster.handoffResults, workUnit, "(None)"), workUnit, config.handoffShutdownDelay)) 49 | ZKUtils.delete(cluster.zk, "/%s/handoff-requests/%s".format(cluster.name, workUnit)) 50 | cluster.pool.get.schedule(shutdownAfterHandoff(workUnit), config.handoffShutdownDelay, TimeUnit.SECONDS) 51 | } 52 | } 53 | 54 | /** 55 | * Determines if this Ordasity node requested handoff of a work unit to someone else. 56 | * I have requested handoff of a work unit if it's currently a member of my active set 57 | * and its destination node is another node in the cluster. 58 | */ 59 | def iRequestedHandoff(workUnit: String) : Boolean = { 60 | val destinationNode = cluster.getOrElse(cluster.handoffResults, workUnit, "") 61 | cluster.myWorkUnits.contains(workUnit) && !destinationNode.equals("") && 62 | !cluster.isMe(destinationNode) 63 | } 64 | 65 | /** 66 | * Builds a runnable to shut down a work unit after a configurable delay once handoff 67 | * has completed. If the cluster has been instructed to shut down and the last work unit 68 | * has been handed off, this task also directs this Ordasity instance to shut down. 69 | */ 70 | def shutdownAfterHandoff(workUnit: String) : Runnable = { 71 | new Runnable { 72 | def run() { 73 | log.info("Shutting down %s following handoff to %s.".format( 74 | workUnit, cluster.getOrElse(cluster.handoffResults, workUnit, "(None)"))) 75 | cluster.shutdownWork(workUnit, doLog = false) 76 | 77 | if (cluster.myWorkUnits.size() == 0 && cluster.state.get() == NodeState.Draining) 78 | cluster.shutdown() 79 | } 80 | } 81 | } 82 | 83 | /** 84 | * Completes the process of handing off a work unit from one node to the current one. 85 | * Attempts to establish a final claim in ZooKeeper to the work unit handed off to me, 86 | * completing the handoff once the previous owner releases its claim ZNode. 87 | */ 88 | def finishHandoff(workUnit: String) { 89 | log.info("Handoff of %s to me acknowledged. Deleting claim ZNode for %s and waiting for %s to shutdown work."
90 | format(workUnit, workUnit, cluster.getOrElse(cluster.workUnitMap, workUnit, "(None)"))) 91 | 92 | val path = cluster.workUnitClaimPath(workUnit) 93 | val completeHandoff = () => { 94 | try { 95 | log.info("Completing handoff of %s".format(workUnit)) 96 | if (ZKUtils.createEphemeral(cluster.zk, path, cluster.myNodeID) || cluster.znodeIsMe(path)) { 97 | log.info("Handoff of %s to me complete. Peer has shut down work.".format(workUnit)) 98 | } 99 | else { 100 | log.warn("Failed to complete handoff of %s - couldn't create ephemeral node".format(workUnit)) 101 | } 102 | } catch { 103 | case e: Exception => 104 | log.error("Error completing handoff of %s to me."format(workUnit), e) 105 | } finally { 106 | ZKUtils.delete(cluster.zk, "/" + cluster.name + "/handoff-result/" + workUnit) 107 | cluster.claimedForHandoff.remove(workUnit) 108 | } 109 | } 110 | 111 | val stat = ZKUtils.exists(cluster.zk, path, new Watcher { 112 | def process(event: WatchedEvent) { 113 | // Don't really care about the type of event here - call unconditionally to clean up state 114 | completeHandoff() 115 | } 116 | }) 117 | // Unlikely that peer will have already deleted znode, but handle it regardless 118 | if (stat.isEmpty) { 119 | log.warn("Peer already deleted znode of %s".format(workUnit)) 120 | completeHandoff() 121 | } 122 | } 123 | 124 | } 125 | -------------------------------------------------------------------------------- /src/main/scala/com/boundary/ordasity/listeners/VerifyIntegrityListener.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity.listeners 18 | 19 | import org.slf4j.LoggerFactory 20 | 21 | import collection.JavaConversions._ 22 | import com.twitter.common.zookeeper.ZooKeeperMap 23 | import com.boundary.ordasity.{ClusterConfig, Cluster} 24 | 25 | /** 26 | * As work units distributed about the cluster change, we must verify the 27 | * integrity of this node's mappings to ensure they match reality, and attempt 28 | * to claim work if the topology of nodes and work units in the cluster has changed.
29 | */ 30 | class VerifyIntegrityListener[T](cluster: Cluster, config: ClusterConfig) 31 | extends ZooKeeperMap.Listener[T] { 32 | 33 | val log = LoggerFactory.getLogger(getClass) 34 | 35 | def nodeChanged(nodeName: String, data: T) { 36 | if (!cluster.initialized.get()) return 37 | 38 | log.debug(config.workUnitName.capitalize + 39 | " IDs: %s".format(cluster.allWorkUnits.keys.mkString(", "))) 40 | 41 | cluster.claimer.requestClaim() 42 | cluster.verifyIntegrity() 43 | } 44 | 45 | def nodeRemoved(nodeName: String) { 46 | if (!cluster.initialized.get()) return 47 | 48 | cluster.claimer.requestClaim() 49 | cluster.verifyIntegrity() 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/test/scala/com/boundary/ordasity/ClusterConfigSpec.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity 18 | 19 | import org.junit.Test 20 | import java.net.InetAddress 21 | import com.simple.simplespec.Spec 22 | 23 | class ClusterConfigSpec extends Spec { 24 | 25 | class `Test Cluster Config` { 26 | @Test def `test defaults` { 27 | val conf = new ClusterConfig 28 | 29 | conf.hosts.must(be("")) 30 | conf.enableAutoRebalance.must(be(true)) 31 | conf.autoRebalanceInterval.must(be(60)) 32 | conf.drainTime.must(be(60)) 33 | conf.useSmartBalancing.must(be(false)) 34 | conf.zkTimeout.must(be(3000)) 35 | conf.workUnitName.must(be("work-units")) 36 | conf.workUnitShortName.must(be("work")) 37 | conf.nodeId.must(be(InetAddress.getLocalHost.getHostName)) 38 | conf.useSoftHandoff.must(be(false)) 39 | conf.handoffShutdownDelay.must(be(10)) 40 | } 41 | 42 | 43 | @Test def `test mutators` { 44 | ClusterConfig.builder().setHosts("foo").build().hosts.must(be("foo")) 45 | ClusterConfig.builder().setEnableAutoRebalance(false).build().enableAutoRebalance.must(be(false)) 46 | ClusterConfig.builder().setAutoRebalanceInterval(10000).build().autoRebalanceInterval.must(be(10000)) 47 | ClusterConfig.builder().setZkTimeout(333).build().zkTimeout.must(be(333)) 48 | ClusterConfig.builder().setUseSmartBalancing(true).build().useSmartBalancing.must(be(true)) 49 | ClusterConfig.builder().setDrainTime(100).build().drainTime.must(be(100)) 50 | ClusterConfig.builder().setWorkUnitName("tacos").build().workUnitName.must(be("tacos")) 51 | ClusterConfig.builder().setWorkUnitShortName("taquitos").build().workUnitShortName.must(be("taquitos")) 52 | ClusterConfig.builder().setNodeId("skelter").build().nodeId.must(be("skelter")) 53 | ClusterConfig.builder().setUseSoftHandoff(true).build().useSoftHandoff.must(be(true)) 54 | ClusterConfig.builder().setHandoffShutdownDelay(90).build().handoffShutdownDelay.must(be(90)) 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/test/scala/com/boundary/ordasity/ClusterSpec.scala: 
-------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity 18 | 19 | import collection.JavaConversions._ 20 | import balancing.{CountBalancingPolicy, BalancingPolicy} 21 | import org.junit.Test 22 | import java.util.{UUID, HashMap} 23 | import org.apache.zookeeper.data.Stat 24 | import org.apache.zookeeper.ZooDefs.Ids 25 | import com.twitter.common.zookeeper.{ZooKeeperMap, ZooKeeperClient} 26 | import org.apache.zookeeper.{Watcher, CreateMode, ZooKeeper} 27 | import com.simple.simplespec.Spec 28 | import com.google.common.base.Charsets 29 | import org.mockito.invocation.InvocationOnMock 30 | import org.apache.zookeeper.KeeperException.NoNodeException 31 | import com.fasterxml.jackson.databind.node.ObjectNode 32 | 33 | class ClusterSpec extends Spec { 34 | 35 | val id = UUID.randomUUID().toString 36 | val config = ClusterConfig.builder(). 37 | setNodeId("testNode"). 38 | setAutoRebalanceInterval(1). 39 | setDrainTime(1). 40 | setHosts("no_existe:2181").build() 41 | 42 | val mockClusterListener = mock[Listener] 43 | val cluster = new Cluster(id, mockClusterListener, config) 44 | 45 | class `Test Cluster` { 46 | 47 | @Test def `previous ZK session still active` { 48 | val nodeInfo = NodeInfo(NodeState.Started.toString, 101L) 49 | 50 | val mockZKClient = mock[ZooKeeper] 51 | mockZKClient.getSessionId.returns(101L) 52 | mockZKClient.getData(equalTo("/%s/nodes/testNode".format(id)), any[Boolean], any[Stat]). 
53 | returns(JsonUtils.OBJECT_MAPPER.writeValueAsBytes(nodeInfo)) 54 | 55 | val mockZK = mock[ZooKeeperClient] 56 | mockZK.get().returns(mockZKClient) 57 | 58 | cluster.zk = mockZK 59 | cluster.previousZKSessionStillActive().must(be(true)) 60 | 61 | mockZKClient.getSessionId.returns(102L) 62 | cluster.previousZKSessionStillActive().must(be(false)) 63 | } 64 | 65 | @Test def `setState` { 66 | val nodeInfo = NodeInfo(NodeState.Draining.toString, 101L) 67 | val serialized = JsonUtils.OBJECT_MAPPER.writeValueAsBytes(nodeInfo) 68 | 69 | val mockZKClient = mock[ZooKeeper] 70 | mockZKClient.setData("/%s/nodes/%s".format(id, "testNode"), serialized, -1).returns(mock[Stat]) 71 | mockZKClient.getSessionId.returns(101L) 72 | 73 | val mockZK = mock[ZooKeeperClient] 74 | mockZK.get().returns(mockZKClient) 75 | 76 | cluster.zk = mockZK 77 | cluster.setState(NodeState.Draining) 78 | 79 | verify.one(mockZKClient).setData("/%s/nodes/%s".format(id, "testNode"), serialized, -1) 80 | cluster.state.get().must(be(NodeState.Draining)) 81 | } 82 | 83 | @Test def `zNode is Me` { 84 | val path = "/foo/bar" 85 | val (mockZK, mockZKClient) = getMockZK() 86 | cluster.zk = mockZKClient 87 | 88 | mockZK.getData(equalTo(path), any[Boolean], any[Stat]).returns("testNode".getBytes) 89 | cluster.znodeIsMe(path).must(be(true)) 90 | verify.one(mockZK).getData(equalTo(path), any[Boolean], any[Stat]) 91 | 92 | mockZK.getData(equalTo(path), any[Boolean], any[Stat]).returns("SOME OTHER NODE".getBytes) 93 | cluster.znodeIsMe(path).must(be(false)) 94 | verify.exactly(2)(mockZK).getData(equalTo(path), any[Boolean], any[Stat]) 95 | } 96 | 97 | @Test def `rebalance invokes rebalance (only if not fresh)` { 98 | val policy = mock[BalancingPolicy] 99 | cluster.balancingPolicy = policy 100 | 101 | cluster.state.set(NodeState.Fresh) 102 | cluster.rebalance() 103 | verify.exactly(0)(policy).rebalance() 104 | 105 | cluster.state.set(NodeState.Started) 106 | cluster.rebalance() 107 | verify.one(policy).rebalance() 108 | } 109 | 110 | @Test def `shutdown work` { 111 | val policy = mock[BalancingPolicy] 112 | cluster.balancingPolicy = policy 113 | 114 | val work = "taco" 115 | val path = cluster.workUnitClaimPath(work) 116 | 117 | val (mockZK, mockZKClient) = getMockZK() 118 | cluster.zk = mockZKClient 119 | cluster.allWorkUnits = new HashMap[String, ObjectNode] 120 | 121 | cluster.myWorkUnits.add(work) 122 | cluster.myWorkUnits.contains(work).must(be(true)) 123 | 124 | mockZK.getData(equalTo(path), any[Boolean], any[Stat]).answersWith((inv: InvocationOnMock) => { 125 | inv.getArguments()(2).asInstanceOf[Stat].setVersion(100) 126 | cluster.myNodeID.getBytes(Charsets.UTF_8) 127 | }) 128 | cluster.shutdownWork(work, doLog = false) 129 | verify.one(mockZK).delete(path, 100) 130 | verify.one(policy).onShutdownWork(work) 131 | verify.one(mockClusterListener).shutdownWork(work) 132 | cluster.myWorkUnits.contains(work).must(be(false)) 133 | 134 | // Then, test the case where we do not want the ZNode to be deleted. 
135 | val (mockZK2, mockZKClient2) = getMockZK() 136 | mockZK2.getData(equalTo(path), any[Boolean], any[Stat]).returns("othernode".getBytes(Charsets.UTF_8)) 137 | cluster.zk = mockZKClient2 138 | cluster.myWorkUnits.add(work) 139 | cluster.shutdownWork(work, doLog = false) 140 | verify.exactly(0)(mockZK2).delete(equalTo(path), any[Int]) 141 | cluster.myWorkUnits.contains(work).must(be(false)) 142 | } 143 | 144 | @Test def `claim work` { 145 | val policy = mock[BalancingPolicy] 146 | cluster.balancingPolicy = policy 147 | 148 | cluster.state.set(NodeState.Fresh) 149 | cluster.claimWork() 150 | verify.exactly(0)(policy).claimWork() 151 | 152 | cluster.state.set(NodeState.Started) 153 | cluster.connected.set(true) 154 | cluster.claimWork() 155 | verify.one(policy).claimWork() 156 | } 157 | 158 | @Test def `schedule rebalancing` { 159 | val pol = mock[BalancingPolicy] 160 | cluster.balancingPolicy = pol 161 | 162 | cluster.state.set(NodeState.Started) 163 | verify.exactly(0)(pol).rebalance() 164 | 165 | cluster.scheduleRebalancing() 166 | Thread.sleep(1200) 167 | verify.one(pol).rebalance() 168 | 169 | Thread.sleep(1000) 170 | verify.exactly(2)(pol).rebalance() 171 | 172 | cluster.autoRebalanceFuture.get.cancel(true) 173 | cluster.autoRebalanceFuture = None 174 | } 175 | 176 | @Test def `ensure clean startup` { 177 | val pol = mock[BalancingPolicy] 178 | val (mockZK, mockZKClient) = getMockZK() 179 | cluster.zk = mockZKClient 180 | cluster.balancingPolicy = pol 181 | 182 | cluster.myWorkUnits.add("foo") 183 | cluster.claimedForHandoff.add("bar") 184 | cluster.workUnitsPeggedToMe.add("baz") 185 | cluster.state.set(NodeState.Draining) 186 | cluster.allWorkUnits = new HashMap[String, ObjectNode] 187 | 188 | mockZK.getData(equalTo(cluster.workUnitClaimPath("foo")), any[Boolean], any[Stat]) 189 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 190 | 191 | cluster.ensureCleanStartup() 192 | verify.one(pol).shutdown() 193 | 194 | val future = cluster.autoRebalanceFuture 195 | (future.isEmpty || future.get.isCancelled).must(be(true)) 196 | verify.one(mockClusterListener).shutdownWork("foo") 197 | verify.one(mockClusterListener).onLeave() 198 | 199 | cluster.myWorkUnits.isEmpty.must(be(true)) 200 | cluster.claimedForHandoff.isEmpty.must(be(true)) 201 | cluster.workUnitsPeggedToMe.isEmpty.must(be(true)) 202 | cluster.state.get().must(be(NodeState.Fresh)) 203 | } 204 | 205 | @Test def `complete shutdown` { 206 | val (mockZK, mockZKClient) = getMockZK() 207 | cluster.zk = mockZKClient 208 | 209 | cluster.state.set(NodeState.Started) 210 | cluster.completeShutdown() 211 | verify.one(mockZKClient).close() 212 | verify.one(mockClusterListener).onLeave() 213 | cluster.state.get().must(be(NodeState.Shutdown)) 214 | } 215 | 216 | @Test def `request handoff` { 217 | val (mockZK, mockZKClient) = getMockZK() 218 | cluster.zk = mockZKClient 219 | 220 | val workUnit = "burrito" 221 | val path = "/%s/handoff-requests/%s".format(cluster.name, workUnit) 222 | 223 | cluster.requestHandoff(workUnit) 224 | verify.one(mockZK).create(path, "".getBytes, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL) 225 | } 226 | 227 | @Test def `shutdown` { 228 | val pol = new CountBalancingPolicy(cluster, config) 229 | cluster.balancingPolicy = pol 230 | cluster.myWorkUnits.clear() 231 | cluster.allWorkUnits = new HashMap[String, ObjectNode] 232 | 233 | val (_, mockZKClient) = getMockZK() 234 | cluster.zk = mockZKClient 235 | 236 | cluster.state.set(NodeState.Started) 237 | cluster.shutdown() 238 | 239 | // Must cancel the auto rebalance 
future 240 | val future = cluster.autoRebalanceFuture 241 | (future.isEmpty || future.get.isCancelled).must(be(true)) 242 | 243 | // Ensure that 'completeShutdown' was called, which closes the ZK conn, 244 | // resets the cluster state, and calls the cluster listener's onLeave method. 245 | verify.one(mockZKClient).close() 246 | verify.one(mockClusterListener).onLeave() 247 | cluster.state.get.must(be(NodeState.Shutdown)) 248 | } 249 | 250 | @Test def `force shutdown` { 251 | val (mockZK, mockZKClient) = getMockZK() 252 | cluster.zk = mockZKClient 253 | val pol = mock[BalancingPolicy] 254 | cluster.balancingPolicy = pol 255 | cluster.myWorkUnits.add("foo") 256 | cluster.allWorkUnits = new HashMap[String, ObjectNode] 257 | 258 | mockZK.getData(equalTo(cluster.workUnitClaimPath("foo")), any[Boolean], any[Stat]) 259 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 260 | cluster.forceShutdown() 261 | 262 | // Must cancel the auto rebalance future 263 | val future = cluster.autoRebalanceFuture 264 | (future.isEmpty || future.get.isCancelled).must(be(true)) 265 | 266 | verify.one(pol).shutdown() 267 | cluster.myWorkUnits.isEmpty.must(be(true)) 268 | verify.one(mockClusterListener).onLeave() 269 | } 270 | 271 | @Test def `join cluster` { 272 | val (mockZK, mockZKClient) = getMockZK() 273 | mockZK.getSessionId.returns(101L) 274 | cluster.zk = mockZKClient 275 | val path = "/%s/nodes/%s".format(id, cluster.myNodeID) 276 | val nodeInfo = JsonUtils.OBJECT_MAPPER.writeValueAsBytes(NodeInfo(NodeState.Fresh.toString, 101L)) 277 | 278 | cluster.joinCluster() 279 | verify.one(mockZK).create(path, nodeInfo, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL) 280 | } 281 | 282 | @Test def `register watchers` { 283 | val (mockZK, mockZKClient) = getMockZK() 284 | cluster.zk = mockZKClient 285 | 286 | // Pretend that the paths exist for the ZooKeeperMaps we're creating 287 | mockZK.exists(any[String], any[Watcher]).returns(mock[Stat]) 288 | 289 | cluster.registerWatchers() 290 | 291 | cluster.nodes.isInstanceOf[ZooKeeperMap[String]].must(be(true)) 292 | cluster.allWorkUnits.isInstanceOf[ZooKeeperMap[ObjectNode]].must(be(true)) 293 | cluster.workUnitMap.isInstanceOf[ZooKeeperMap[String]].must(be(true)) 294 | 295 | // Not using soft handoff (TODO: assert ZKMap w/soft handoff on) 296 | cluster.handoffRequests.isInstanceOf[HashMap[String, String]].must(be(true)) 297 | cluster.handoffResults.isInstanceOf[HashMap[String, String]].must(be(true)) 298 | 299 | // TODO: Test loadMap isinstanceof zkmap with smart balancing on. 
300 | } 301 | 302 | @Test def `verify integrity` { 303 | val (mockZK, mockZKClient) = getMockZK() 304 | cluster.zk = mockZKClient 305 | cluster.allWorkUnits = new HashMap[String, ObjectNode] 306 | cluster.workUnitMap = new HashMap[String, String] 307 | 308 | val nonexistent = collection.mutable.Set("shut", "me", "down") 309 | val mine = collection.mutable.Set("foo", "dong") 310 | val noLongerMine = collection.mutable.Set("bar", "baz") 311 | val claimedForHandoff = collection.mutable.Set("taco") 312 | 313 | cluster.myWorkUnits.addAll(mine) 314 | cluster.myWorkUnits.addAll(nonexistent) 315 | cluster.myWorkUnits.addAll(noLongerMine) 316 | cluster.myWorkUnits.addAll(claimedForHandoff) 317 | 318 | nonexistent.foreach(el => 319 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath(el)), any[Boolean], any[Stat]) 320 | .throws(new NoNodeException()) 321 | ) 322 | 323 | val workUnitMap = collection.mutable.Map( 324 | "foo" -> "testNode", "bar" -> "bar", "baz" -> "baz", 325 | "dong" -> "testNode", "taco" -> "bong") 326 | 327 | val peg = JsonUtils.OBJECT_MAPPER.createObjectNode() 328 | peg.put(id, "NOTTESTNODE") 329 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath("dong")), any[Boolean], any[Stat]) 330 | .returns("NOTTESTNODE".getBytes(Charsets.UTF_8)) 331 | val allUnits = collection.mutable.Map[String, ObjectNode]() 332 | workUnitMap.keySet.foreach(workUnit => { 333 | if ("dong".equals(workUnit)) { 334 | allUnits.put(workUnit, peg) 335 | } else { 336 | allUnits.put(workUnit, JsonUtils.OBJECT_MAPPER.createObjectNode()) 337 | } 338 | }) 339 | 340 | cluster.allWorkUnits.putAll(allUnits) 341 | cluster.workUnitMap.putAll(workUnitMap) 342 | cluster.claimedForHandoff.addAll(claimedForHandoff) 343 | 344 | nonexistent.foreach(node => 345 | mockZK.getData(equalTo(cluster.workUnitClaimPath(node)), any[Boolean], any[Stat]).throws(new NoNodeException()) 346 | ) 347 | 348 | mockZK.getData(equalTo(cluster.workUnitClaimPath("bar")), any[Boolean], any[Stat]).returns("testNode".getBytes) 349 | mockZK.getData(equalTo(cluster.workUnitClaimPath("baz")), any[Boolean], any[Stat]).returns("someoneElse".getBytes) 350 | 351 | cluster.verifyIntegrity() 352 | 353 | // Should shut down {shut, me, down} because they've been removed from the cluster. 354 | // Should leave {bar} active because it's marked as served by me in ZK despite the ZK 355 | // map not yet reflecting this node's claim of the organization. 356 | // Should shut down {baz} because it's now being served by someone else. 357 | // Should shut down {dong} because it has been pegged to someone else. 358 | // Should leave {foo} active because it's currently served by me. 359 | // Should leave {taco} active because I have claimed it for handoff. 360 | List("shut", "me", "down", "baz", "dong").foreach { workUnit => 361 | cluster.myWorkUnits.contains(workUnit).must(be(false)) 362 | } 363 | 364 | List("foo", "taco", "bar").foreach { workUnit => 365 | cluster.myWorkUnits.contains(workUnit).must(be(true)) 366 | } 367 | } 368 | 369 | @Test def `on connect after already started` { 370 | val (mockZK, mockZKClient) = getMockZK() 371 | cluster.zk = mockZKClient 372 | cluster.state.set(NodeState.Started) 373 | 374 | // Ensure that previousZKSessionStillActive() returns true 375 | val nodeInfo = NodeInfo(NodeState.Started.toString, 101L) 376 | mockZK.getSessionId.returns(101L) 377 | mockZK.getData(equalTo("/%s/nodes/testNode".format(id)), any[Boolean], any[Stat]). 
378 | returns(JsonUtils.OBJECT_MAPPER.writeValueAsBytes(nodeInfo)) 379 | 380 | cluster.onConnect() 381 | 382 | // No attempts to create paths etc. should be made, and the method should 383 | // short-circuit / exit early. We can verify this by ensuring that the ZK 384 | // client was only touched twice. 385 | verify.exactly(2)(mockZKClient).get() 386 | } 387 | 388 | @Test def `on connect and started, but unclean shutdown` { 389 | val (mockZK, mockZKClient) = getMockZK() 390 | cluster.zk = mockZKClient 391 | cluster.state.set(NodeState.Started) 392 | cluster.allWorkUnits = new HashMap[String, ObjectNode] 393 | 394 | // Ensure that previousZKSessionStillActive() returns false 395 | val nodeInfo = NodeInfo(NodeState.Started.toString, 102L) 396 | mockZK.getSessionId.returns(101L) 397 | mockZK.getData(equalTo("/%s/nodes/testNode".format(id)), any[Boolean], any[Stat]). 398 | returns(JsonUtils.OBJECT_MAPPER.writeValueAsBytes(nodeInfo)) 399 | 400 | // Pretend that the paths exist for the ZooKeeperMaps we're creating 401 | mockZK.exists(any[String], any[Watcher]).returns(mock[Stat]) 402 | 403 | // Ensure that on an unclean startup, the "ensureCleanStartup" method is 404 | // called, which clears out existing work units among other things. 405 | cluster.myWorkUnits.add("foo") 406 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath("foo")), any[Boolean], any[Stat]) 407 | .throws(new NoNodeException()) 408 | cluster.onConnect() 409 | cluster.myWorkUnits.isEmpty.must(be(true)) 410 | } 411 | 412 | @Test def `on connect - standard fresh launch` { 413 | val (mockZK, mockZKClient) = getMockZK() 414 | cluster.zk = mockZKClient 415 | cluster.watchesRegistered.set(false) 416 | 417 | val policy = mock[BalancingPolicy] 418 | cluster.balancingPolicy = policy 419 | 420 | cluster.state.set(NodeState.Fresh) 421 | 422 | // Pretend that the paths exist for the ZooKeeperMaps we're creating 423 | mockZK.exists(any[String], any[Watcher]).returns(mock[Stat]) 424 | 425 | cluster.onConnect() 426 | 427 | verify.one(mockClusterListener).onJoin(any) 428 | verify.one(policy).onConnect() 429 | cluster.state.get().must(be(NodeState.Started)) 430 | cluster.watchesRegistered.set(true) 431 | } 432 | 433 | @Test def `connect` { 434 | val (mockZK, mockZKClient) = getMockZK() 435 | val policy = mock[BalancingPolicy] 436 | cluster.balancingPolicy = policy 437 | 438 | // Pretend that the paths exist for the ZooKeeperMaps we're creating 439 | mockZK.exists(any[String], any[Watcher]).returns(mock[Stat]) 440 | 441 | cluster.connect(Some(mockZKClient)) 442 | 443 | // Apply same verifications as onConnect, as all of these should be called. 444 | //verify.one(mockClusterListener).onJoin(any) 445 | //verify.one(policy).onConnect() 446 | //cluster.state.get().must(be(NodeState.Started)) 447 | //cluster.watchesRegistered.set(true) 448 | } 449 | } 450 | 451 | @Test def `join` { 452 | val (mockZK, mockZKClient) = getMockZK() 453 | cluster.zk = mockZKClient 454 | 455 | val policy = mock[BalancingPolicy] 456 | cluster.balancingPolicy = policy 457 | 458 | // Should no-op if draining. 459 | cluster.setState(NodeState.Draining) 460 | cluster.join().must(be(NodeState.Draining.toString)) 461 | verify.exactly(0)(mockZKClient).get() 462 | 463 | // Should no-op if started. 
464 | cluster.setState(NodeState.Started) 465 | cluster.join().must(be(NodeState.Started.toString)) 466 | verify.exactly(0)(mockZKClient).get() 467 | 468 | // Pretend that the paths exist for the ZooKeeperMaps we're creating 469 | mockZK.exists(any[String], any[Watcher]).returns(mock[Stat]) 470 | 471 | cluster.setState(NodeState.Fresh) 472 | cluster.join().must(be(NodeState.Started.toString)) 473 | 474 | // Apply same verifications as connect, as all of these should be called. 475 | verify.one(mockClusterListener).onJoin(any) 476 | verify.one(policy).onConnect() 477 | cluster.state.get().must(be(NodeState.Started)) 478 | cluster.watchesRegistered.set(true) 479 | } 480 | 481 | 482 | @Test def `join after shutdown` { 483 | val (mockZK, mockZKClient) = getMockZK() 484 | cluster.zk = mockZKClient 485 | 486 | val policy = mock[BalancingPolicy] 487 | cluster.balancingPolicy = policy 488 | 489 | // Pretend that the paths exist for the ZooKeeperMaps we're creating 490 | mockZK.exists(any[String], any[Watcher]).returns(mock[Stat]) 491 | 492 | cluster.setState(NodeState.Shutdown) 493 | cluster.join().must(be(NodeState.Started.toString)) 494 | 495 | // Apply same verifications as connect, as all of these should be called. 496 | verify.one(mockClusterListener).onJoin(any) 497 | verify.one(policy).onConnect() 498 | cluster.state.get().must(be(NodeState.Started)) 499 | cluster.watchesRegistered.set(true) 500 | } 501 | 502 | 503 | @Test def `cluster constructor` { 504 | val cluster = new Cluster("foo", mockClusterListener, config) 505 | cluster.name.must(be("foo")) 506 | cluster.listener.must(be(mockClusterListener)) 507 | } 508 | 509 | 510 | @Test def `getOrElse String` { 511 | val foo = new HashMap[String, String] 512 | foo.put("foo", "bar") 513 | 514 | cluster.getOrElse(foo, "foo", "taco").must(be("bar")) 515 | cluster.getOrElse(foo, "bar", "taco").must(be("taco")) 516 | } 517 | 518 | @Test def `getOrElse Double` { 519 | val foo = new HashMap[String, Double] 520 | foo.put("foo", 0.01d) 521 | cluster.getOrElse(foo, "foo", 0.02d).must(be(0.01d)) 522 | cluster.getOrElse(foo, "bar", 0.02d).must(be(0.02d)) 523 | } 524 | 525 | def getMockZK() : (ZooKeeper, ZooKeeperClient) = { 526 | val mockZK = mock[ZooKeeper] 527 | val mockZKClient = mock[ZooKeeperClient] 528 | mockZKClient.get().returns(mockZK) 529 | (mockZK, mockZKClient) 530 | } 531 | 532 | } 533 | 534 | -------------------------------------------------------------------------------- /src/test/scala/com/boundary/ordasity/Deserializers.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | package com.boundary.ordasity 18 | 19 | import org.junit.Test 20 | import com.simple.simplespec.Spec 21 | 22 | class DeserializersSpec extends Spec { 23 | 24 | class `Test Deserializers` { 25 | 26 | @Test def `nodeState` { 27 | NodeState.valueOf("Fresh").must(be(Some(NodeState.Fresh))) 28 | NodeState.valueOf("Started").must(be(Some(NodeState.Started))) 29 | NodeState.valueOf("Draining").must(be(Some(NodeState.Draining))) 30 | NodeState.valueOf("Shutdown").must(be(Some(NodeState.Shutdown))) 31 | NodeState.valueOf("taco").must(be(None)) 32 | } 33 | 34 | @Test def `nodeinfo case class` { 35 | val info = NodeInfo("foo", 101L) 36 | info.state.must(be("foo")) 37 | info.connectionID.must(be(101L)) 38 | } 39 | 40 | @Test def `node info deserializer` { 41 | val deser = new NodeInfoDeserializer 42 | 43 | val valid = NodeInfo("foo", 101L) 44 | val bytes = JsonUtils.OBJECT_MAPPER.writeValueAsBytes(valid) 45 | 46 | deser.apply(bytes).must(be(valid)) 47 | deser.apply(null).must(be(NodeInfo(NodeState.Shutdown.toString, 0))) 48 | } 49 | 50 | 51 | @Test def `string deserializer` { 52 | val deser = new StringDeserializer 53 | deser.apply("foo".getBytes).must(be("foo")) 54 | deser.apply(null).must(be("")) 55 | } 56 | 57 | @Test def `double deserializer` { 58 | val deser = new DoubleDeserializer 59 | deser.apply("0.151".getBytes).must(be(0.151)) 60 | deser.apply(null).must(be(0d)) 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/test/scala/com/boundary/ordasity/ZKUtilsSpec.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | package com.boundary.ordasity 18 | 19 | import org.junit.Test 20 | import com.twitter.common.zookeeper.ZooKeeperClient 21 | import org.apache.zookeeper.ZooDefs.Ids 22 | import org.apache.zookeeper.{CreateMode, ZooKeeper} 23 | import org.apache.zookeeper.KeeperException.NoNodeException 24 | import com.simple.simplespec.Spec 25 | import org.apache.zookeeper.data.Stat 26 | 27 | class ZKUtilsSpec extends Spec { 28 | 29 | class `Test ZK Utils` { 30 | 31 | @Test def `test ensure ordasity paths` { 32 | val (mockZK, mockZKClient) = getMockZK() 33 | val clusterName = "foo" 34 | val unitName = "organizations" 35 | val unitShortName = "orgs" 36 | val roots : List[Option[String]] = List(None, Some("/root")) 37 | 38 | for (root <- roots) { 39 | val config = ClusterConfig.builder() 40 | .setWorkUnitName(unitName) 41 | .setWorkUnitShortName(unitShortName) 42 | .setWorkUnitZkChRoot(root) 43 | .build() 44 | 45 | ZKUtils.ensureOrdasityPaths(mockZKClient, clusterName, config) 46 | 47 | val paths = List( 48 | "/%s".format(clusterName), 49 | "/%s/nodes".format(clusterName), 50 | "%s/%s".format(root.getOrElse(""), unitName), 51 | "/%s/meta/rebalance".format(clusterName), 52 | "/%s/claimed-%s".format(clusterName, unitShortName), 53 | "/%s/handoff-requests".format(clusterName), 54 | "/%s/handoff-result".format(clusterName) 55 | ) 56 | 57 | paths.foreach(path => 58 | verify.atLeastOne(mockZK).create(path, null, 59 | Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT) 60 | ) 61 | } 62 | } 63 | 64 | @Test def `test create ephemeral node` { 65 | val (mockZK, mockZKClient) = getMockZK() 66 | val path = "/foo" 67 | val data = "data" 68 | 69 | ZKUtils.createEphemeral(mockZKClient, path, data).must(be(true)) 70 | verify.atLeastOne(mockZK).create(path, data.getBytes, 71 | Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL) 72 | } 73 | 74 | @Test def `delete znode` { 75 | val (mockZK, mockZKClient) = getMockZK() 76 | val path = "/delete_me" 77 | ZKUtils.delete(mockZKClient, path).must(be(true)) 78 | verify.atLeastOne(mockZK).delete(path, -1) 79 | } 80 | 81 | @Test def `set znode to value` { 82 | val (mockZK, mockZKClient) = getMockZK() 83 | val path = "/set_me" 84 | val data = "to this" 85 | ZKUtils.set(mockZKClient, path, data).must(be(true)) 86 | verify.atLeastOne(mockZK).setData(path, data.getBytes, -1) 87 | } 88 | 89 | @Test def `set or create` { 90 | val (mockZK, mockZKClient) = getMockZK() 91 | val path = "/set_me" 92 | val data = "to this" 93 | ZKUtils.setOrCreate(mockZKClient, path, data).must(be(true)) 94 | verify.atLeastOne(mockZK).setData(path, data.getBytes, -1) 95 | } 96 | 97 | @Test def `set or *create*` { 98 | val (mockZK, mockZKClient) = getMockZK() 99 | val path = "/set_me" 100 | val data = "to this" 101 | 102 | mockZK.setData(path, data.getBytes, -1).throws(new NoNodeException()) 103 | 104 | ZKUtils.setOrCreate(mockZKClient, path, data).must(be(true)) 105 | verify.atLeastOne(mockZK).setData(path, data.getBytes, -1) 106 | verify.atLeastOne(mockZK).create(path, data.getBytes, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL) 107 | } 108 | 109 | @Test def `test get` { 110 | val (mockZK, mockZKClient) = getMockZK() 111 | val path = "/foo" 112 | val data = "ohai" 113 | mockZK.getData(equalTo(path), any[Boolean], any[Stat]).returns(data.getBytes) 114 | 115 | ZKUtils.get(mockZKClient, path).must(be(data)) 116 | } 117 | 118 | 119 | def getMockZK() : (ZooKeeper, ZooKeeperClient) = { 120 | val mockZK = mock[ZooKeeper] 121 | val mockZKClient = mock[ZooKeeperClient] 122 | mockZKClient.get().returns(mockZK) 123 | (mockZK, 
mockZKClient) 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/test/scala/com/boundary/ordasity/balancing/BalancingPolicySpec.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity.balancing 18 | 19 | import java.util.concurrent.{TimeUnit, CountDownLatch} 20 | 21 | import org.junit.Test 22 | import com.boundary.ordasity._ 23 | import java.util.{HashMap, UUID} 24 | import collection.JavaConversions._ 25 | import org.apache.zookeeper.ZooKeeper 26 | import com.twitter.common.zookeeper.ZooKeeperClient 27 | import org.mockito.Mockito 28 | import org.apache.zookeeper.KeeperException.NodeExistsException 29 | import com.simple.simplespec.Spec 30 | import org.apache.zookeeper.data.Stat 31 | import com.google.common.base.Charsets 32 | import com.fasterxml.jackson.databind.node.ObjectNode 33 | 34 | 35 | class DummyBalancingPolicy(cluster: Cluster, config: ClusterConfig) 36 | extends BalancingPolicy(cluster, config) { 37 | def claimWork() = null 38 | def rebalance() = null 39 | } 40 | 41 | class BalancingPolicySpec extends Spec { 42 | 43 | val config = ClusterConfig.builder(). 44 | setNodeId("testNode"). 45 | setAutoRebalanceInterval(1). 46 | setDrainTime(1). 47 | setHosts("no_existe:2181"). 
48 | setEnableAutoRebalance(false).build() 49 | 50 | class `Base Balancing Policy Tests` { 51 | 52 | @Test def `active node size` { 53 | val cluster = makeCluster() 54 | val balancer = new DummyBalancingPolicy(cluster, config) 55 | 56 | cluster.nodes.put("foo", NodeInfo(NodeState.Fresh.toString, 0L)) 57 | cluster.nodes.put("bar", NodeInfo(NodeState.Shutdown.toString, 0L)) 58 | cluster.nodes.put("baz", NodeInfo(NodeState.Started.toString, 0L)) 59 | cluster.nodes.put("taco", NodeInfo(NodeState.Draining.toString, 0L)) 60 | cluster.nodes.put("nacho", NodeInfo("how did i get here?", 0L)) 61 | 62 | balancer.activeNodeSize().must(be(1)) 63 | } 64 | 65 | @Test def `is fair game` { 66 | val cluster = makeCluster() 67 | val balancer = new DummyBalancingPolicy(cluster, config) 68 | 69 | cluster.allWorkUnits.put("vanilla", JsonUtils.OBJECT_MAPPER.createObjectNode()) 70 | val peggedToMe = JsonUtils.OBJECT_MAPPER.createObjectNode() 71 | peggedToMe.put(cluster.name, cluster.myNodeID) 72 | cluster.allWorkUnits.put("peggedToMe", peggedToMe) 73 | val peggedToOther = JsonUtils.OBJECT_MAPPER.createObjectNode() 74 | peggedToOther.put(cluster.name, "otherNode") 75 | cluster.allWorkUnits.put("peggedToOther", peggedToOther) 76 | 77 | balancer.isFairGame("vanilla").must(be(true)) 78 | balancer.isFairGame("peggedToMe").must(be(true)) 79 | balancer.isFairGame("peggedToOther").must(be(false)) 80 | } 81 | 82 | @Test def `is pegged to me` { 83 | val cluster = makeCluster() 84 | val balancer = new DummyBalancingPolicy(cluster, config) 85 | 86 | cluster.allWorkUnits.put("vanilla", JsonUtils.OBJECT_MAPPER.createObjectNode()) 87 | val peggedToMe = JsonUtils.OBJECT_MAPPER.createObjectNode() 88 | peggedToMe.put(cluster.name, cluster.myNodeID) 89 | cluster.allWorkUnits.put("peggedToMe", peggedToMe) 90 | val peggedToOther = JsonUtils.OBJECT_MAPPER.createObjectNode() 91 | peggedToOther.put(cluster.name, "otherNode") 92 | cluster.allWorkUnits.put("peggedToOther", peggedToOther) 93 | 94 | balancer.isPeggedToMe("vanilla").must(be(false)) 95 | balancer.isPeggedToMe("bean").must(be(false)) 96 | balancer.isPeggedToMe("peggedToMe").must(be(true)) 97 | balancer.isPeggedToMe("peggedToOther").must(be(false)) 98 | } 99 | 100 | @Test def `attempt to claim` { 101 | val cluster = makeCluster() 102 | val balancer = new DummyBalancingPolicy(cluster, config) 103 | 104 | Mockito.when(cluster.zk.get().create(any, any, any, any)). 105 | thenReturn(""). 106 | thenThrow(new NodeExistsException) 107 | 108 | balancer.attemptToClaim("taco") 109 | cluster.myWorkUnits.contains("taco").must(be(true)) 110 | 111 | balancer.attemptToClaim("fajita") 112 | cluster.myWorkUnits.contains("fajita").must(be(false)) 113 | } 114 | 115 | @Test def `claim work pegged to me` { 116 | val cluster = makeCluster() 117 | val balancer = new DummyBalancingPolicy(cluster, config) 118 | val peggedToMe = JsonUtils.OBJECT_MAPPER.createObjectNode() 119 | peggedToMe.put(cluster.name, cluster.myNodeID) 120 | cluster.allWorkUnits.put("peggedToMe", peggedToMe) 121 | 122 | Mockito.when(cluster.zk.get().create(any, any, any, any)). 123 | thenThrow(new NodeExistsException). 
124 | thenReturn("") 125 | 126 | balancer.attemptToClaim("peggedToMe") 127 | cluster.myWorkUnits.contains("peggedToMe").must(be(true)) 128 | } 129 | 130 | @Test def `drain to count` { 131 | val cluster = makeCluster() 132 | val balancer = new DummyBalancingPolicy(cluster, config) 133 | 134 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven") 135 | cluster.myWorkUnits.addAll(workUnits) 136 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 137 | workUnits.foreach(el => cluster.workUnitMap.put(el, "testNode")) 138 | workUnits.foreach(el => 139 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath(el)), any[Boolean], any[Stat]) 140 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 141 | ) 142 | 143 | drainAndWait(balancer, 0) 144 | 145 | cluster.myWorkUnits.size().must(be(0)) 146 | cluster.state.get().must(be(NodeState.Started)) 147 | } 148 | 149 | @Test def `drain to count, ensuring that work units pegged to the node are not shut down` { 150 | val cluster = makeCluster() 151 | val balancer = new DummyBalancingPolicy(cluster, config) 152 | 153 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven") 154 | cluster.myWorkUnits.addAll(workUnits) 155 | cluster.workUnitsPeggedToMe.add("two") 156 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 157 | workUnits.foreach(el => cluster.workUnitMap.put(el, "testNode")) 158 | workUnits.foreach(el => 159 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath(el)), any[Boolean], any[Stat]) 160 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 161 | ) 162 | 163 | drainAndWait(balancer, 0) 164 | 165 | cluster.myWorkUnits.size().must(be(1)) 166 | cluster.state.get().must(be(NodeState.Started)) 167 | } 168 | 169 | @Test def `drain to zero with shutdown, ensuring that work units pegged to the node are shut down` { 170 | val cluster = makeCluster() 171 | val balancer = new DummyBalancingPolicy(cluster, config) 172 | 173 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven") 174 | cluster.myWorkUnits.addAll(workUnits) 175 | cluster.workUnitsPeggedToMe.add("two") 176 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 177 | workUnits.foreach(el => cluster.workUnitMap.put(el, "testNode")) 178 | workUnits.foreach(el => 179 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath(el)), any[Boolean], any[Stat]) 180 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 181 | ) 182 | 183 | drainAndWait(balancer, 0, doShutdown = true) 184 | 185 | cluster.myWorkUnits.size().must(be(0)) 186 | cluster.state.get().must(be(NodeState.Shutdown)) 187 | } 188 | 189 | 190 | @Test def `drain to count and shutdown` { 191 | val cluster = makeCluster() 192 | val balancer = new DummyBalancingPolicy(cluster, config) 193 | 194 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven") 195 | cluster.myWorkUnits.addAll(workUnits) 196 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 197 | workUnits.foreach(el => cluster.workUnitMap.put(el, "testNode")) 198 | workUnits.foreach(el => 199 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath(el)), any[Boolean], any[Stat]) 200 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 201 | ) 202 | 203 | drainAndWait(balancer, 0, doShutdown = true) 204 | 205 | cluster.myWorkUnits.size().must(be(0)) 206 | 
cluster.state.get().must(be(NodeState.Shutdown)) 207 | } 208 | 209 | @Test def `drain to count with handoff` { 210 | val cluster = makeCluster() 211 | val balancer = new DummyBalancingPolicy(cluster, config) 212 | 213 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven") 214 | cluster.myWorkUnits.addAll(workUnits) 215 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 216 | workUnits.foreach(el => cluster.workUnitMap.put(el, "testNode")) 217 | workUnits.foreach(el => 218 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath(el)), any[Boolean], any[Stat]) 219 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 220 | ) 221 | 222 | cluster.zk.get().create(any, any, any, any).returns("") 223 | drainAndWait(balancer, 3, useHandoff = true) 224 | verify.exactly(4)(cluster.zk.get()).create(any, any, any, any) 225 | verify.exactly(4)(cluster.zk.get()).create(any, any, any, any) 226 | cluster.state.get().must(be(NodeState.Started)) 227 | } 228 | 229 | @Test def `get unclaimed` { 230 | val cluster = makeCluster() 231 | val balancer = new CountBalancingPolicy(cluster, config) 232 | 233 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven", "eight") 234 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 235 | cluster.myWorkUnits.add("eight") 236 | 237 | List("one", "two").foreach(el => cluster.workUnitMap.put(el, "")) 238 | List("three", "four").foreach(el => cluster.handoffRequests.put(el, "")) 239 | List("five", "six").foreach(el => cluster.handoffResults.put(el, "")) 240 | 241 | balancer.getUnclaimed().must(be(Set("three", "four", "seven"))) 242 | } 243 | } 244 | 245 | def drainAndWait(balancer:DummyBalancingPolicy, 246 | targetCount: Int, doShutdown: Boolean = false, 247 | useHandoff: Boolean = config.useSoftHandoff): Unit = { 248 | 249 | val latch = new CountDownLatch(1); 250 | balancer.drainToCount(targetCount, doShutdown = doShutdown, useHandoff = useHandoff, latch = Some(latch)) 251 | latch.await(5, TimeUnit.SECONDS) 252 | } 253 | 254 | def makeCluster() : Cluster = { 255 | val cluster = new Cluster(UUID.randomUUID.toString, mock[ClusterListener], config) 256 | 257 | val mockZK = mock[ZooKeeper] 258 | val mockZKClient = mock[ZooKeeperClient] 259 | mockZKClient.get().returns(mockZK) 260 | cluster.zk = mockZKClient 261 | 262 | cluster.state.set(NodeState.Started) 263 | cluster.nodes = new HashMap[String, NodeInfo] 264 | cluster.allWorkUnits = new HashMap[String, ObjectNode] 265 | cluster.workUnitMap = new HashMap[String, String] 266 | cluster.handoffRequests = new HashMap[String, String] 267 | cluster.handoffResults = new HashMap[String, String] 268 | cluster.nodes.put("foo", NodeInfo(NodeState.Started.toString, 0L)) 269 | cluster.nodes.put("bar", NodeInfo(NodeState.Started.toString, 0L)) 270 | cluster.nodes.put("baz", NodeInfo(NodeState.Draining.toString, 0L)) 271 | cluster 272 | } 273 | 274 | } 275 | -------------------------------------------------------------------------------- /src/test/scala/com/boundary/ordasity/balancing/CountBalancingPolicySpec.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity.balancing 18 | 19 | import org.junit.Test 20 | import com.boundary.ordasity._ 21 | import java.util.{HashMap, UUID} 22 | import collection.JavaConversions._ 23 | import org.apache.zookeeper.ZooKeeper 24 | import com.twitter.common.zookeeper.ZooKeeperClient 25 | import com.simple.simplespec.Spec 26 | import org.apache.zookeeper.data.Stat 27 | import com.google.common.base.Charsets 28 | import com.fasterxml.jackson.databind.node.ObjectNode 29 | 30 | class CountBalancingPolicySpec extends Spec { 31 | 32 | val config = ClusterConfig.builder(). 33 | setNodeId("testNode"). 34 | setAutoRebalanceInterval(1). 35 | setDrainTime(1). 36 | setHosts("no_existe:2181"). 37 | setEnableAutoRebalance(false).build() 38 | 39 | class `Count Balancing Policy` { 40 | 41 | @Test def `fair share` { 42 | val cluster = makeCluster() 43 | val balancer = new CountBalancingPolicy(cluster, config) 44 | 45 | List("one", "two", "three", "four", "five", "six", "seven").foreach(el => 46 | cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 47 | 48 | balancer.activeNodeSize().must(be(2)) 49 | balancer.fairShare().must(be(4)) 50 | } 51 | 52 | 53 | @Test def `rebalance if i'm overloaded` { 54 | val cluster = makeCluster() 55 | val balancer = new CountBalancingPolicy(cluster, config) 56 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven") 57 | 58 | cluster.myWorkUnits.addAll(workUnits) 59 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 60 | workUnits.foreach(el => cluster.workUnitMap.put(el, "testNode")) 61 | workUnits.foreach(el => 62 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath(el)), any[Boolean], any[Stat]) 63 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 64 | ) 65 | 66 | balancer.rebalance() 67 | 68 | Thread.sleep(1100) 69 | cluster.myWorkUnits.size().must(be(4)) 70 | } 71 | 72 | 73 | @Test def `chill out if things are tite` { 74 | val cluster = makeCluster() 75 | val balancer = new CountBalancingPolicy(cluster, config) 76 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven") 77 | 78 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 79 | cluster.myWorkUnits.add("foo") 80 | cluster.workUnitMap.put("foo", "testNode") 81 | 82 | balancer.rebalance() 83 | 84 | Thread.sleep(1100) 85 | cluster.myWorkUnits.size().must(be(1)) 86 | } 87 | 88 | 89 | @Test def `get max to claim` { 90 | val cluster = makeCluster() 91 | val balancer = new CountBalancingPolicy(cluster, config) 92 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven") 93 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 94 | balancer.getMaxToClaim(balancer.activeNodeSize()).must(be(4)) 95 | 96 | cluster.allWorkUnits.clear() 97 | balancer.getMaxToClaim(balancer.activeNodeSize()).must(be(0)) 98 | 99 | cluster.allWorkUnits.put("one", JsonUtils.OBJECT_MAPPER.createObjectNode()) 100 | 
balancer.getMaxToClaim(balancer.activeNodeSize()).must(be(1)) 101 | } 102 | 103 | 104 | @Test def `claim work` { 105 | val cluster = makeCluster() 106 | val balancer = new CountBalancingPolicy(cluster, config) 107 | 108 | // Simulate all "create" requests succeeding. 109 | cluster.zk.get().create(any, any, any, any).returns("") 110 | 111 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven") 112 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 113 | 114 | cluster.myWorkUnits.size().must(be(0)) 115 | balancer.claimWork() 116 | 117 | // Since we're mocking the ZK client, the "all work unit" map will not be populated 118 | // by watches firing from ZooKeeper, so we populate our mocked map here. 119 | cluster.myWorkUnits.foreach(w => cluster.workUnitMap.put(w, "testNode")) 120 | 121 | cluster.myWorkUnits.size().must(be(4)) 122 | balancer.getUnclaimed().size.must(be(3)) 123 | (cluster.myWorkUnits ++ balancer.getUnclaimed()).must(be(cluster.allWorkUnits.keySet())) 124 | } 125 | 126 | 127 | @Test def `claim work with one pegged to me and one to someone else` { 128 | val cluster = makeCluster() 129 | val balancer = new CountBalancingPolicy(cluster, config) 130 | 131 | // Simulate all "create" requests succeeding. 132 | cluster.zk.get().create(any, any, any, any).returns("") 133 | 134 | val workUnits = List("one", "two", "three", "four", "five", "six", "seven") 135 | workUnits.foreach(el => cluster.allWorkUnits.put(el, JsonUtils.OBJECT_MAPPER.createObjectNode())) 136 | val peggedToMe = JsonUtils.OBJECT_MAPPER.createObjectNode() 137 | peggedToMe.put(cluster.name, cluster.myNodeID) 138 | cluster.allWorkUnits.put("peggedToMe", peggedToMe) 139 | val peggedToOther = JsonUtils.OBJECT_MAPPER.createObjectNode() 140 | peggedToOther.put(cluster.name, "otherNode") 141 | cluster.allWorkUnits.put("peggedToOther", peggedToOther) 142 | 143 | cluster.myWorkUnits.size().must(be(0)) 144 | balancer.getUnclaimed().size.must(be(9)) 145 | balancer.claimWork() 146 | 147 | // Since we're mocking the ZK client, the "all work unit" map will not be populated 148 | // by watches firing from ZooKeeper, so we populate our mocked map here. 
149 | cluster.myWorkUnits.foreach(w => cluster.workUnitMap.put(w, "testNode")) 150 | 151 | cluster.myWorkUnits.size().must(be(greaterThanOrEqualTo(5))) 152 | cluster.myWorkUnits.size().must(be(lessThanOrEqualTo(7))) 153 | cluster.myWorkUnits.contains("peggedToMe").must(be(true)) 154 | cluster.myWorkUnits.contains("peggedToOther").must(be(false)) 155 | 156 | (cluster.myWorkUnits ++ balancer.getUnclaimed()).must(be(cluster.allWorkUnits.keySet())) 157 | } 158 | } 159 | 160 | def makeCluster() : Cluster = { 161 | val cluster = new Cluster(UUID.randomUUID.toString, mock[ClusterListener], config) 162 | 163 | val mockZK = mock[ZooKeeper] 164 | val mockZKClient = mock[ZooKeeperClient] 165 | mockZKClient.get().returns(mockZK) 166 | cluster.zk = mockZKClient 167 | 168 | cluster.nodes = new HashMap[String, NodeInfo] 169 | cluster.allWorkUnits = new HashMap[String, ObjectNode] 170 | cluster.workUnitMap = new HashMap[String, String] 171 | cluster.handoffRequests = new HashMap[String, String] 172 | cluster.handoffResults = new HashMap[String, String] 173 | cluster.nodes.put("foo", NodeInfo(NodeState.Started.toString, 0L)) 174 | cluster.nodes.put("bar", NodeInfo(NodeState.Started.toString, 0L)) 175 | cluster.nodes.put("baz", NodeInfo(NodeState.Draining.toString, 0L)) 176 | cluster 177 | } 178 | 179 | } 180 | -------------------------------------------------------------------------------- /src/test/scala/com/boundary/ordasity/balancing/MeteredBalancingPolicySpec.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity.balancing 18 | 19 | import org.junit.Test 20 | import com.boundary.ordasity._ 21 | import collection.JavaConversions._ 22 | import org.apache.zookeeper.ZooKeeper 23 | import com.twitter.common.zookeeper.ZooKeeperClient 24 | import java.util.concurrent.ScheduledFuture 25 | import com.yammer.metrics.scala.Meter 26 | import java.util.{LinkedList, HashMap, UUID} 27 | import com.simple.simplespec.Spec 28 | import org.apache.zookeeper.data.Stat 29 | import com.google.common.base.Charsets 30 | import com.fasterxml.jackson.databind.node.ObjectNode 31 | 32 | class MeteredBalancingPolicySpec extends Spec { 33 | 34 | val config = ClusterConfig.builder(). 35 | setNodeId("testNode"). 36 | setAutoRebalanceInterval(1). 37 | setDrainTime(1). 38 | setHosts("no_existe:2181"). 
39 | setEnableAutoRebalance(false).build() 40 | 41 | class `Metered Balancing Policy` { 42 | 43 | @Test def `throw when initialize w/wrong listener` { 44 | val cluster = new Cluster(UUID.randomUUID.toString, mock[ClusterListener], config) 45 | val balancer = new MeteredBalancingPolicy(cluster, config) 46 | 47 | val threw = try { 48 | balancer.init() 49 | false 50 | } catch { 51 | case e: Exception => true 52 | } 53 | 54 | threw.must(be(true)) 55 | } 56 | 57 | @Test def `not throw when initialized w/wrong listener` { 58 | val cluster = makeCluster() 59 | val balancer = new MeteredBalancingPolicy(cluster, config) 60 | 61 | val threw = try { 62 | balancer.init() 63 | false 64 | } catch { 65 | case e: Exception => true 66 | } 67 | 68 | threw.must(be(false)) 69 | } 70 | 71 | @Test def `even distribution` { 72 | val cluster = makeCluster() 73 | val balancer = new MeteredBalancingPolicy(cluster, config) 74 | 75 | cluster.loadMap = new HashMap[String, Double] 76 | cluster.loadMap.putAll( 77 | Map("foo" -> 100.0, "bar" -> 200.0, "baz" -> 300.0)) 78 | 79 | balancer.evenDistribution().must(equal(300.0)) 80 | } 81 | 82 | @Test def `my load` { 83 | val cluster = makeCluster() 84 | val balancer = new MeteredBalancingPolicy(cluster, config) 85 | 86 | cluster.loadMap = new HashMap[String, Double] 87 | cluster.loadMap.putAll( 88 | Map("foo" -> 100.0, "bar" -> 200.0, "baz" -> 300.0)) 89 | 90 | cluster.myWorkUnits.add("foo") 91 | cluster.myWorkUnits.add("bar") 92 | balancer.myLoad().must(be(300.0)) 93 | } 94 | 95 | @Test def `shutdown` { 96 | val cluster = makeCluster() 97 | val balancer = new MeteredBalancingPolicy(cluster, config) 98 | 99 | val mockFuture = mock[ScheduledFuture[_]] 100 | balancer.loadFuture = Some(mockFuture) 101 | 102 | balancer.shutdown() 103 | verify.one(mockFuture).cancel(true) 104 | } 105 | 106 | @Test def `on shutdown work` { 107 | val cluster = makeCluster() 108 | val balancer = new MeteredBalancingPolicy(cluster, config) 109 | cluster.balancingPolicy = balancer 110 | 111 | balancer.meters.put("foo", mock[Meter]) 112 | balancer.onShutdownWork("foo") 113 | balancer.meters.contains("foo").must(be(false)) 114 | } 115 | 116 | @Test def `claim work` { 117 | val cluster = makeCluster() 118 | val balancer = new MeteredBalancingPolicy(cluster, config) 119 | cluster.balancingPolicy = balancer 120 | 121 | Map("foo" -> 100.0, "bar" -> 200.0, "baz" -> 300.0).foreach(el => 122 | cluster.allWorkUnits.put(el._1, JsonUtils.OBJECT_MAPPER.createObjectNode())) 123 | 124 | cluster.loadMap = new HashMap[String, Double] 125 | cluster.loadMap.putAll( 126 | Map("foo" -> 100.0, "bar" -> 200.0, "baz" -> 300.0)) 127 | 128 | // Simulate all "create" requests succeeding. 
129 | cluster.zk.get().create(any, any, any, any).returns("") 130 | balancer.claimWork() 131 | 132 | (balancer.myLoad() >= balancer.evenDistribution()).must(be(true)) 133 | cluster.myWorkUnits.size().must(be(lessThan(3))) 134 | } 135 | 136 | @Test def `rebalance` { 137 | val cluster = makeCluster() 138 | val balancer = new MeteredBalancingPolicy(cluster, config) 139 | cluster.balancingPolicy = balancer 140 | 141 | val map = Map("foo" -> 100.0, "bar" -> 200.0, "baz" -> 300.0) 142 | 143 | map.foreach(el => 144 | cluster.allWorkUnits.put(el._1, JsonUtils.OBJECT_MAPPER.createObjectNode())) 145 | cluster.myWorkUnits.addAll(map.keySet) 146 | map.keys.foreach(el => 147 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath(el)), any[Boolean], any[Stat]) 148 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 149 | ) 150 | cluster.loadMap = new HashMap[String, Double] 151 | cluster.loadMap.putAll(map) 152 | 153 | balancer.rebalance() 154 | 155 | Thread.sleep(1200) 156 | 157 | (balancer.myLoad() >= balancer.evenDistribution()).must(be(true)) 158 | cluster.myWorkUnits.size().must(be(lessThan(3))) 159 | } 160 | 161 | @Test def `drain to load` { 162 | val cluster = makeCluster() 163 | val balancer = new MeteredBalancingPolicy(cluster, config) 164 | cluster.balancingPolicy = balancer 165 | 166 | val map = Map("foo" -> 100.0, "bar" -> 200.0, "baz" -> 300.0) 167 | 168 | map.foreach(el => 169 | cluster.allWorkUnits.put(el._1, JsonUtils.OBJECT_MAPPER.createObjectNode())) 170 | cluster.myWorkUnits.addAll(map.keySet) 171 | map.keys.foreach(el => 172 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath(el)), any[Boolean], any[Stat]) 173 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 174 | ) 175 | cluster.loadMap = new HashMap[String, Double] 176 | cluster.loadMap.putAll(map) 177 | balancer.myLoad().must(be(600.0)) 178 | balancer.drainToLoad(balancer.evenDistribution().longValue()) 179 | 180 | Thread.sleep(1200) 181 | 182 | (balancer.myLoad() >= balancer.evenDistribution()).must(be(true)) 183 | 184 | balancer.myLoad().must(be(500.0)) 185 | } 186 | 187 | @Test def `build drain task` { 188 | val cluster = makeCluster() 189 | val balancer = new MeteredBalancingPolicy(cluster, config) 190 | cluster.balancingPolicy = balancer 191 | 192 | val map = Map("foo" -> 100.0, "bar" -> 200.0, "baz" -> 300.0) 193 | 194 | map.foreach(el => 195 | cluster.allWorkUnits.put(el._1, JsonUtils.OBJECT_MAPPER.createObjectNode())) 196 | map.keys.foreach(el => 197 | cluster.zk.get().getData(equalTo(cluster.workUnitClaimPath(el)), any[Boolean], any[Stat]) 198 | .returns(cluster.myNodeID.getBytes(Charsets.UTF_8)) 199 | ) 200 | cluster.myWorkUnits.addAll(map.keySet) 201 | cluster.loadMap = new HashMap[String, Double] 202 | cluster.loadMap.putAll(map) 203 | balancer.myLoad().must(be(600.0)) 204 | 205 | val drainList = new LinkedList[String] 206 | drainList.addAll(map.keySet.toList) 207 | val task = balancer.buildDrainTask(drainList, 10, false, balancer.myLoad()) 208 | 209 | task.run() 210 | Thread.sleep(1200) 211 | 212 | (balancer.myLoad() == balancer.evenDistribution()).must(be(true)) 213 | balancer.myLoad().must(be(300.0)) 214 | } 215 | } 216 | 217 | def makeCluster() : Cluster = { 218 | val listener = new SmartListener { 219 | def startWork(workUnit: String, meter: Meter) = null 220 | def shutdownWork(workUnit: String) = null 221 | def onLeave() = null 222 | def onJoin(client: ZooKeeperClient) = null 223 | } 224 | 225 | val cluster = new Cluster(UUID.randomUUID.toString, listener, config) 226 | val mockZK = 
mock[ZooKeeper] 227 | val mockZKClient = mock[ZooKeeperClient] 228 | mockZKClient.get().returns(mockZK) 229 | cluster.zk = mockZKClient 230 | 231 | cluster.nodes = new HashMap[String, NodeInfo] 232 | cluster.allWorkUnits = new HashMap[String, ObjectNode] 233 | cluster.workUnitMap = new HashMap[String, String] 234 | cluster.handoffRequests = new HashMap[String, String] 235 | cluster.handoffResults = new HashMap[String, String] 236 | cluster.nodes.put("foo", NodeInfo(NodeState.Started.toString, 0L)) 237 | cluster.nodes.put("bar", NodeInfo(NodeState.Started.toString, 0L)) 238 | cluster.nodes.put("baz", NodeInfo(NodeState.Draining.toString, 0L)) 239 | cluster 240 | } 241 | 242 | } 243 | -------------------------------------------------------------------------------- /src/test/scala/com/boundary/ordasity/listeners/ClusterNodesChangedListenerSpec.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity.listeners 18 | 19 | import org.junit.Test 20 | import java.util.concurrent.atomic.AtomicBoolean 21 | import com.boundary.ordasity.{NodeState, NodeInfo, Cluster, ClusterConfig, Claimer} 22 | import com.simple.simplespec.Spec 23 | 24 | class ClusterNodesChangedListenerSpec extends Spec { 25 | 26 | val config = ClusterConfig.builder(). 27 | setNodeId("testNode"). 28 | setAutoRebalanceInterval(1). 29 | setDrainTime(1). 
30 | setHosts("no_existe:2181").build() 31 | 32 | class `Cluster Nodes Changed Listener` { 33 | 34 | @Test def `node changed` { 35 | val cluster = mock[Cluster] 36 | cluster.watchesRegistered.returns(new AtomicBoolean(true)) 37 | cluster.initialized.returns(new AtomicBoolean(true)) 38 | 39 | val claimer = mock[Claimer] 40 | claimer.start() 41 | cluster.claimer.returns(claimer) 42 | claimer.requestClaim().answersWith(invocation => { 43 | cluster.claimWork() 44 | true 45 | }) 46 | 47 | val listener = new ClusterNodesChangedListener(cluster) 48 | listener.nodeChanged("foo", NodeInfo(NodeState.Started.toString, 0L)) 49 | 50 | verify.one(cluster).claimWork() 51 | verify.one(cluster).verifyIntegrity() 52 | } 53 | 54 | @Test def `node removed` { 55 | val cluster = mock[Cluster] 56 | cluster.watchesRegistered.returns(new AtomicBoolean(true)) 57 | cluster.initialized.returns(new AtomicBoolean(true)) 58 | 59 | val claimer = mock[Claimer] 60 | cluster.claimer.returns(claimer) 61 | claimer.requestClaim().answersWith(invocation => { 62 | cluster.claimWork() 63 | true 64 | }) 65 | 66 | val listener = new ClusterNodesChangedListener(cluster) 67 | listener.nodeRemoved("foo") 68 | 69 | verify.one(cluster).claimWork() 70 | verify.one(cluster).verifyIntegrity() 71 | } 72 | 73 | @Test def `node changed - watches unregistered` { 74 | val cluster = mock[Cluster] 75 | cluster.watchesRegistered.returns(new AtomicBoolean(false)) 76 | cluster.initialized.returns(new AtomicBoolean(false)) 77 | 78 | val listener = new ClusterNodesChangedListener(cluster) 79 | listener.nodeChanged("foo", NodeInfo(NodeState.Started.toString, 0L)) 80 | 81 | verify.exactly(0)(cluster).claimWork() 82 | verify.exactly(0)(cluster).verifyIntegrity() 83 | } 84 | 85 | @Test def `node removed - watches unregistered` { 86 | val cluster = mock[Cluster] 87 | cluster.watchesRegistered.returns(new AtomicBoolean(false)) 88 | cluster.initialized.returns(new AtomicBoolean(false)) 89 | 90 | val listener = new ClusterNodesChangedListener(cluster) 91 | listener.nodeRemoved("foo") 92 | 93 | verify.exactly(0)(cluster).claimWork() 94 | verify.exactly(0)(cluster).verifyIntegrity() 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/test/scala/com/boundary/ordasity/listeners/HandoffResultsListenerSpec.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | // 16 | 17 | package com.boundary.ordasity.listeners 18 | 19 | import org.junit.Test 20 | import org.cliffc.high_scale_lib.NonBlockingHashSet 21 | import java.util.{UUID, HashMap} 22 | import com.twitter.common.zookeeper.ZooKeeperClient 23 | import com.boundary.ordasity._ 24 | import java.util.concurrent.atomic.{AtomicReference, AtomicBoolean} 25 | import org.apache.zookeeper.ZooDefs.Ids 26 | import org.apache.zookeeper.{WatchedEvent, Watcher, CreateMode, ZooKeeper} 27 | import org.mockito.{ArgumentCaptor, Mockito} 28 | import java.util.concurrent.ScheduledThreadPoolExecutor 29 | import org.apache.zookeeper.data.Stat 30 | import org.apache.zookeeper.Watcher.Event.{EventType, KeeperState} 31 | import com.simple.simplespec.Spec 32 | 33 | class HandoffResultsListenerSpec extends Spec { 34 | 35 | val config = ClusterConfig.builder(). 36 | setNodeId("testNode"). 37 | setAutoRebalanceInterval(1). 38 | setDrainTime(1). 39 | setHosts("no_existe:2181"). 40 | setHandoffShutdownDelay(1).build() 41 | 42 | class `Handoff Results Listener` { 43 | 44 | @Test def `test 'i requested handoff'` { 45 | val cluster = new Cluster(UUID.randomUUID().toString, null, config) 46 | val listener = new HandoffResultsListener(cluster, config) 47 | 48 | cluster.handoffResults = new HashMap[String, String] 49 | cluster.handoffResults.put("workUnit", "otherNode") 50 | cluster.handoffResults.put("myWorkUnit", "testNode") 51 | cluster.handoffResults.put("somethingElse", "somewhereElse") 52 | 53 | cluster.myWorkUnits.add("workUnit") 54 | cluster.myWorkUnits.add("myWorkUnit") 55 | 56 | listener.iRequestedHandoff("workUnit").must(be(true)) 57 | listener.iRequestedHandoff("myWorkUnit").must(be(false)) 58 | listener.iRequestedHandoff("somethingElse").must(be(false)) 59 | listener.iRequestedHandoff("nothing").must(be(false)) 60 | } 61 | 62 | @Test def `test shutdown after handoff` { 63 | val cluster = mock[Cluster] 64 | val workUnit = "workUnit" 65 | 66 | val handoffResults = new HashMap[String, String] 67 | handoffResults.put(workUnit, "otherNode") 68 | 69 | val myWorkUnits = new NonBlockingHashSet[String] 70 | myWorkUnits.add(workUnit) 71 | 72 | cluster.handoffResults.returns(handoffResults) 73 | cluster.myWorkUnits.returns(myWorkUnits) 74 | 75 | cluster.state.returns(new AtomicReference(NodeState.Started)) 76 | 77 | val listener = new HandoffResultsListener(cluster, config) 78 | listener.shutdownAfterHandoff(workUnit).run() 79 | 80 | verify.one(cluster).shutdownWork(workUnit, doLog = false) 81 | verify.no(cluster).shutdown() 82 | } 83 | 84 | @Test def `test cluster-wide shutdown after finishing all handoff` { 85 | val cluster = mock[Cluster] 86 | val workUnit = "workUnit" 87 | 88 | val handoffResults = new HashMap[String, String] 89 | handoffResults.put(workUnit, "otherNode") 90 | 91 | val myWorkUnits = new NonBlockingHashSet[String] 92 | myWorkUnits.add(workUnit) 93 | 94 | cluster.handoffResults.returns(handoffResults) 95 | cluster.myWorkUnits.returns(myWorkUnits) 96 | 97 | cluster.state.returns(new AtomicReference(NodeState.Draining)) 98 | 99 | val listener = new HandoffResultsListener(cluster, config) 100 | listener.shutdownAfterHandoff(workUnit).run() 101 | 102 | // First, verify that we don't trigger full shutdown with a work unit remaining. 103 | verify.one(cluster).shutdownWork(workUnit, doLog = false) 104 | verify.no(cluster).shutdown() 105 | 106 | myWorkUnits.clear() 107 | 108 | // Then, verify that we do trigger shutdown once the work unit set is empty. 
109 | listener.shutdownAfterHandoff(workUnit).run() 110 | verify.exactly(2)(cluster).shutdownWork(workUnit, doLog = false) 111 | verify.one(cluster).shutdown() 112 | } 113 | 114 | @Test def `test finish handoff` { 115 | val cluster = new Cluster(UUID.randomUUID().toString, null, config) 116 | val listener = new HandoffResultsListener(cluster, config) 117 | val workUnit = "workUnit" 118 | 119 | val mockZK = mock[ZooKeeper] 120 | val mockZKClient = mock[ZooKeeperClient] 121 | mockZKClient.get().returns(mockZK) 122 | cluster.zk = mockZKClient 123 | 124 | cluster.claimedForHandoff.add(workUnit) 125 | cluster.workUnitMap = new HashMap[String, String] 126 | cluster.workUnitMap.put("workUnit", "somewhereElse") 127 | 128 | val path = "/%s/claimed-%s/%s".format(cluster.name, config.workUnitShortName, workUnit) 129 | mockZK.create(path, cluster.myNodeID.getBytes, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL).returns("") 130 | mockZK.getData(equalTo(path), any[Boolean], any[Stat]).returns("otherNode".getBytes) 131 | 132 | val captureWatchter = ArgumentCaptor.forClass(classOf[Watcher]) 133 | Mockito.when(mockZK.exists(equalTo(path), captureWatchter.capture())).thenReturn(new Stat()) 134 | 135 | listener.finishHandoff(workUnit) 136 | // Callback on the watcher 137 | captureWatchter.getValue.process(new WatchedEvent(EventType.NodeDeleted, KeeperState.SyncConnected, path)) 138 | cluster.claimedForHandoff.contains(workUnit).must(be(false)) 139 | } 140 | 141 | @Test def `test finish handoff when exists is null` { 142 | val cluster = new Cluster(UUID.randomUUID().toString, null, config) 143 | val listener = new HandoffResultsListener(cluster, config) 144 | val workUnit = "workUnit" 145 | 146 | val mockZK = mock[ZooKeeper] 147 | val mockZKClient = mock[ZooKeeperClient] 148 | mockZKClient.get().returns(mockZK) 149 | cluster.zk = mockZKClient 150 | 151 | cluster.claimedForHandoff.add(workUnit) 152 | cluster.workUnitMap = new HashMap[String, String] 153 | cluster.workUnitMap.put("workUnit", "somewhereElse") 154 | 155 | val path = "/%s/claimed-%s/%s".format(cluster.name, config.workUnitShortName, workUnit) 156 | mockZK.exists(equalTo(path), any[Watcher]).returns(null) 157 | mockZK.create(path, cluster.myNodeID.getBytes, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL) 158 | 159 | listener.finishHandoff(workUnit) 160 | 161 | cluster.claimedForHandoff.contains(workUnit).must(be(false)) 162 | } 163 | 164 | // TODO: Expand the scope of this test. 165 | // The big kahuna for 'i accepted handoff' 166 | @Test def `test apply for accepting handoff` { 167 | val workUnit = "workUnit" 168 | val cluster = new Cluster(UUID.randomUUID().toString, null, config) 169 | val listener = new HandoffResultsListener(cluster, config) 170 | 171 | val mockZK = mock[ZooKeeper] 172 | val mockZKClient = mock[ZooKeeperClient] 173 | mockZKClient.get().returns(mockZK) 174 | cluster.zk = mockZKClient 175 | 176 | val path = "/%s/claimed-%s/%s".format(cluster.name, config.workUnitShortName, workUnit) 177 | mockZK.getData(equalTo(path), any[Boolean], any[Stat]).returns("otherNode".getBytes) 178 | 179 | Mockito.when(mockZK.create(path, cluster.myNodeID.getBytes, Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL)). 
180 | thenReturn("") 181 | 182 | val captureWatchter = ArgumentCaptor.forClass(classOf[Watcher]) 183 | Mockito.when(mockZK.exists(equalTo(path), captureWatchter.capture())).thenReturn(new Stat()) 184 | 185 | cluster.watchesRegistered.set(true) 186 | cluster.initialized.set(true) 187 | cluster.handoffResults = new HashMap[String, String] 188 | cluster.workUnitMap = new HashMap[String, String] 189 | cluster.handoffResults.put(workUnit, "testNode") 190 | cluster.myWorkUnits.add(workUnit) 191 | cluster.claimedForHandoff.add(workUnit) 192 | cluster.handoffResultsListener.finishHandoff(workUnit) 193 | // Callback on the watcher 194 | captureWatchter.getValue.process(new WatchedEvent(EventType.NodeDeleted, KeeperState.SyncConnected, path)) 195 | 196 | cluster.workUnitMap = new HashMap[String, String] 197 | cluster.workUnitMap.put(workUnit, "somewhereElse") 198 | 199 | listener.apply(workUnit) 200 | 201 | cluster.claimedForHandoff.contains(workUnit).must(be(false)) 202 | } 203 | 204 | /** 205 | * The big kahuna for 'i requested handoff'. This one's kinda heavy on the mocks. 206 | * State: I have a work unit in my active set, and handoffResults says that another 207 | * node has accepted handoff. 208 | */ 209 | @Test def `test apply for requesting handoff` { 210 | val workUnit = "workUnit" 211 | val cluster = mock[Cluster] 212 | 213 | val handoffResults = new HashMap[String, String] 214 | handoffResults.put(workUnit, "otherNode") 215 | 216 | val myWorkUnits = new NonBlockingHashSet[String] 217 | myWorkUnits.add(workUnit) 218 | 219 | val claimedForHandoff = new NonBlockingHashSet[String] 220 | cluster.claimedForHandoff.returns(claimedForHandoff) 221 | 222 | // Mocks 223 | val mockZK = mock[ZooKeeper] 224 | val mockZKClient = mock[ZooKeeperClient] 225 | mockZKClient.get().returns(mockZK) 226 | cluster.zk.returns(mockZKClient) 227 | cluster.pool.returns(new AtomicReference[ScheduledThreadPoolExecutor](new ScheduledThreadPoolExecutor(1))) 228 | 229 | // More mocks. 230 | cluster.handoffResults.returns(handoffResults) 231 | cluster.myWorkUnits.returns(myWorkUnits) 232 | cluster.isMe("otherNode").returns(false) 233 | cluster.getOrElse(handoffResults, workUnit, "").returns("otherNode") 234 | cluster.watchesRegistered.returns(new AtomicBoolean(true)) 235 | cluster.initialized.returns(new AtomicBoolean(true)) 236 | cluster.state.returns(new AtomicReference(NodeState.Started)) 237 | 238 | // Assert that the listener behaves correctly when called, given the above state. 239 | val listener = new HandoffResultsListener(cluster, config) 240 | listener.iRequestedHandoff(workUnit).must(be(true)) 241 | listener.apply(workUnit) 242 | 243 | verify.one(mockZK).delete("/%s/handoff-requests/%s".format(cluster.name, workUnit), -1) 244 | 245 | Thread.sleep((config.handoffShutdownDelay * 1000) + 100) 246 | 247 | verify.one(cluster).shutdownWork(workUnit, doLog = false) 248 | verify.no(cluster).shutdown() 249 | } 250 | } 251 | } 252 | 253 | -------------------------------------------------------------------------------- /src/test/scala/com/boundary/ordasity/listeners/VerifyIntegrityListenerSpec.scala: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright 2011-2012, Boundary 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | // 16 | 17 | package com.boundary.ordasity.listeners 18 | 19 | import org.junit.Test 20 | 21 | import java.util.concurrent.atomic.AtomicBoolean 22 | import org.cliffc.high_scale_lib.NonBlockingHashSet 23 | 24 | import com.boundary.ordasity.{Claimer, Cluster, ClusterConfig} 25 | import com.boundary.ordasity.balancing.MeteredBalancingPolicy 26 | import com.simple.simplespec.Spec 27 | 28 | class VerifyIntegrityListenerSpec extends Spec { 29 | 30 | val config = ClusterConfig.builder(). 31 | setNodeId("testNode"). 32 | setAutoRebalanceInterval(1). 33 | setDrainTime(1). 34 | setHosts("no_existe:2181").build() 35 | 36 | class `Verify Integrity Listener` { 37 | 38 | @Test def `node changed` { 39 | val cluster = mock[Cluster] 40 | cluster.watchesRegistered.returns(new AtomicBoolean(true)) 41 | cluster.initialized.returns(new AtomicBoolean(true)) 42 | cluster.workUnitsPeggedToMe.returns(new NonBlockingHashSet[String]) 43 | cluster.balancingPolicy.returns(new MeteredBalancingPolicy(cluster, config)) 44 | cluster.myNodeID.returns("testNode") 45 | val claimer = mock[Claimer] 46 | claimer.start() 47 | cluster.claimer.returns(claimer) 48 | claimer.requestClaim().answersWith(invocation => { 49 | cluster.claimWork() 50 | true 51 | }) 52 | 53 | val listener = new VerifyIntegrityListener[String](cluster, config) 54 | listener.nodeChanged("foo", "bar") 55 | 56 | verify.one(cluster).claimWork() 57 | verify.one(cluster).verifyIntegrity() 58 | } 59 | 60 | @Test def `node removed` { 61 | val cluster = mock[Cluster] 62 | cluster.watchesRegistered.returns(new AtomicBoolean(true)) 63 | cluster.initialized.returns(new AtomicBoolean(true)) 64 | 65 | val claimer = mock[Claimer] 66 | claimer.start() 67 | cluster.claimer.returns(claimer) 68 | claimer.requestClaim().answersWith(invocation => { 69 | cluster.claimWork() 70 | true 71 | }) 72 | 73 | val listener = new VerifyIntegrityListener(cluster, config) 74 | listener.nodeRemoved("foo") 75 | 76 | verify.one(cluster).claimWork() 77 | verify.one(cluster).verifyIntegrity() 78 | } 79 | 80 | @Test def `node changed - watches unregistered` { 81 | val cluster = mock[Cluster] 82 | cluster.watchesRegistered.returns(new AtomicBoolean(false)) 83 | cluster.initialized.returns(new AtomicBoolean(false)) 84 | 85 | val listener = new VerifyIntegrityListener[String](cluster, config) 86 | listener.nodeChanged("foo", "bar") 87 | 88 | verify.exactly(0)(cluster).claimWork() 89 | verify.exactly(0)(cluster).verifyIntegrity() 90 | } 91 | 92 | @Test def `node removed - watches unregistered` { 93 | val cluster = mock[Cluster] 94 | cluster.watchesRegistered.returns(new AtomicBoolean(false)) 95 | cluster.initialized.returns(new AtomicBoolean(false)) 96 | 97 | val listener = new VerifyIntegrityListener[String](cluster, config) 98 | listener.nodeRemoved("foo") 99 | 100 | verify.exactly(0)(cluster).claimWork() 101 | verify.exactly(0)(cluster).verifyIntegrity() 102 | } 103 | } 104 | } 105 | --------------------------------------------------------------------------------
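For readers skimming these specs, the wiring they repeatedly mock out (see the makeCluster() helpers above) looks roughly like the following outside of a test: a SmartListener handed to a Cluster built from ClusterConfig.builder(). This is a minimal sketch, not code from the repository — the repository's own Example.scala remains the authoritative example. Only the builder methods, the SmartListener callback signatures, and the Cluster constructor that appear in the specs are taken as given; the Meter import path, the Unit return type of the callbacks, and the host and service names are assumptions.

    import java.util.UUID
    import com.boundary.ordasity.{Cluster, ClusterConfig, SmartListener}
    import com.twitter.common.zookeeper.ZooKeeperClient
    import com.yammer.metrics.core.Meter  // assumed import path for the Meter handed to startWork

    object ExampleNode {
      def main(args: Array[String]) {
        // Build a config with the same builder calls the specs use; the host string is a placeholder.
        val config = ClusterConfig.builder().
          setNodeId(UUID.randomUUID().toString).
          setHosts("localhost:2181").
          setAutoRebalanceInterval(60).
          setDrainTime(60).build()

        // A listener with the same callback shape as the anonymous SmartListeners in the specs.
        val listener = new SmartListener {
          def onJoin(client: ZooKeeperClient) { /* open resources shared by all work units */ }
          def startWork(workUnit: String, meter: Meter) { /* begin work for this unit; mark the meter as items are processed */ }
          def shutdownWork(workUnit: String) { /* stop work for this unit */ }
          def onLeave() { /* release shared resources */ }
        }

        // "example-service" is an illustrative name; join() starts participation in the cluster.
        new Cluster("example-service", listener, config).join()
      }
    }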