├── .gitignore ├── DCAwareFailoverDropwizard ├── .travis.yml ├── .gitignore ├── src │ └── main │ │ ├── resources │ │ └── META-INF │ │ │ └── services │ │ │ └── org.stuartgunter.dropwizard.cassandra.loadbalancing.LoadBalancingPolicyFactory │ │ └── java │ │ └── org │ │ └── adejanovski │ │ └── cassandra │ │ └── policies │ │ └── dropwizard │ │ └── DCAwareFailoverRoundRobinPolicyFactory.java ├── README.md └── pom.xml ├── DCAwareFailoverRoundRobinPolicy ├── .travis.yml ├── .gitignore ├── src │ ├── main │ │ ├── resources │ │ │ └── META-INF │ │ │ │ └── services │ │ │ │ └── org.stuartgunter.dropwizard.cassandra.loadbalancing.LoadBalancingPolicyFactory │ │ └── java │ │ │ └── org │ │ │ └── adejanovski │ │ │ └── cassandra │ │ │ └── policies │ │ │ └── DCAwareFailoverRoundRobinPolicy.java │ └── test │ │ └── java │ │ └── org │ │ └── adejanovski │ │ └── cassandra │ │ └── policies │ │ └── DCAwareFailoverRoundRobinPolicyTest.java ├── README.md └── pom.xml ├── .settings ├── org.eclipse.core.resources.prefs └── org.eclipse.m2e.core.prefs ├── .project └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /DCAwareFailoverDropwizard/.travis.yml: -------------------------------------------------------------------------------- 1 | language: java -------------------------------------------------------------------------------- /DCAwareFailoverRoundRobinPolicy/.travis.yml: -------------------------------------------------------------------------------- 1 | language: java -------------------------------------------------------------------------------- /.settings/org.eclipse.core.resources.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | encoding/=UTF-8 3 | -------------------------------------------------------------------------------- 
/.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /DCAwareFailoverDropwizard/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | /.classpath 4 | /.project 5 | test-output/ 6 | dependency-reduced-pom.xml 7 | -------------------------------------------------------------------------------- /DCAwareFailoverRoundRobinPolicy/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | /.classpath 4 | /.project 5 | test-output/ 6 | dependency-reduced-pom.xml 7 | -------------------------------------------------------------------------------- /DCAwareFailoverDropwizard/src/main/resources/META-INF/services/org.stuartgunter.dropwizard.cassandra.loadbalancing.LoadBalancingPolicyFactory: -------------------------------------------------------------------------------- 1 | org.adejanovski.cassandra.policies.dropwizard.DCAwareFailoverRoundRobinPolicyFactory -------------------------------------------------------------------------------- /DCAwareFailoverRoundRobinPolicy/src/main/resources/META-INF/services/org.stuartgunter.dropwizard.cassandra.loadbalancing.LoadBalancingPolicyFactory: -------------------------------------------------------------------------------- 1 | org.adejanovski.cassandra.policies.dropwizard.DCAwareFailoverRoundRobinPolicyFactory -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | dcaware-failover-policy-parent 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.m2e.core.maven2Builder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.m2e.core.maven2Nature 16 | 17 | 18 
| --------------------------------------------------------------------------------
/DCAwareFailoverDropwizard/src/main/java/org/adejanovski/cassandra/policies/dropwizard/DCAwareFailoverRoundRobinPolicyFactory.java:
--------------------------------------------------------------------------------
package org.adejanovski.cassandra.policies.dropwizard;

import org.adejanovski.cassandra.policies.DCAwareFailoverRoundRobinPolicy;
import org.stuartgunter.dropwizard.cassandra.loadbalancing.*;

import com.datastax.driver.core.policies.DCAwareRoundRobinPolicy;
import com.datastax.driver.core.policies.LoadBalancingPolicy;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;

/**
 * A factory for configuring and building
 * {@link DCAwareFailoverRoundRobinPolicy} instances.
 * <p>
 * <b>Configuration Parameters:</b>
 * <table>
 * <tr>
 * <td>Name</td>
 * <td>Default</td>
 * <td>Description</td>
 * </tr>
 * <tr>
 * <td>localDC</td>
 * <td>No default.</td>
 * <td>The name of the local datacenter (as known by Cassandra).</td>
 * </tr>
 * <tr>
 * <td>backupDC</td>
 * <td>No default.</td>
 * <td>The name of the backup datacenter (as known by Cassandra).</td>
 * </tr>
 * <tr>
 * <td>hostDownSwitchThreshold</td>
 * <td>No default.</td>
 * <td>The number of lost nodes that will trigger the switch to the backup
 * datacenter.</td>
 * </tr>
 * <tr>
 * <td>switchBackDelayFactor</td>
 * <td>0</td>
 * <td>The connection can switch back if uptime &gt;=
 * downtime*switchBackDelayFactor (gives time for hinted handoff to complete)</td>
 * </tr>
 * <tr>
 * <td>noSwitchBackDowntimeDelay</td>
 * <td>0</td>
 * <td>Switch back cannot happen if downtime &gt; noSwitchBackDowntimeDelay (in
 * seconds)</td>
 * </tr>
 * </table>
 */
@JsonTypeName("dcAwareFailoverRoundRobin")
public class DCAwareFailoverRoundRobinPolicyFactory implements
        LoadBalancingPolicyFactory {

    /** Value used for {@code switchBackDelayFactor} when the configuration omits it. */
    private static final float DEFAULT_SWITCH_BACK_DELAY_FACTOR = 0f;

    /** Value used for {@code noSwitchBackDowntimeDelay} when the configuration omits it. */
    private static final int DEFAULT_NO_SWITCH_BACK_DOWNTIME_DELAY = 0;

    private String localDC;
    private String backupDC;
    private Integer hostDownSwitchThreshold;
    private Float switchBackDelayFactor;
    private Integer noSwitchBackDowntimeDelay;

    /** @return the configured local datacenter name, or {@code null} if not set */
    @JsonProperty
    public String getLocalDC() {
        return localDC;
    }

    /** @param localDC the name of the local datacenter */
    @JsonProperty
    public void setLocalDC(String localDC) {
        this.localDC = localDC;
    }

    /** @return the configured backup datacenter name, or {@code null} if not set */
    @JsonProperty
    public String getBackupDC() {
        return backupDC;
    }

    /** @param backupDC the name of the backup datacenter */
    @JsonProperty
    public void setBackupDC(String backupDC) {
        this.backupDC = backupDC;
    }

    /** @return the configured host-down threshold, or {@code null} if not set */
    @JsonProperty
    public Integer getHostDownSwitchThreshold() {
        return hostDownSwitchThreshold;
    }

    /** @param hostDownSwitchThreshold the number of lost nodes that triggers the switch */
    @JsonProperty
    public void setHostDownSwitchThreshold(Integer hostDownSwitchThreshold) {
        this.hostDownSwitchThreshold = hostDownSwitchThreshold;
    }

    /** @return the configured switch-back delay factor, or {@code null} if not set */
    @JsonProperty
    public Float getSwitchBackDelayFactor() {
        return switchBackDelayFactor;
    }

    /** @param switchBackDelayFactor multiplier applied to the downtime before switching back */
    @JsonProperty
    public void setSwitchBackDelayFactor(Float switchBackDelayFactor) {
        this.switchBackDelayFactor = switchBackDelayFactor;
    }

    /** @return the configured no-switch-back downtime delay in seconds, or {@code null} if not set */
    @JsonProperty
    public Integer getNoSwitchBackDowntimeDelay() {
        return noSwitchBackDowntimeDelay;
    }

    /** @param noSwitchBackDowntimeDelay downtime (in seconds) above which switch back cannot happen */
    @JsonProperty
    public void setNoSwitchBackDowntimeDelay(Integer noSwitchBackDowntimeDelay) {
        this.noSwitchBackDowntimeDelay = noSwitchBackDowntimeDelay;
    }

    /**
     * Builds the policy from the configured values.
     * <p>
     * The two switch-back settings fall back to their documented default of
     * {@code 0} when they were left out of the configuration, instead of
     * handing {@code null} to the policy constructor.
     *
     * @return a new {@link DCAwareFailoverRoundRobinPolicy}
     */
    @Override
    public LoadBalancingPolicy build() {
        // NOTE(review): the policy constructor is not visible from this file; it
        // presumably takes primitive numeric arguments, in which case a null
        // value here would fail on unboxing - confirm against the policy class.
        float delayFactor = switchBackDelayFactor == null
                ? DEFAULT_SWITCH_BACK_DELAY_FACTOR
                : switchBackDelayFactor.floatValue();
        int downtimeDelay = noSwitchBackDowntimeDelay == null
                ? DEFAULT_NO_SWITCH_BACK_DOWNTIME_DELAY
                : noSwitchBackDowntimeDelay.intValue();

        return new DCAwareFailoverRoundRobinPolicy(localDC, backupDC,
                hostDownSwitchThreshold, delayFactor, downtimeDelay);
    }

}
--------------------------------------------------------------------------------
/DCAwareFailoverRoundRobinPolicy/src/test/java/org/adejanovski/cassandra/policies/DCAwareFailoverRoundRobinPolicyTest.java:
--------------------------------------------------------------------------------
1 | package org.adejanovski.cassandra.policies; 2 | 3 | import java.net.InetAddress; 4 | import 
java.net.UnknownHostException; 5 | import java.util.ArrayList; 6 | import java.util.Collection; 7 | 8 | import org.adejanovski.cassandra.policies.DCAwareFailoverRoundRobinPolicy; 9 | import org.mockito.Mockito; 10 | import org.testng.annotations.BeforeClass; 11 | import org.testng.annotations.BeforeMethod; 12 | import org.testng.annotations.Test; 13 | 14 | import static org.testng.Assert.*; 15 | 16 | import com.datastax.driver.core.Cluster; 17 | import com.datastax.driver.core.Host; 18 | import com.datastax.driver.core.HostDistance; 19 |
/**
 * Tests for {@link DCAwareFailoverRoundRobinPolicy} built with local DC "dc1",
 * backup DC "dc2" and a host-down switch threshold of 2.
 * <p>
 * Six mocked hosts are registered before each test: the first three report
 * datacenter "dc1", the last three report "dc2".
 */
public class DCAwareFailoverRoundRobinPolicyTest {
    /** Total number of mocked hosts. */
    private static final int HOST_COUNT = 6;

    /** Hosts whose index is below this value are mocked in "dc1", the others in "dc2". */
    private static final int DC1_HOST_COUNT = 3;

    DCAwareFailoverRoundRobinPolicy policy;
    // Typed collection: iterating a raw Collection with "for (Host host : hosts)" does not compile.
    Collection<Host> hosts;
    Cluster cluster;

    /** Creates the policy and initialises it with the mocked hosts. */
    @BeforeMethod
    public void setUp() throws UnknownHostException {
        hosts = new ArrayList<Host>();
        policy = new DCAwareFailoverRoundRobinPolicy("dc1", "dc2", 2);
        for (int i = 0; i < HOST_COUNT; i++) {
            Host host = Mockito.mock(Host.class);
            InetAddress address = InetAddress.getByName("127.0.0." + i);
            Mockito.when(host.getAddress()).thenReturn(address);
            Mockito.when(host.getDatacenter()).thenReturn(i < DC1_HOST_COUNT ? "dc1" : "dc2");
            hosts.add(host);
        }

        cluster = Mockito.mock(Cluster.class);
        policy.init(cluster, hosts);
    }

    /** With every host up, dc1 hosts are LOCAL and dc2 hosts are REMOTE or IGNORED. */
    @Test
    public void testDistance() throws UnknownHostException {
        int i = 0;
        for (Host host : hosts) {
            // Each host is signalled up right before its own distance is checked.
            policy.onUp(host);
            if (i < DC1_HOST_COUNT) {
                // dc1 is local
                assertEquals(policy.distance(host), HostDistance.LOCAL);
            } else {
                assertRemoteOrIgnored(host);
            }
            i++;
        }
    }

    /** Losing one dc1 host stays below the threshold of 2: dc1 is still the local DC. */
    @Test
    public void testLostOneNode() throws UnknownHostException {
        // we lost the first node, which doesn't trigger the switch
        takeDownFirstHosts(1);
        assertLocalDatacenterIsDc1(true);
    }

    /** Losing two dc1 hosts reaches the threshold of 2: dc2 hosts become LOCAL. */
    @Test
    public void testLostTwoNodes() throws UnknownHostException {
        // we lost the first two nodes, which triggers the switch
        takeDownFirstHosts(2);
        // dc1 is remote now (lost 2 nodes)
        assertLocalDatacenterIsDc1(false);
    }

    /** After the switch, getting one dc1 host back does not switch back to dc1. */
    @Test
    public void testSwitchBackProtection() throws UnknownHostException {
        // we lost the first two nodes, which triggers the switch
        takeDownFirstHosts(2);

        // first host is back up
        policy.onUp(hosts.iterator().next());

        // dc1 is still remote now (lost 2 nodes, and got 1 back)
        assertLocalDatacenterIsDc1(false);
    }

    /** Signals the first {@code count} hosts (in registration order) as down. */
    private void takeDownFirstHosts(int count) {
        int i = 0;
        for (Host host : hosts) {
            if (i >= count) {
                break;
            }
            policy.onDown(host);
            i++;
        }
    }

    /**
     * Checks the distance of every host: hosts of the expected local DC must be
     * LOCAL, hosts of the other DC must be REMOTE or IGNORED.
     *
     * @param dc1IsLocal {@code true} if dc1 is expected to be the local DC,
     *                   {@code false} if dc2 is
     */
    private void assertLocalDatacenterIsDc1(boolean dc1IsLocal) {
        int i = 0;
        for (Host host : hosts) {
            boolean hostIsInDc1 = i < DC1_HOST_COUNT;
            if (hostIsInDc1 == dc1IsLocal) {
                assertEquals(policy.distance(host), HostDistance.LOCAL);
            } else {
                assertRemoteOrIgnored(host);
            }
            i++;
        }
    }

    /** Asserts that the policy reports the host as REMOTE or IGNORED. */
    private void assertRemoteOrIgnored(Host host) {
        HostDistance distance = policy.distance(host);
        assertTrue(distance.equals(HostDistance.REMOTE) || distance.equals(HostDistance.IGNORED));
    }
}
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 | 3 | 4.0.0 4 | 5 | com.github.adejanovski.cassandra.policies 6 | dcaware-failover-policy-parent 7 | 1.1.0 8 | pom 9 | DCAwareFailoverRoundRobinPolicy 10 | DC Aware Failover Round Robin Policy 
for Datastax Java Driver 11 | https://github.com/adejanovski/cassandra-dcaware-failover 12 | 13 | 2015 14 | 15 | 16 | The Apache Software License, Version 2.0 17 | http://www.apache.org/licenses/LICENSE-2.0.txt 18 | repo 19 | 20 | 21 | 22 | 23 | 24 | adejanovski 25 | Alexander Dejanovski 26 | 27 | developer 28 | 29 | 30 | 31 | 32 | 33 | DCAwareFailoverRoundRobinPolicy 34 | DCAwareFailoverDropwizard 35 | 36 | 37 | 38 | UTF-8 39 | 1.7 40 | 41 | 42 | 43 | 44 | 45 | maven-compiler-plugin 46 | 3.1 47 | 48 | ${java.version} 49 | ${java.version} 50 | true 51 | true 52 | true 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | release 62 | 63 | default 64 | 65 | 66 | 67 | 68 | maven-surefire-plugin 69 | 2.17 70 | 71 | true 72 | 73 | 74 | 75 | test 76 | 77 | test 78 | 79 | 80 | true 81 | 82 | 83 | 84 | 85 | 86 | maven-compiler-plugin 87 | 88 | -Xlint:all 89 | true 90 | true 91 | 92 | 93 | 94 | org.apache.maven.plugins 95 | maven-javadoc-plugin 96 | 2.10.3 97 | 98 | -Xdoclint:none 99 | 100 | 101 | 102 | org.apache.maven.plugins 103 | maven-source-plugin 104 | 2.4 105 | 106 | 107 | attach-sources 108 | verify 109 | 110 | jar-no-fork 111 | 112 | 113 | 114 | 115 | 116 | org.apache.maven.plugins 117 | maven-gpg-plugin 118 | 119 | 120 | sign-artifacts 121 | verify 122 | 123 | sign 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | scm:git:git@github.com:adejanovski/cassandra-dcaware-failover.git 137 | scm:git:git@github.com:adejanovski/cassandra-dcaware-failover.git 138 | https://github.com/com:adejanovski/cassandra-dcaware-failover 139 | HEAD 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /DCAwareFailoverRoundRobinPolicy/README.md: -------------------------------------------------------------------------------- 1 | # DC Aware Failover Round Robin Policy for Datastax Java Driver 2 | 3 | Properly handling failover in multi DC Cassandra clusters with LOCAL_* CL usually require writing specific code to 
properly process failover scenarios. 4 | 5 | The Cassandra setups that fail to deliver automatic DC failure handling are those with 2 DCs or those with 1+ real time DCs and 1+ analytical/search DC. 6 | 7 | Having a 2 DC cluster and using CL.QUORUM will lead to systematic failure when a DC goes down. 8 | Using CL.ONE will lead to inconsistencies if a DC goes down and comes back up after a while. 9 | Using CL.LOCAL_QUORUM with DCAwareRoundRobinPolicy will segregate the traffic to a single DC, but the load balancing policy will prevent switching to the other DC in case of failure (in its basic configuration). 10 | This is a conservative behavior, because if done differently, when the primary DC comes back up it will probably not be up to date, and it could even be out of the hinted handoff window (which will require a full repair). 11 | In case you have an analytical or a search DC, you must segregate traffic in order to guarantee low latencies, which forces the use of CL.LOCAL_* consistencies. The behavior in case of failure is the same as the one above. 12 | 13 | Handling those failure scenarios has to be implemented in client applications, by doing the following : 14 | * detect how many nodes are down in the local DC 15 | * if too many nodes are down to allow queries to succeed (depends on the replication factor), the app has to switch to a backup dc (either having two connections and switching between them, or recreating a new connection with a different LB policy conf). 16 | * Switching automatically back at any time to the local DC must be prevented because repair might be necessary in order to regain consistency 17 | 18 | 19 | The DCAwareFailoverRoundRobinPolicy is based on the DCAwareRoundRobinPolicy and handles the above one way switch without having to code it. 
It takes the following parameters : 20 | * localDc : the primary DC of the app 21 | * backupDc : the DC to switch to in case the primary DC loses a defined number of nodes 22 | * hostDownSwitchThreshold : the number of lost nodes that will trigger the switch 23 | * switchBackDelayFactor : The connection can switch back if uptime >= downtime*switchBackDelayFactor (gives time for hinted handoff to complete) 24 | * noSwitchBackDowntimeDelay : Switch back cannot happen if downtime > noSwitchBackDowntimeDelay (in seconds) - usually the hinted handoff window (default in Cassandra is 3h) 25 | 26 | This policy does a one way switch to the backup DC to prevent inconsistencies, and the connection must be recreated in order to switch back to the local DC (which should require either rebooting the app, or recreating the Cluster and Session objects), unless auto back switch conditions are fulfilled : 27 | * Local DC has regained enough nodes 28 | * uptime is greater than downtime*switchBackDelayFactor 29 | * downtime has been lower than noSwitchBackDowntimeDelay 30 | 31 | 32 | # Usage 33 | 34 | Include this project in your classpath and use it as any other load balancing policy when creating the cluster object: 35 | 36 | ```java 37 | Cluster cluster = Cluster.builder() 38 | .addContactPoints("127.0.0.1","127.0.0.2","127.0.0.3","127.0.0.4","127.0.0.5") 39 | .withLoadBalancingPolicy(new org.adejanovski.cassandra.policies.DCAwareFailoverRoundRobinPolicy("primaryDc","backupDc",2)) 40 | .build(); 41 | ``` 42 | 43 | The above will segregate coordination traffic to DC 'primaryDc' and if at least 2 nodes are lost in this DC, switch to 'backupDc' and never go back. 
44 | 45 | To authorize auto back switch you can add the switchBackDelayFactor and noSwitchBackDowntimeDelay : 46 | 47 | ```java 48 | Cluster cluster = Cluster.builder() 49 | .addContactPoints("127.0.0.1","127.0.0.2","127.0.0.3","127.0.0.4","127.0.0.5") 50 | .withLoadBalancingPolicy(new org.adejanovski.cassandra.policies.DCAwareFailoverRoundRobinPolicy("primaryDc","backupDc", 2, 1.5, 10800)) 51 | .build(); 52 | ``` 53 | 54 | The above will allow automatically switching back if conditions are fulfilled : 55 | * downtime didn't last more than 3 hours (10800 seconds) 56 | * uptime since local DC recovery has lasted more than downtime*1.5 (if downtime was 30 min, back switch will be prevented for 45 min after local DC recovery) 57 | 58 | # Usage in Dropwizard 59 | 60 | The DCAwareFailoverRoundRobinPolicy is "dropwizard-ready". 61 | 62 | Use it by declaring it in the yml config file, and it can be combined with any appropriate parent policy: 63 | 64 | ```yaml 65 | loadBalancingPolicy: 66 | type: tokenAware 67 | shuffleReplicas: true 68 | subPolicy: 69 | type: dcAwareFailoverRoundRobin 70 | localDC: primaryDc 71 | backupDC: backupDc 72 | hostDownSwitchThreshold: 2 73 | switchBackDelayFactor: 1.5 74 | noSwitchBackDowntimeDelay: 10800 75 | ``` 76 | 77 | 78 | -------------------------------------------------------------------------------- /DCAwareFailoverDropwizard/README.md: -------------------------------------------------------------------------------- 1 | # DC Aware Failover Round Robin Policy for Datastax Java Driver 2 | 3 | Properly handling failover in multi DC Cassandra clusters with LOCAL_* CL usually requires writing specific code to properly process failover scenarios. 4 | 5 | The Cassandra setups that fail to deliver automatic DC failure handling are those with 2 DCs or those with 1+ real time DCs and 1+ analytical/search DC. 6 | 7 | Having a 2 DC cluster and using CL.QUORUM will lead to systematic failure when a DC goes down. 
8 | Using CL.ONE will lead to inconsistencies if a DC goes down and comes back up after a while. 9 | Using CL.LOCAL_QUORUM with DCAwareRoundRobinPolicy will segregate the traffic to a single DC, but the load balancing policy will prevent switching to the other DC in case of failure (in its basic configuration). 10 | This is a conservative behavior, because if done differently, when the primary DC comes back up it will probably not be up to date, and it could even be out of the hinted handoff window (which will require a full repair). 11 | In case you have an analytical or a search DC, you must segregate traffic in order to guarantee low latencies, which forces the use of CL.LOCAL_* consistencies. The behavior in case of failure is the same as the one above. 12 | 13 | Handling those failure scenarios has to be implemented in client applications, by doing the following : 14 | * detect how many nodes are down in the local DC 15 | * if too many nodes are down to allow queries to succeed (depends on the replication factor), the app has to switch to a backup dc (either having two connections and switching between them, or recreating a new connection with a different LB policy conf). 16 | * Switching automatically back at any time to the local DC must be prevented because repair might be necessary in order to regain consistency 17 | 18 | 19 | The DCAwareFailoverRoundRobinPolicy is based on the DCAwareRoundRobinPolicy and handles the above one way switch without having to code it. 
It takes the following parameters : 20 | * localDc : the primary DC of the app 21 | * backupDc : the DC to switch to in case the primary DC loses a defined number of nodes 22 | * hostDownSwitchThreshold : the number of lost nodes that will trigger the switch 23 | * switchBackDelayFactor : The connection can switch back if uptime >= downtime*switchBackDelayFactor (gives time for hinted handoff to complete) 24 | * noSwitchBackDowntimeDelay : Switch back cannot happen if downtime > noSwitchBackDowntimeDelay (in seconds) - usually the hinted handoff window (default in Cassandra is 3h) 25 | 26 | This policy does a one way switch to the backup DC to prevent inconsistencies, and the connection must be recreated in order to switch back to the local DC (which should require either rebooting the app, or recreating the Cluster and Session objects), unless auto back switch conditions are fulfilled : 27 | * Local DC has regained enough nodes 28 | * uptime is greater than downtime*switchBackDelayFactor 29 | * downtime has been lower than noSwitchBackDowntimeDelay 30 | 31 | 32 | # Usage 33 | 34 | Include this project in your classpath and use it as any other load balancing policy when creating the cluster object: 35 | 36 | ```java 37 | Cluster cluster = Cluster.builder() 38 | .addContactPoints("127.0.0.1","127.0.0.2","127.0.0.3","127.0.0.4","127.0.0.5") 39 | .withLoadBalancingPolicy(new org.adejanovski.cassandra.policies.DCAwareFailoverRoundRobinPolicy("primaryDc","backupDc",2)) 40 | .build(); 41 | ``` 42 | 43 | The above will segregate coordination traffic to DC 'primaryDc' and if at least 2 nodes are lost in this DC, switch to 'backupDc' and never go back. 
44 | 45 | To authorize auto back switch you can add the switchBackDelayFactor and noSwitchBackDowntimeDelay : 46 | 47 | ```java 48 | Cluster cluster = Cluster.builder() 49 | .addContactPoints("127.0.0.1","127.0.0.2","127.0.0.3","127.0.0.4","127.0.0.5") 50 | .withLoadBalancingPolicy(new org.adejanovski.cassandra.policies.DCAwareFailoverRoundRobinPolicy("primaryDc","backupDc", 2, 1.5, 10800)) 51 | .build(); 52 | ``` 53 | 54 | The above will allow automatically switching back if conditions are fulfilled : 55 | * downtime didn't last more than 3 hours (10800 seconds) 56 | * uptime since local DC recovery has lasted more than downtime*1.5 (if downtime was 30 min, back switch will be prevented for 45 min after local DC recovery) 57 | 58 | # Usage in Dropwizard 59 | 60 | The DCAwareFailoverRoundRobinPolicy is "dropwizard-ready". 61 | 62 | Use it by declaring it in the yml config file, and it can be combined with any appropriate parent policy: 63 | 64 | ```yaml 65 | loadBalancingPolicy: 66 | type: tokenAware 67 | shuffleReplicas: true 68 | subPolicy: 69 | type: dcAwareFailoverRoundRobin 70 | localDC: primaryDc 71 | backupDC: backupDc 72 | hostDownSwitchThreshold: 2 73 | switchBackDelayFactor: 1.5 74 | noSwitchBackDowntimeDelay: 10800 75 | ``` 76 | 77 | ### Build status 78 | 79 | [![Build Status](https://travis-ci.org/adejanovski/cassandra-dcaware-failover.svg)](https://travis-ci.org/adejanovski/cassandra-dcaware-failover) 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /DCAwareFailoverRoundRobinPolicy/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | com.github.adejanovski.cassandra.policies 7 | dcaware-failover-policy-parent 8 | 1.1.0 9 | 10 | 11 | dcaware-failover-policy 12 | jar 13 | 14 | DCAwareFailoverRoundRobinPolicy 15 | http://maven.apache.org 16 | 17 | 18 | UTF-8 19 | 2.1.9 20 | 21 | 22 | 23 | 24 | 25 | com.datastax.cassandra 26 | cassandra-driver-core 27 | 
${cassandra.driver.version} 28 | 29 | 30 | 31 | org.testng 32 | testng 33 | 6.9.4 34 | test 35 | 36 | 37 | 38 | org.mockito 39 | mockito-all 40 | 1.10.8 41 | test 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | META-INF/services 50 | 51 | 52 | 53 | 54 | 55 | 56 | org.apache.maven.plugins 57 | maven-assembly-plugin 58 | 2.4 59 | 60 | 61 | org.apache.apache.resources 62 | apache-source-release-assembly-descriptor 63 | 1.0.3 64 | 65 | 66 | 67 | 68 | org.apache.maven.plugins 69 | maven-clean-plugin 70 | 2.5 71 | 72 | 73 | org.apache.maven.plugins 74 | maven-compiler-plugin 75 | 3.0 76 | 77 | 78 | org.apache.maven.plugins 79 | maven-deploy-plugin 80 | 2.7 81 | 82 | 83 | org.apache.maven.plugins 84 | maven-failsafe-plugin 85 | 2.14 86 | 87 | 88 | org.apache.maven.plugins 89 | maven-gpg-plugin 90 | 1.4 91 | 92 | ${gpg.passphrase} 93 | ${gpg.useagent} 94 | 95 | 96 | 97 | org.apache.maven.plugins 98 | maven-install-plugin 99 | 2.4 100 | 101 | 102 | org.apache.maven.plugins 103 | maven-jar-plugin 104 | 2.4 105 | 106 | 107 | org.apache.maven.plugins 108 | maven-javadoc-plugin 109 | 2.9 110 | 111 | 112 | org.apache.maven.plugins 113 | maven-release-plugin 114 | 2.4 115 | 116 | deploy 117 | 118 | 119 | 120 | org.apache.maven.plugins 121 | maven-resources-plugin 122 | 2.6 123 | 124 | 125 | org.apache.maven.plugins 126 | maven-site-plugin 127 | 3.2 128 | 129 | 130 | org.apache.maven.plugins 131 | maven-surefire-plugin 132 | 2.14 133 | 134 | 135 | org.apache.maven.plugins 136 | maven-source-plugin 137 | 2.2.1 138 | 139 | 140 | org.codehaus.mojo 141 | animal-sniffer-maven-plugin 142 | 1.9 143 | 144 | 145 | org.codehaus.mojo 146 | build-helper-maven-plugin 147 | 1.7 148 | 149 | 150 | org.codehaus.mojo 151 | cassandra-maven-plugin 152 | 1.2.1-1 153 | 154 | 155 | 157 | 158 | org.eclipse.m2e 159 | lifecycle-mapping 160 | 1.0.0 161 | 162 | 163 | 164 | 165 | 166 | 167 | org.codehaus.mojo 168 | 169 | 170 | build-helper-maven-plugin 171 | 172 | 173 | [1.7,) 174 | 175 | 176 | 177 | 
reserve-network-port 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | maven-compiler-plugin 194 | 195 | 1.6 196 | 1.6 197 | 198 | 199 | 200 | maven-surefire-plugin 201 | 202 | 203 | **/*Test.class 204 | 205 | 206 | 207 | 208 | maven-failsafe-plugin 209 | 210 | 211 | **/*Test.class 212 | 213 | 214 | 215 | 216 | 217 | integration-test 218 | verify 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | default 229 | 230 | default 231 | 232 | 233 | true 234 | 235 | 236 | 237 | 238 | maven-surefire-plugin 239 | 2.17 240 | 241 | true 242 | 243 | 244 | 245 | test 246 | 247 | test 248 | 249 | 250 | true 251 | 252 | 253 | 254 | 255 | 256 | maven-compiler-plugin 257 | 258 | -Xlint:all 259 | true 260 | true 261 | 262 | 263 | 264 | org.apache.maven.plugins 265 | maven-javadoc-plugin 266 | 267 | -Xdoclint:none 268 | 269 | 2.10.3 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | release 278 | 279 | default 280 | 281 | 282 | 283 | 284 | maven-surefire-plugin 285 | 2.17 286 | 287 | true 288 | 289 | 290 | 291 | test 292 | 293 | test 294 | 295 | 296 | true 297 | 298 | 299 | 300 | 301 | 302 | maven-compiler-plugin 303 | 304 | -Xlint:all 305 | true 306 | true 307 | 308 | 309 | 310 | org.apache.maven.plugins 311 | maven-javadoc-plugin 312 | 2.10.3 313 | 314 | -Xdoclint:none 315 | 316 | 317 | 318 | org.apache.maven.plugins 319 | maven-source-plugin 320 | 2.4 321 | 322 | 323 | attach-sources 324 | verify 325 | 326 | jar-no-fork 327 | 328 | 329 | 330 | 331 | 332 | org.apache.maven.plugins 333 | maven-gpg-plugin 334 | 335 | 336 | sign-artifacts 337 | verify 338 | 339 | sign 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | test 352 | 353 | default 354 | 355 | 356 | 357 | 358 | org.apache.maven.plugins 359 | maven-surefire-plugin 360 | 2.18.1 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | scm:git:git@github.com:adejanovski/cassandra-dcaware-failover.git 369 | 
scm:git:git@github.com:adejanovski/cassandra-dcaware-failover.git 370 | https://github.com/com:adejanovski/cassandra-dcaware-failover 371 | HEAD 372 | 373 | 374 | 375 | -------------------------------------------------------------------------------- /DCAwareFailoverDropwizard/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | com.github.adejanovski.cassandra.policies 7 | dcaware-failover-policy-parent 8 | 1.1.0 9 | 10 | 11 | dcaware-failover-policy-dropwizard 12 | jar 13 | 14 | DCAwareFailoverDropwizard 15 | https://github.com/adejanovski/cassandra-dcaware-failover 16 | 17 | 18 | UTF-8 19 | 2.1.9 20 | 0.10.0 21 | 22 | 23 | 24 | 25 | 26 | com.github.adejanovski.cassandra.policies 27 | dcaware-failover-policy 28 | ${project.parent.version} 29 | 30 | 31 | metrics-core 32 | com.codahale.metrics 33 | 34 | 35 | 36 | 37 | 38 | org.stuartgunter 39 | dropwizard-cassandra 40 | 1.1-dw0.8-cs2.1 41 | 42 | 43 | com.datastax.cassandra 44 | cassandra-driver-core 45 | 46 | 47 | 48 | 49 | 50 | org.testng 51 | testng 52 | 6.9.4 53 | test 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | META-INF/services 63 | 64 | 65 | 66 | 67 | 68 | 69 | org.apache.maven.plugins 70 | maven-assembly-plugin 71 | 2.4 72 | 73 | 74 | org.apache.apache.resources 75 | apache-source-release-assembly-descriptor 76 | 1.0.3 77 | 78 | 79 | 80 | 81 | org.apache.maven.plugins 82 | maven-clean-plugin 83 | 2.5 84 | 85 | 86 | org.apache.maven.plugins 87 | maven-compiler-plugin 88 | 3.0 89 | 90 | 91 | org.apache.maven.plugins 92 | maven-deploy-plugin 93 | 2.7 94 | 95 | 96 | org.apache.maven.plugins 97 | maven-failsafe-plugin 98 | 2.14 99 | 100 | 101 | org.apache.maven.plugins 102 | maven-gpg-plugin 103 | 1.4 104 | 105 | ${gpg.passphrase} 106 | ${gpg.useagent} 107 | 108 | 109 | 110 | org.apache.maven.plugins 111 | maven-install-plugin 112 | 2.4 113 | 114 | 115 | org.apache.maven.plugins 116 | maven-jar-plugin 117 | 2.4 118 | 119 | 120 | 
org.apache.maven.plugins 121 | maven-javadoc-plugin 122 | 2.9 123 | 124 | 125 | org.apache.maven.plugins 126 | maven-release-plugin 127 | 2.4 128 | 129 | deploy 130 | 131 | 132 | 133 | org.apache.maven.plugins 134 | maven-resources-plugin 135 | 2.6 136 | 137 | 138 | org.apache.maven.plugins 139 | maven-site-plugin 140 | 3.2 141 | 142 | 143 | org.apache.maven.plugins 144 | maven-surefire-plugin 145 | 2.14 146 | 147 | 148 | org.apache.maven.plugins 149 | maven-source-plugin 150 | 2.2.1 151 | 152 | 153 | org.codehaus.mojo 154 | animal-sniffer-maven-plugin 155 | 1.9 156 | 157 | 158 | org.codehaus.mojo 159 | build-helper-maven-plugin 160 | 1.7 161 | 162 | 163 | org.codehaus.mojo 164 | cassandra-maven-plugin 165 | 1.2.1-1 166 | 167 | 168 | 170 | 171 | org.eclipse.m2e 172 | lifecycle-mapping 173 | 1.0.0 174 | 175 | 176 | 177 | 178 | 179 | 180 | org.codehaus.mojo 181 | 182 | 183 | build-helper-maven-plugin 184 | 185 | 186 | [1.7,) 187 | 188 | 189 | 190 | reserve-network-port 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | org.apache.felix 207 | maven-bundle-plugin 208 | true 209 | 2.4.0 210 | 212 | 213 | 214 | com.github.adejanovski.cassandra.jdbc 215 | ${project.version} 216 | <_include>-osgi.bnd 217 | 218 | 219 | jar 220 | bundle 221 | pom 222 | 223 | 224 | 225 | 226 | maven-compiler-plugin 227 | 228 | 1.6 229 | 1.6 230 | 231 | 232 | 233 | maven-surefire-plugin 234 | 235 | 236 | **/*UnitTest.class 237 | 238 | 239 | ${cassandra.version} 240 | ${ipprefix} 241 | 60 242 | 243 | 244 | 245 | 246 | maven-failsafe-plugin 247 | 248 | 249 | **/*Test.class 250 | 251 | 252 | **/*UnitTest.class 253 | 254 | 255 | 256 | 257 | 258 | integration-test 259 | verify 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | default 270 | 271 | default 272 | 273 | 274 | true 275 | 276 | 277 | 278 | 279 | maven-surefire-plugin 280 | 2.17 281 | 282 | true 283 | 284 | 285 | 286 | test 287 | 288 | test 289 | 290 | 291 | true 292 | 293 | 294 | 295 
| 296 | 297 | maven-compiler-plugin 298 | 299 | -Xlint:all 300 | true 301 | true 302 | 303 | 304 | 305 | org.apache.maven.plugins 306 | maven-javadoc-plugin 307 | 308 | -Xdoclint:none 309 | 310 | 2.10.3 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | release 319 | 320 | default 321 | 322 | 323 | 324 | 325 | maven-surefire-plugin 326 | 2.17 327 | 328 | true 329 | 330 | 331 | 332 | test 333 | 334 | test 335 | 336 | 337 | true 338 | 339 | 340 | 341 | 342 | 343 | maven-compiler-plugin 344 | 345 | -Xlint:all 346 | true 347 | true 348 | 349 | 350 | 351 | org.apache.maven.plugins 352 | maven-javadoc-plugin 353 | 2.10.3 354 | 355 | -Xdoclint:none 356 | 357 | 358 | 359 | org.apache.maven.plugins 360 | maven-source-plugin 361 | 2.4 362 | 363 | 364 | attach-sources 365 | verify 366 | 367 | jar-no-fork 368 | 369 | 370 | 371 | 372 | 373 | org.apache.maven.plugins 374 | maven-gpg-plugin 375 | 376 | 377 | sign-artifacts 378 | verify 379 | 380 | sign 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | test 393 | 394 | default 395 | 396 | 397 | 398 | 399 | org.apache.maven.plugins 400 | maven-surefire-plugin 401 | 2.18.1 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | scm:git:git@github.com:adejanovski/cassandra-dcaware-failover.git 410 | scm:git:git@github.com:adejanovski/cassandra-dcaware-failover.git 411 | https://github.com/com:adejanovski/cassandra-dcaware-failover 412 | HEAD 413 | 414 | 415 | 416 | -------------------------------------------------------------------------------- /DCAwareFailoverRoundRobinPolicy/src/main/java/org/adejanovski/cassandra/policies/DCAwareFailoverRoundRobinPolicy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012-2015 DataStax Inc. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.adejanovski.cassandra.policies; 17 | 18 | import java.util.*; 19 | import java.util.concurrent.ConcurrentHashMap; 20 | import java.util.concurrent.ConcurrentMap; 21 | import java.util.concurrent.CopyOnWriteArrayList; 22 | import java.util.concurrent.ExecutionException; 23 | import java.util.concurrent.TimeUnit; 24 | import java.util.concurrent.TimeoutException; 25 | import java.util.concurrent.atomic.AtomicBoolean; 26 | import java.util.concurrent.atomic.AtomicInteger; 27 | 28 | import com.google.common.base.Joiner; 29 | import com.google.common.base.Preconditions; 30 | import com.google.common.base.Strings; 31 | import com.google.common.annotations.VisibleForTesting; 32 | import com.google.common.collect.AbstractIterator; 33 | 34 | import org.slf4j.Logger; 35 | import org.slf4j.LoggerFactory; 36 | 37 | import com.datastax.driver.core.Cluster; 38 | import com.datastax.driver.core.Configuration; 39 | import com.datastax.driver.core.ConsistencyLevel; 40 | import com.datastax.driver.core.Host; 41 | import com.datastax.driver.core.HostDistance; 42 | import com.datastax.driver.core.Statement; 43 | import com.datastax.driver.core.policies.CloseableLoadBalancingPolicy; 44 | import com.datastax.driver.core.policies.LoadBalancingPolicy; 45 | 46 | /** 47 | * A data-center aware Round-robin load balancing policy with DC failover 48 | * support. 49 | * 50 | * This policy provides round-robin queries over the node of the local data 51 | * center. 
Query plans only ever contain hosts of the currently active data center:
 * the local one until the failover switch is triggered, the backup one
 * afterwards. Hosts that belong to neither the local nor the backup data
 * center are reported as {@code IGNORED} and are never queried.
 * <p>
 * The switch to the backup data center is triggered once
 * {@code hostDownSwitchThreshold} nodes of the local data center have been
 * reported down. With the three-argument constructor the switch is one way:
 * the policy never returns to the local data center by itself, which prevents
 * inconsistencies and gives ops teams the ability to repair the local DC
 * before switching back manually. When a switch-back delay factor and a
 * maximum downtime are provided, the policy switches back automatically once
 * the local data center has been up for long enough, provided the outage was
 * shorter than the configured maximum downtime.
 *
 * @author adejanovski
 */
public class DCAwareFailoverRoundRobinPolicy implements LoadBalancingPolicy,
        CloseableLoadBalancingPolicy {

    private static final Logger logger = LoggerFactory
            .getLogger(DCAwareFailoverRoundRobinPolicy.class);

    /** Marker for a data-center name that was neither provided nor discovered yet. */
    private static final String UNSET = "";

    /** Conversion factor between the second-based settings and millisecond timestamps. */
    private static final long MILLIS_PER_SECOND = 1000L;

    /**
     * Returns a builder to create a new instance.
     *
     * @return the builder.
     */
    public static Builder builder() {
        return new Builder();
    }

    /** Live hosts, keyed by the name of the data center they belong to. */
    private final ConcurrentMap<String, CopyOnWriteArrayList<Host>> perDcLiveHosts =
            new ConcurrentHashMap<String, CopyOnWriteArrayList<Host>>();

    /** Round-robin cursor shared by all query plans. */
    private final AtomicInteger index = new AtomicInteger();

    volatile String localDc;
    volatile String backupDc;

    /**
     * Current value of the switch threshold. It is decremented each time a
     * local node goes down; once it drops to 0 or below the policy switches to
     * the backup DC.
     */
    private final AtomicInteger hostDownSwitchThreshold;

    /**
     * Initial value of the switch threshold.
     */
    private final int initHostDownSwitchThreshold;

    /**
     * Flag telling whether the switch to the backup DC has occurred.
     */
    private final AtomicBoolean switchedToBackupDc = new AtomicBoolean(false);

    /**
     * Time at which the switch occurred. Written by host-event callbacks and
     * read while building query plans, hence volatile.
     */
    private volatile Date switchedToBackupDcAt;

    /**
     * Automatically switching back to local DC is possible after:
     * downtime * {@code switchBackDelayFactor}.
     */
    private final float switchBackDelayFactor;

    /**
     * Downtime delay after which switching back cannot be automated (usually
     * when the hinted handoff window is reached). In seconds.
     */
    private final int noSwitchBackDowntimeDelay;

    /**
     * Time at which the local DC was considered up again after a switch, or
     * {@code null} while it is still considered down.
     */
    private volatile Date localDcCameBackUpAt;

    /** Set once the outage exceeded the allowed downtime; disables automatic switch back. */
    private volatile boolean switchBackCanNeverHappen = false;

    /** Driver configuration captured in {@link #init}; not read by this class at the moment. */
    private volatile Configuration configuration;

    /**
     * Creates a new datacenter aware failover round robin policy that uses a
     * local data-center and a backup data-center. Switching to the backup DC is
     * triggered automatically once {@code hostDownSwitchThreshold} nodes of the
     * local DC are down. Switching back to local DC after going to backup will
     * never happen automatically.
     *
     * @param localDc the local datacenter
     * @param backupDc the backup datacenter
     * @param hostDownSwitchThreshold how many nodes have to be down before switching
     */
    public DCAwareFailoverRoundRobinPolicy(String localDc, String backupDc,
            int hostDownSwitchThreshold) {
        // A maximum downtime of 0 seconds makes every outage "too long",
        // which permanently disables the automatic switch back.
        this(localDc, backupDc, hostDownSwitchThreshold, (float) -1.0, 0);
    }

    /**
     * Creates a new datacenter aware failover round robin policy that uses a
     * local data-center and a backup data-center. Switching to the backup DC is
     * triggered automatically once {@code hostDownSwitchThreshold} nodes of the
     * local DC are down.
     * The policy will switch back to the local DC if conditions are fulfilled:
     * <ul>
     * <li>downtime lasted less than noSwitchBackDowntimeDelay (hint window)</li>
     * <li>uptime since downtime happened is superior to
     * downtime*switchBackDelayFactor (give enough time for hints to be
     * executed)</li>
     * </ul>
     *
     * @param localDc the local datacenter
     * @param backupDc the backup datacenter
     * @param hostDownSwitchThreshold how many nodes have to be down before switching
     * @param switchBackDelayFactor uptime since downtime happened is superior to downtime*switchBackDelayFactor
     * @param noSwitchBackDowntimeDelay maximum downtime, in seconds, to authorize a back switch to local DC
     */
    public DCAwareFailoverRoundRobinPolicy(String localDc, String backupDc,
            int hostDownSwitchThreshold, float switchBackDelayFactor,
            int noSwitchBackDowntimeDelay) {
        this.localDc = localDc == null ? UNSET : localDc;
        this.backupDc = backupDc == null ? UNSET : backupDc;
        this.hostDownSwitchThreshold = new AtomicInteger(hostDownSwitchThreshold);
        this.initHostDownSwitchThreshold = hostDownSwitchThreshold;
        this.switchBackDelayFactor = switchBackDelayFactor;
        this.noSwitchBackDowntimeDelay = noSwitchBackDowntimeDelay;
    }

    /**
     * Registers the initial set of hosts and, when no local data center was
     * provided, adopts the data center of the first host that reports one.
     *
     * @param cluster the cluster this policy is attached to.
     * @param hosts the hosts known when the policy is initialized.
     */
    public void init(Cluster cluster, Collection<Host> hosts) {
        if (!isUnset(localDc)) {
            logger.info(
                    "Using provided data-center name '{}' for DCAwareFailoverRoundRobinPolicy",
                    localDc);
        }

        this.configuration = cluster.getConfiguration();

        List<String> notInLocalDC = new ArrayList<String>();

        for (Host host : hosts) {
            String dc = dc(host);

            logger.trace("node {} is in dc {}", host.getAddress().toString(), dc);
            discoverLocalDc(dc);

            // Report each non-conforming contact point exactly once
            if (!dc.equals(localDc) && !dc.equals(backupDc)) {
                notInLocalDC.add(String.format("%s (%s)", host.toString(), dc));
            }

            addLiveHost(dc, host);
        }

        if (!notInLocalDC.isEmpty()) {
            String nonLocalHosts = Joiner.on(",").join(notInLocalDC);
            logger.warn(
                    "Some contact points don't match local or backup data center. Local DC = {} - backup DC {}. Non-conforming contact points: {}",
                    localDc, backupDc, nonLocalHosts);
        }
    }

    /**
     * Returns the data center of a host, falling back to the local data center
     * name when the host does not report one.
     */
    private String dc(Host host) {
        String dc = host.getDatacenter();
        return dc == null ? localDc : dc;
    }

    /** Tells whether a data-center name is still the "not set" marker. */
    private static boolean isUnset(String dc) {
        return UNSET.equals(dc);
    }

    /**
     * If the local DC is in "auto-discover" mode and {@code dc} is the first
     * real data-center name seen, adopts it as the local DC.
     */
    private void discoverLocalDc(String dc) {
        if (isUnset(localDc) && !isUnset(dc)) {
            logger.info(
                    "Using data-center name '{}' for DCAwareFailoverRoundRobinPolicy (if this is incorrect, please provide the correct datacenter name with DCAwareFailoverRoundRobinPolicy constructor)",
                    dc);
            localDc = dc;
        }
    }

    /** Adds a host to the live-host list of its data center, creating the list if needed. */
    private void addLiveHost(String dc, Host host) {
        CopyOnWriteArrayList<Host> dcHosts = perDcLiveHosts.get(dc);
        if (dcHosts == null) {
            CopyOnWriteArrayList<Host> created = new CopyOnWriteArrayList<Host>(
                    Collections.singletonList(host));
            dcHosts = perDcLiveHosts.putIfAbsent(dc, created);
            // If we've successfully put our new list, we're good, otherwise
            // another thread beat us to it: add the host to its list instead
            if (dcHosts == null) {
                return;
            }
        }
        dcHosts.addIfAbsent(host);
    }

    @SuppressWarnings("unchecked")
    private static CopyOnWriteArrayList<Host> cloneList(
            CopyOnWriteArrayList<Host> list) {
        // CopyOnWriteArrayList.clone() returns Object; the element type is unchanged
        return (CopyOnWriteArrayList<Host>) list.clone();
    }

    /**
     * Return the HostDistance for the provided host.
     * <p>
     * Hosts of the currently active data center (local, or backup once the
     * switch occurred) are {@code LOCAL}. Hosts of the other one of those two
     * data centers are {@code REMOTE}. Every other host is {@code IGNORED}.
     *
     * @param host
     *            the host of which to return the distance of.
     * @return the HostDistance to {@code host}.
     */
    public HostDistance distance(Host host) {
        String dc = dc(host);
        // If the connection has switched to the backup DC and fulfills
        // the requirement for a back switch, make it happen.
        if (!switchBackCanNeverHappen) {
            triggerBackSwitchIfNecessary();
        }

        if (isLocal(dc)) {
            return HostDistance.LOCAL;
        }

        // Only hosts in local DC and backup DC can be considered remote
        if (dc.equals(localDc) || dc.equals(backupDc)) {
            return HostDistance.REMOTE;
        }

        // All other hosts are ignored
        return HostDistance.IGNORED;
    }

    /**
     * Returns the hosts to use for a new query.
     * <p>
     * The returned plan contains every live host of the currently active data
     * center (local, or backup once the switch occurred), each exactly once.
     * The starting host rotates from one plan to the next, following a
     * round-robin algorithm.
     *
     * @param loggedKeyspace
     *            the keyspace currently logged in on for this query.
     * @param statement
     *            the query for which to build the plan.
     * @return a new query plan, i.e. an iterator indicating which host to try
     *         first for querying, which one to use as failover, etc...
     */
    public Iterator<Host> newQueryPlan(String loggedKeyspace,
            final Statement statement) {
        if (!switchBackCanNeverHappen) {
            triggerBackSwitchIfNecessary();
        }

        String currentDc = switchedToBackupDc.get() ? backupDc : localDc;

        CopyOnWriteArrayList<Host> liveHosts = perDcLiveHosts.get(currentDc);
        // Work on a snapshot so the plan is stable while hosts come and go
        final List<Host> hosts = liveHosts == null
                ? Collections.<Host> emptyList()
                : cloneList(liveHosts);

        final int startIdx = index.getAndIncrement();

        return new AbstractIterator<Host>() {

            private int idx = startIdx;
            private int remaining = hosts.size();

            @Override
            protected Host computeNext() {
                if (remaining <= 0) {
                    return endOfData();
                }
                remaining--;
                int c = idx++ % hosts.size();
                if (c < 0) {
                    // the shared cursor may have overflowed to a negative value
                    c += hosts.size();
                }
                return hosts.get(c);
            }
        };
    }

    /**
     * Called when a host is reported up: gives back one unit of the switch
     * threshold for a local host and records the host as live.
     *
     * @param host the host that came up.
     */
    public void onUp(Host host) {
        String dc = dc(host);
        if (dc.equals(localDc)
                && this.hostDownSwitchThreshold.get() < this.initHostDownSwitchThreshold) {
            // if a node comes back up in the local DC and we're not already
            // equal to the initial threshold, add one node to the
            // switch threshold
            this.hostDownSwitchThreshold.incrementAndGet();
            // after a switch, this is what marks the local DC as up again
            updateLocalDcStatus();
        }

        discoverLocalDc(dc);
        addLiveHost(dc, host);
    }

    public void onSuspected(Host host) {
        // nothing to do
    }

    /**
     * Called when a host is reported down: consumes one unit of the switch
     * threshold for a local host (until the switch occurred), removes the host
     * from the live hosts and triggers the switch to the backup DC when the
     * threshold is exhausted.
     *
     * @param host the host that went down.
     */
    public void onDown(Host host) {
        String dc = dc(host);
        if (dc.equals(localDc) && !switchedToBackupDc.get()) {
            // if a node goes down in the local DC remove one node to eventually
            // trigger the switch
            this.hostDownSwitchThreshold.decrementAndGet();
        }
        CopyOnWriteArrayList<Host> dcHosts = perDcLiveHosts.get(dc);
        if (dcHosts != null) {
            dcHosts.remove(host);
        }

        if (this.hostDownSwitchThreshold.get() <= 0) {
            // Make sure localDc is not considered as being up
            localDcCameBackUpAt = null;
            if (!switchedToBackupDc.get()) {
                // if we lost as many nodes in the local dc as configured in the
                // threshold, switch to backup DC
                switchToBackup();
            }
        }
    }

    public void onAdd(Host host) {
        onUp(host);
    }

    public void onRemove(Host host) {
        onDown(host);
    }

    public void close() {
        // nothing to do
    }

    /**
     * Perform switch to backup DC
     */
    private void switchToBackup() {
        switchedToBackupDc.set(true);
        switchedToBackupDcAt = new Date();
        logger.warn(
                "Lost {} nodes in data-center '{}'. Switching to data-center '{}'",
                this.initHostDownSwitchThreshold, this.localDc, this.backupDc);
    }

    /**
     * Perform switch back to local DC
     */
    private void switchBackToLocal() {
        switchedToBackupDc.set(false);
        switchedToBackupDcAt = null;
        localDcCameBackUpAt = null;
        logger.warn(
                "Recovered enough nodes in data-center '{}'. Switching back since conditions are fulfilled",
                this.localDc);
    }

    /**
     * Check if the cluster state fulfills requirements for switching back to
     * local DC. Conditions to switch back:
     * <ul>
     * <li>the connection has already switched to backup DC</li>
     * <li>hostDownSwitchThreshold is &gt; 0</li>
     * <li>enough time has passed for hinted handoff: (currentTime -
     * localDcCameBackUpAt) &gt; (localDcCameBackUpAt -
     * switchedToBackupDcAt)*switchBackDelayFactor</li>
     * <li>(localDcCameBackUpAt - switchedToBackupDcAt) &lt;
     * noSwitchBackDowntimeDelay</li>
     * </ul>
     * When the downtime exceeded noSwitchBackDowntimeDelay, automatic switch
     * back is disabled for good.
     *
     * @return {@code true} if the policy may switch back to the local DC now.
     */
    private boolean canSwitchBack() {
        // Snapshot both dates: host-event callbacks may reset them to null
        // while this method runs on a request thread.
        Date cameBackUpAt = localDcCameBackUpAt;
        Date switchedAt = switchedToBackupDcAt;
        if (cameBackUpAt == null || switchedAt == null) {
            return false;
        }

        long downtimeMillis = cameBackUpAt.getTime() - switchedAt.getTime();
        // long arithmetic: the delay in seconds times 1000 may not fit an int
        long maxDowntimeMillis = noSwitchBackDowntimeDelay * MILLIS_PER_SECOND;

        if (downtimeMillis >= maxDowntimeMillis) {
            // Downtime lasted more than the hinted handoff window
            // Switching back is now a manual operation
            logger.warn(
                    "Local DC has been down for too long. Switch back will never happen.");
            switchBackCanNeverHappen = true;
            return false;
        }

        if (!switchedToBackupDc.get() || hostDownSwitchThreshold.get() <= 0) {
            return false;
        }

        long uptimeMillis = System.currentTimeMillis() - cameBackUpAt.getTime();
        logger.debug(
                "Local DC {} is up and has been down for {}s. Switch back will happen after {}s. Uptime = {}s ",
                localDc,
                (int) (downtimeMillis / 1000),
                (int) (downtimeMillis * switchBackDelayFactor / 1000),
                uptimeMillis / 1000);

        return uptimeMillis > downtimeMillis * switchBackDelayFactor;
    }

    /**
     * After a switch, records the moment the local DC recovered enough nodes
     * (threshold back above 0), unless that moment is already recorded.
     */
    private void updateLocalDcStatus() {
        if (switchedToBackupDc.get() && hostDownSwitchThreshold.get() > 0
                && localDcCameBackUpAt == null) {
            localDcCameBackUpAt = new Date();
        }
    }

    /**
     * Test if a node is in the local DC (or in the backup DC and switch has
     * occurred)
     *
     * @param dc the data-center name to test.
     * @return {@code true} if {@code dc} is the currently active data center,
     *         or is not set.
     */
    private boolean isLocal(String dc) {
        if (isUnset(dc)) {
            return true;
        }
        boolean switched = switchedToBackupDc.get();
        return switched ? dc.equals(backupDc) : dc.equals(localDc);
    }

    /**
     * Check if a switch has occurred and switching back to local DC is possible.
     */
    public void triggerBackSwitchIfNecessary() {
        if (switchedToBackupDc.get() && localDcCameBackUpAt != null
                && switchedToBackupDcAt != null) {
            if (canSwitchBack()) {
                switchBackToLocal();
            }
        }
    }

    /**
     * Helper class to build the policy.
     */
    public static class Builder {
        private String localDc;
        private String backupDc;
        private int hostDownSwitchThreshold;
        private float switchBackDelayFactor = (float) 1000;
        private int noSwitchBackDowntimeDelay = 0;

        /**
         * Sets the name of the datacenter that will be considered "local" by the policy.
         * <p>
         * This must be the name as known by Cassandra (in other words, the name that appears in
         * {@code system.peers}, or in the output of admin tools like nodetool).
         * <p>
         * If this method isn't called, the policy will default to the datacenter of the first node
         * connected to. This will always be ok if all the contact points used at {@code Cluster}
         * creation are in the local data-center. Otherwise, you should provide the name yourself
         * with this method.
         *
         * @param localDc the name of the datacenter. It should not be {@code null}.
         * @return this builder.
         */
        public Builder withLocalDc(String localDc) {
            Preconditions.checkArgument(!Strings.isNullOrEmpty(localDc),
                    "localDc name can't be null or empty. If you want to let the policy autodetect the datacenter, don't call Builder.withLocalDC");
            this.localDc = localDc;
            return this;
        }

        /**
         * Sets the name of the datacenter that will be considered as "backup" by the policy.
         * <p>
         * This must be the name as known by Cassandra (in other words, the name that appears in
         * {@code system.peers}, or in the output of admin tools like nodetool).
         * <p>
         * This method must be called, otherwise you should not use this policy.
         *
         * @param backupDc the name of the datacenter. It should not be {@code null}.
         * @return this builder.
         */
        public Builder withBackupDc(String backupDc) {
            // validate the argument itself, not the (possibly unset) local DC
            Preconditions.checkArgument(!Strings.isNullOrEmpty(backupDc),
                    "backupDc name can't be null or empty.");
            this.backupDc = backupDc;
            return this;
        }

        /**
         * Sets how many nodes must be down in the local DC before switching to backup.
         *
         * @param hostDownSwitchThreshold the number of nodes down before switching to the backup DC.
         * @return this builder
         */
        public Builder withHostDownSwitchThreshold(int hostDownSwitchThreshold) {
            this.hostDownSwitchThreshold = hostDownSwitchThreshold;
            return this;
        }

        /**
         * Mandatory if you want to authorize switching back to local DC after downtime.
         * Allows enough time to pass so that hinted handoff can finish:
         * (currentTime - localDcCameBackUpAt) &gt; (localDcCameBackUpAt - switchedToBackupDcAt)*switchBackDelayFactor
         *
         * @param switchBackDelayFactor factor applied to the downtime; uptime must exceed the result before switching back to local DC
         * @return this builder
         */
        public Builder withSwitchBackDelayFactor(float switchBackDelayFactor) {
            this.switchBackDelayFactor = switchBackDelayFactor;
            return this;
        }

        /**
         * Mandatory if you want to authorize switching back to local DC after downtime.
         * Prevents switching back to local DC if downtime was longer than the provided value.
         * Used to check if downtime didn't last more than the hinted handoff window (which requires repair).
         *
         * @param noSwitchBackDowntimeDelay max downtime in seconds; beyond it, switching back to local DC is prevented.
         * @return this builder
         */
        public Builder withNoSwitchBackDowntimeDelay(int noSwitchBackDowntimeDelay) {
            this.noSwitchBackDowntimeDelay = noSwitchBackDowntimeDelay;
            return this;
        }

        /**
         * Builds the policy configured by this builder.
         *
         * @return the policy.
         */
        public DCAwareFailoverRoundRobinPolicy build() {
            return new DCAwareFailoverRoundRobinPolicy(localDc, backupDc,
                    hostDownSwitchThreshold, switchBackDelayFactor,
                    noSwitchBackDowntimeDelay);
        }
    }

}