├── .gitignore ├── src ├── main │ ├── resources │ │ └── es-plugin.properties │ ├── java │ │ └── org │ │ │ └── elasticsearch │ │ │ ├── river │ │ │ └── cassandra │ │ │ │ ├── CassandraCFData.java │ │ │ │ ├── CassandraRiverModule.java │ │ │ │ ├── CassandraDB.java │ │ │ │ └── CassandraRiver.java │ │ │ └── plugin │ │ │ └── river │ │ │ └── cassandra │ │ │ └── CassandraRiverPlugin.java │ └── assemblies │ │ └── plugin.xml └── test │ ├── resources │ ├── cassandra │ │ └── conf │ │ │ └── cassandra_config.xml │ └── log4j.properties │ └── java │ └── org │ └── elasticsearch │ └── river │ └── cassandra │ └── CassandraRiverIntegrationTest.java ├── README.md ├── pom.xml └── LICENSE.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .project 3 | .settings/ 4 | 5 | target/ 6 | test-output/ 7 | -------------------------------------------------------------------------------- /src/main/resources/es-plugin.properties: -------------------------------------------------------------------------------- 1 | plugin=org.elasticsearch.plugin.river.cassandra.CassandraRiverPlugin -------------------------------------------------------------------------------- /src/test/resources/cassandra/conf/cassandra_config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/river/cassandra/CassandraCFData.java: -------------------------------------------------------------------------------- 1 | /*Copyright 2013, eBay Software Foundation 2 | Authored by Utkarsh Sengar 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
/**
 * One page of rows read from a Cassandra column family.
 *
 * <p>Plain data holder with public fields; it is filled in by
 * {@code CassandraDB.getCFData} and consumed by the river's indexer.
 */
public class CassandraCFData {

    /**
     * Row key at which the next page should start, or {@code null} when the
     * producer found no further rows to read.
     */
    public String start;

    /**
     * Page content: row key -&gt; (column name -&gt; column value).
     *
     * <p>Initialised to an empty, mutable map so that consumers can call
     * {@code size()} or iterate without a null check, even when the producer
     * returns early without assigning a page.
     */
    public Map<String, Map<String, String>> rowColumnMap =
            new java.util.HashMap<String, Map<String, String>>();
}
-------------------------------------------------------------------------------- 1 | /*Copyright 2013, eBay Software Foundation 2 | Authored by Utkarsh Sengar 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package org.elasticsearch.river.cassandra; 18 | 19 | import org.elasticsearch.common.inject.AbstractModule; 20 | import org.elasticsearch.river.River; 21 | 22 | public class CassandraRiverModule extends AbstractModule { 23 | 24 | @Override 25 | protected void configure() { 26 | bind(River.class).to(CassandraRiver.class).asEagerSingleton(); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/assemblies/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | / 11 | 12 | org.elasticsearch:elasticsearch-river-cassandra 13 | org.apache.cassandra:cassandra-all 14 | org.apache.cassandra:cassandra-thrift 15 | org.apache.thrift:libthrift 16 | com.google.guava:guava 17 | commons-lang:commons-lang 18 | commons-pool:* 19 | org.hectorclient:hector-core 20 | org.apache.httpcomponents:* 21 | com.fasterxml.jackson.core:* 22 | org.slf4j:* 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Cassandra river for Elastic search. 
2 | 3 | This river was a proof of concept for integrating Cassandra with Elasticsearch; it's a 2-day hacked-up solution. 4 | 5 | ##Setup 6 | 7 | Build: `mvn clean package` 8 | 9 | Install: 10 | `./bin/plugin -url file:elasticsearch-river-cassandra/target/releases/elasticsearch-river-cassandra-1.0.0-SNAPSHOT.zip -install river-cassandra` 11 | 12 | 13 | Remove: 14 | `./bin/plugin -remove river-cassandra` 15 | 16 | 17 | 18 | ##Init 19 | 20 | curl -XPUT 'localhost:9200/_river/prodinfo/_meta' -d '{ 21 | "type" : "cassandra", 22 | "cassandra" : { 23 | "cluster_name" : "test-cluster", 24 | "keyspace" : "catalogks", 25 | "column_family" : "info", 26 | "batch_size" : 1000, 27 | "hosts" : "host1:9161,host2:9161", 28 | "username" : "username", 29 | "password" : "password" 30 | }, 31 | "index" : { 32 | "index" : "prodinfo", 33 | "type" : "product" 34 | } 35 | }' 36 | 37 | 38 | ##Query 39 | 1. localhost:9200/prodinfo/_search 40 | 2. localhost:9200/prodinfo/_count 41 | 42 | 43 | ##References 44 | 1. http://jfarrell.github.com/ 45 | 2. Set up elasticsearch-head and bigdesk to monitor ES 46 | 47 | 48 | 49 | ##Improvements 50 | 1. http://mail-archives.apache.org/mod_mbox/cassandra-user/201303.mbox/%3CEB07A386-F9E3-4CF3-BBC6-9DA3B9CAA79F@thelastpickle.com%3E 51 | 2. https://groups.google.com/forum/?fromgroups=#!topic/elasticsearch/M1aJqvAIpZE 52 | 3. Tests -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/plugin/river/cassandra/CassandraRiverPlugin.java: -------------------------------------------------------------------------------- 1 | /*Copyright 2013, eBay Software Foundation 2 | Authored by Utkarsh Sengar 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package org.elasticsearch.plugin.river.cassandra; 18 | 19 | import org.elasticsearch.common.inject.Inject; 20 | import org.elasticsearch.plugins.AbstractPlugin; 21 | import org.elasticsearch.river.RiversModule; 22 | import org.elasticsearch.river.cassandra.CassandraRiverModule; 23 | 24 | public class CassandraRiverPlugin extends AbstractPlugin { 25 | 26 | @Inject 27 | public CassandraRiverPlugin() { 28 | } 29 | 30 | @Override 31 | public String name() { 32 | return "river-cassandra"; 33 | } 34 | 35 | @Override 36 | public String description() { 37 | return "River Cassandra plugin"; 38 | } 39 | 40 | /** 41 | * Registers the {@link CassandraRiverModule} 42 | * @param module the elasticsearch module used to handle rivers 43 | */ 44 | public void onModule(RiversModule module) { 45 | module.registerRiver("cassandra", CassandraRiverModule.class); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/river/cassandra/CassandraDB.java: -------------------------------------------------------------------------------- 1 | /*Copyright 2013, eBay Software Foundation 2 | Authored by Utkarsh Sengar 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package org.elasticsearch.river.cassandra; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | import me.prettyprint.cassandra.model.AllOneConsistencyLevelPolicy; 23 | import me.prettyprint.cassandra.serializers.StringSerializer; 24 | import me.prettyprint.cassandra.service.CassandraHostConfigurator; 25 | import me.prettyprint.cassandra.service.FailoverPolicy; 26 | import me.prettyprint.hector.api.Cluster; 27 | import me.prettyprint.hector.api.Keyspace; 28 | import me.prettyprint.hector.api.beans.ColumnSlice; 29 | import me.prettyprint.hector.api.beans.HColumn; 30 | import me.prettyprint.hector.api.beans.OrderedRows; 31 | import me.prettyprint.hector.api.beans.Row; 32 | import me.prettyprint.hector.api.factory.HFactory; 33 | import me.prettyprint.hector.api.query.RangeSlicesQuery; 34 | 35 | 36 | public class CassandraDB { 37 | private static final StringSerializer STR = StringSerializer.get(); 38 | 39 | public static final String COMPOSITE_SEPARATOR = "~"; 40 | 41 | private Cluster cluster; 42 | private Keyspace keyspace; 43 | private int batchSize; 44 | 45 | private static HashMap instances = new HashMap(); 46 | 47 | private CassandraDB(String hosts, String username, String password, String clustername, String keyspace) { 48 | init(clustername, hosts, username, password, keyspace); 49 | } 50 | 51 | protected void init(String clustername, String hosts, String username, String password, String keyspace) { 52 | CassandraHostConfigurator hostconfig = new CassandraHostConfigurator(hosts); 53 | 
hostconfig.setRetryDownedHosts(true); 54 | hostconfig.setRetryDownedHostsDelayInSeconds(5); 55 | hostconfig.setRetryDownedHostsQueueSize(-1); // no bounds 56 | this.cluster = HFactory.getOrCreateCluster(clustername, hostconfig); 57 | 58 | Map credentials = new HashMap(); 59 | if (username != null && username.length() > 0) { 60 | credentials.put("username", username); 61 | credentials.put("password", password); 62 | } 63 | 64 | this.keyspace = HFactory.createKeyspace( 65 | keyspace, 66 | cluster, 67 | new AllOneConsistencyLevelPolicy(), 68 | FailoverPolicy.ON_FAIL_TRY_ALL_AVAILABLE); 69 | } 70 | 71 | public static CassandraDB getInstance(String hosts, String username, String password, String clustername, String keyspace) { 72 | String instanceKey = clustername + "|" + keyspace; // TODO A cleaner key would be nice 73 | CassandraDB instance = null; 74 | 75 | synchronized (CassandraDB.class) { 76 | instance = instances.get(instanceKey); 77 | if (instance == null) { 78 | instance = new CassandraDB(hosts, username, password, clustername, keyspace); 79 | instances.put(instanceKey, instance); 80 | } 81 | } 82 | 83 | return instance; 84 | } 85 | 86 | public int getBatchSize() { 87 | return batchSize; 88 | } 89 | 90 | public void setBatchSize(int batchSize) { 91 | this.batchSize = batchSize; 92 | } 93 | 94 | public CassandraCFData getCFData(String columnFamily, String start, int limit) { 95 | int columnLimit = 100; 96 | CassandraCFData data = new CassandraCFData(); 97 | String lastEnd = null; 98 | 99 | Map> cfData = new HashMap>(); 100 | RangeSlicesQuery query = HFactory.createRangeSlicesQuery(keyspace, STR, STR, STR); 101 | query.setColumnFamily(columnFamily); 102 | query.setKeys(start, ""); 103 | query.setRange("", "", false, columnLimit); 104 | query.setRowCount(limit); 105 | OrderedRows rows = query.execute().get(); 106 | if (rows.getCount() != 1) { 107 | lastEnd = rows.peekLast().getKey(); 108 | data.start = lastEnd; 109 | } else { 110 | data.start = null; 111 | return 
data; 112 | } 113 | 114 | for(Row row : rows.getList()){ 115 | Map columnMap = new HashMap(); 116 | ColumnSlice columnData = row.getColumnSlice(); 117 | for (HColumn column : columnData.getColumns()){ 118 | columnMap.put(column.getName(), column.getValue()); 119 | } 120 | 121 | cfData.put(row.getKey(), columnMap); 122 | } 123 | 124 | data.rowColumnMap = cfData; 125 | return data; 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/test/java/org/elasticsearch/river/cassandra/CassandraRiverIntegrationTest.java: -------------------------------------------------------------------------------- 1 | /*Copyright 2013, eBay Software Foundation 2 | Authored by Utkarsh Sengar 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package org.elasticsearch.river.cassandra; 18 | 19 | import java.io.File; 20 | import java.io.IOException; 21 | import java.util.Map; 22 | 23 | import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest; 24 | import org.elasticsearch.client.Client; 25 | import org.elasticsearch.client.Requests; 26 | import org.elasticsearch.common.collect.ImmutableMap; 27 | import org.elasticsearch.common.settings.ImmutableSettings; 28 | import org.elasticsearch.common.settings.Settings; 29 | import org.elasticsearch.common.xcontent.XContentBuilder; 30 | import org.elasticsearch.common.xcontent.XContentFactory; 31 | import org.elasticsearch.node.Node; 32 | import org.elasticsearch.node.NodeBuilder; 33 | import org.slf4j.Logger; 34 | import org.slf4j.LoggerFactory; 35 | import org.testng.annotations.AfterClass; 36 | import org.testng.annotations.BeforeClass; 37 | import org.testng.annotations.BeforeMethod; 38 | import org.testng.annotations.Test; 39 | 40 | 41 | public class CassandraRiverIntegrationTest { 42 | 43 | Logger logger = LoggerFactory.getLogger(CassandraRiverIntegrationTest.class); 44 | private Node esNode; 45 | private Client esClient; 46 | 47 | private static final File DATA_DIR; 48 | private static final File ES_DATA_DIR; 49 | private static final boolean ES_HTTP_ENABLED = true; 50 | 51 | static { 52 | String tmpDir = System.getProperty("tempDir", System.getProperty("java.io.tmpdir")); 53 | if (tmpDir == null) { 54 | throw new RuntimeException("No system property 'tempDir' or 'java.io.tmpdir' defined"); 55 | } 56 | DATA_DIR = new File(tmpDir, "test-" + CassandraRiverIntegrationTest.class.getSimpleName() + "-" + System.currentTimeMillis()); 57 | ES_DATA_DIR = new File(DATA_DIR, "elasticsearch"); 58 | } 59 | 60 | @BeforeClass 61 | public void beforeClass() throws Exception { 62 | //fires elasticsearch node 63 | Settings settings = ImmutableSettings.settingsBuilder() 64 | .put("path.data", ES_DATA_DIR.getAbsolutePath()) 65 | 
.put("http.enabled", ES_HTTP_ENABLED) 66 | .build(); 67 | esNode = NodeBuilder.nodeBuilder().clusterName("cassandra-river-test").local(true).settings(settings).build(); 68 | esNode.start(); 69 | esClient = esNode.client(); 70 | } 71 | 72 | @AfterClass 73 | public void afterClass() throws Exception { 74 | esClient.close(); 75 | esNode.close(); 76 | } 77 | 78 | @BeforeMethod 79 | public void beforeMethod() throws IOException { 80 | //removes data from elasticsearch (both registered river if existing and imported data) 81 | String[] indices = esClient.admin().cluster().state(new ClusterStateRequest().local(true)) 82 | .actionGet().getState().getMetaData().getConcreteAllIndices(); 83 | esClient.admin().indices().prepareDelete(indices).execute().actionGet(); 84 | } 85 | 86 | @Test 87 | public void testImportDefaultValues() throws Exception { 88 | registerRiver(); 89 | } 90 | 91 | @Test 92 | public void testImportWithRows() throws Exception { 93 | registerRiver(ImmutableMap.of("rows", 20), null); 94 | } 95 | 96 | private void registerRiver() throws Exception { 97 | registerRiver(null, null, null); 98 | } 99 | 100 | private void registerRiver(Map solrConfig, 101 | Map indexConfig) throws Exception { 102 | registerRiver(solrConfig, indexConfig, null); 103 | } 104 | 105 | private void registerRiver(Map solrConfig, 106 | Map indexConfig, 107 | Map mainConfig) throws Exception { 108 | 109 | registerRiver(solrConfig, indexConfig, mainConfig, null); 110 | } 111 | 112 | private void registerRiver(Map solrConfig, 113 | Map indexConfig, 114 | Map mainConfig, 115 | Map transformConfig) throws Exception { 116 | XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint().startObject(); 117 | builder.field("type", "cassandra"); 118 | 119 | if (mainConfig != null) { 120 | for (Map.Entry entry : mainConfig.entrySet()) { 121 | builder.field(entry.getKey(), entry.getValue()); 122 | } 123 | } 124 | 125 | builder.startObject("cassandra"); 126 | builder.field("url", 
"http://localhost:8983/cassandra-river/"); 127 | if (solrConfig != null) { 128 | for (Map.Entry entry : solrConfig.entrySet()) { 129 | builder.field(entry.getKey(), entry.getValue()); 130 | } 131 | } 132 | builder.endObject(); 133 | 134 | if (indexConfig != null) { 135 | builder.startObject("index"); 136 | for (Map.Entry entry : indexConfig.entrySet()) { 137 | builder.field(entry.getKey(), entry.getValue()); 138 | } 139 | builder.endObject(); 140 | } 141 | 142 | if (transformConfig != null) { 143 | builder.startObject("transform"); 144 | for (Map.Entry entry : transformConfig.entrySet()) { 145 | builder.field(entry.getKey(), entry.getValue()); 146 | } 147 | builder.endObject(); 148 | } 149 | 150 | builder.endObject(); 151 | 152 | logger.debug("Registering river \n{}", builder.string()); 153 | 154 | esClient.index(Requests.indexRequest("_river").type("cassandra_river").id("_meta").source(builder)).actionGet(); 155 | } 156 | } -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.elasticsearch 8 | elasticsearch-river-cassandra 9 | 1.0.0-SNAPSHOT 10 | jar 11 | Cassandra River for ElasticSearch 12 | 2013 13 | 14 | 15 | The Apache Software License, Version 2.0 16 | http://www.apache.org/licenses/LICENSE-2.0.txt 17 | repo 18 | 19 | 20 | 21 | 22 | 0.20.5 23 | 4.2.1 24 | UTF-8 25 | 26 | 27 | 28 | 29 | org.apache.cassandra 30 | cassandra-all 31 | 1.2.0 32 | 33 | 34 | org.hectorclient 35 | hector-core 36 | 1.1-1 37 | 38 | 39 | org.apache.thrift 40 | libthrift 41 | 0.9.0 42 | 43 | 44 | org.apache.cassandra 45 | cassandra-thrift 46 | 1.2.0 47 | 48 | 49 | commons-pool 50 | commons-pool 51 | 1.5.3 52 | 53 | 54 | com.google.guava 55 | guava 56 | 14.0 57 | 58 | 59 | org.elasticsearch 60 | elasticsearch 61 | ${elasticsearch.version} 62 | provided 63 | 64 | 65 | org.slf4j 66 | slf4j-log4j12 67 | 1.6.6 68 | 69 | 70 | log4j 71 | log4j 72 | 
1.2.16 73 | provided 74 | 75 | 76 | 77 | org.apache.httpcomponents 78 | httpclient 79 | ${httpclient.version} 80 | 81 | 82 | org.apache.httpcomponents 83 | httpmime 84 | ${httpclient.version} 85 | 86 | 87 | com.fasterxml.jackson.core 88 | jackson-databind 89 | 2.0.6 90 | 91 | 92 | 93 | org.testng 94 | testng 95 | 6.8 96 | test 97 | 98 | 99 | org.hamcrest 100 | hamcrest-core 101 | 102 | 103 | junit 104 | junit 105 | 106 | 107 | 108 | 109 | commons-io 110 | commons-io 111 | 2.4 112 | test 113 | 114 | 115 | commons-lang 116 | commons-lang 117 | 2.6 118 | 119 | 120 | org.mortbay.jetty 121 | jetty 122 | 6.1.26 123 | test 124 | 125 | 126 | 127 | 128 | 129 | 130 | org.apache.maven.plugins 131 | maven-compiler-plugin 132 | 2.5.1 133 | 134 | 1.6 135 | 1.6 136 | 137 | 138 | 139 | org.apache.maven.plugins 140 | maven-surefire-plugin 141 | 2.12.4 142 | 143 | 144 | **/*Test.java 145 | 146 | 147 | 148 | 149 | org.apache.maven.plugins 150 | maven-source-plugin 151 | 2.2.1 152 | 153 | 154 | attach-sources 155 | 156 | jar 157 | 158 | 159 | 160 | 161 | 162 | maven-assembly-plugin 163 | 2.3 164 | 165 | false 166 | ${project.build.directory}/releases/ 167 | 168 | ${basedir}/src/main/assemblies/plugin.xml 169 | 170 | 171 | 172 | 173 | package 174 | 175 | single 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/river/cassandra/CassandraRiver.java: -------------------------------------------------------------------------------- 1 | /*Copyright 2013, eBay Software Foundation 2 | Authored by Utkarsh Sengar 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package org.elasticsearch.river.cassandra; 18 | 19 | import static org.elasticsearch.client.Requests.indexRequest; 20 | 21 | import java.util.Map; 22 | import java.util.concurrent.ExecutorService; 23 | import java.util.concurrent.Executors; 24 | import java.util.concurrent.ThreadFactory; 25 | 26 | import org.elasticsearch.action.bulk.BulkRequestBuilder; 27 | import org.elasticsearch.client.Client; 28 | import org.elasticsearch.common.UUID; 29 | import org.elasticsearch.common.inject.Inject; 30 | import org.elasticsearch.common.util.concurrent.ThreadFactoryBuilder; 31 | import org.elasticsearch.common.xcontent.support.XContentMapValues; 32 | import org.elasticsearch.river.AbstractRiverComponent; 33 | import org.elasticsearch.river.River; 34 | import org.elasticsearch.river.RiverName; 35 | import org.elasticsearch.river.RiverSettings; 36 | import org.elasticsearch.script.ScriptService; 37 | 38 | public class CassandraRiver extends AbstractRiverComponent implements River { 39 | 40 | private final Client client; 41 | 42 | private ExecutorService threadExecutor; 43 | private volatile boolean closed; 44 | 45 | //Cassandra settings 46 | private final String hosts; 47 | private final String username; 48 | private final String password; 49 | 50 | private final String clusterName; 51 | private final String keyspace; 52 | private final String columnFamily; 53 | private final int batchSize; 54 | 55 | //Index settings 56 | private final String typeName; 57 | private final String indexName; 58 | static final String DEFAULT_UNIQUE_KEY = 
"id"; 59 | 60 | 61 | @Inject 62 | protected CassandraRiver(RiverName riverName, RiverSettings riverSettings, Client client, ScriptService scriptService) { 63 | super(riverName, riverSettings); 64 | this.client = client; 65 | 66 | if (riverSettings.settings().containsKey("cassandra")) { 67 | @SuppressWarnings("unchecked") 68 | Map couchSettings = (Map) settings.settings().get("cassandra"); 69 | this.clusterName = XContentMapValues.nodeStringValue(couchSettings.get("cluster_name"), "DEFAULT_CLUSTER"); 70 | this.keyspace = XContentMapValues.nodeStringValue(couchSettings.get("keyspace"), "DEFAULT_KS"); 71 | this.columnFamily = XContentMapValues.nodeStringValue(couchSettings.get("column_family"), "DEFAULT_CF"); 72 | this.batchSize = XContentMapValues.nodeIntegerValue(couchSettings.get("batch_size"), 1000); 73 | this.hosts = XContentMapValues.nodeStringValue(couchSettings.get("hosts"), "host1:9161,host2:9161"); 74 | this.username = XContentMapValues.nodeStringValue(couchSettings.get("username"), "USERNAME"); 75 | this.password = XContentMapValues.nodeStringValue(couchSettings.get("password"), "P$$WD"); 76 | } else { 77 | /* 78 | * Set default values 79 | */ 80 | this.clusterName = "DEFAULT_CLUSTER"; 81 | this.keyspace = "DEFAULT_KS"; 82 | this.columnFamily = "DEFAULT_CF"; 83 | this.batchSize = 1000; 84 | this.hosts = "host1:9161,host2:9161"; 85 | this.username = "USERNAME"; 86 | this.password = "P$$WD"; 87 | } 88 | 89 | if (riverSettings.settings().containsKey("index")) { 90 | @SuppressWarnings("unchecked") 91 | Map couchSettings = (Map) settings.settings().get("index"); 92 | this.indexName = XContentMapValues.nodeStringValue(couchSettings.get("index"), "DEFAULT_INDEX_NAME"); 93 | this.typeName = XContentMapValues.nodeStringValue(couchSettings.get("type"), "DEFAULT_TYPE_NAME"); 94 | 95 | } else { 96 | this.indexName = "DEFAULT_INDEX_NAME"; 97 | this.typeName = "DEFAULT_TYPE_NAME"; 98 | } 99 | } 100 | 101 | 102 | @Override 103 | public void start() { 104 | ThreadFactory 
daemonThreadFactory = new ThreadFactoryBuilder().setNameFormat("Queue-Indexer-thread-%d").setDaemon(false).build(); 105 | threadExecutor = Executors.newFixedThreadPool(10, daemonThreadFactory); 106 | 107 | logger.info("Starting cassandra river"); 108 | CassandraDB db = CassandraDB.getInstance(this.hosts, this.username, this.password, this.clusterName, this.keyspace); 109 | String start = ""; 110 | while(true){ 111 | if (closed) { 112 | return; 113 | } 114 | CassandraCFData cassandraData = db.getCFData(columnFamily, start, 1000); 115 | start = cassandraData.start; 116 | threadExecutor.execute(new Indexer(this.batchSize, 117 | this.typeName, 118 | this.indexName, 119 | cassandraData)); 120 | } 121 | } 122 | 123 | 124 | @Override 125 | public void close() { 126 | if (closed) { 127 | return; 128 | } 129 | logger.info("closing cassandra river"); 130 | closed = true; 131 | threadExecutor.shutdownNow(); 132 | } 133 | 134 | private class Indexer implements Runnable { 135 | private final int batchSize; 136 | private final CassandraCFData keys; 137 | private final String typeName; 138 | private final String indexName; 139 | 140 | public Indexer(int batchSize, String typeName, String indexName, CassandraCFData keys){ 141 | this.batchSize = batchSize; 142 | this.typeName = typeName; 143 | this.indexName = indexName; 144 | this.keys = keys; 145 | } 146 | 147 | @Override 148 | public void run() { 149 | logger.info("Starting thread with {} keys", this.keys.rowColumnMap.size()); 150 | if (closed) { 151 | return; 152 | } 153 | 154 | BulkRequestBuilder bulk = client.prepareBulk(); 155 | for(String key : this.keys.rowColumnMap.keySet()){ 156 | 157 | try { 158 | String id = UUID.nameUUIDFromBytes(key.getBytes()).toString(); 159 | bulk.add(indexRequest(this.indexName).type(this.typeName) 160 | .id(id) 161 | .source(this.keys.rowColumnMap.get(key))); 162 | } catch (Exception e) { 163 | logger.error("failed to entry to bulk indexing"); 164 | } 165 | 166 | if(bulk.numberOfActions() >= 
this.batchSize){ 167 | saveToEs(bulk); 168 | bulk = client.prepareBulk(); 169 | } 170 | } 171 | } 172 | 173 | /* 174 | * Persists data to elastic search 175 | */ 176 | private boolean saveToEs(BulkRequestBuilder bulk) { 177 | logger.info("Inserting {} keys in ES", bulk.numberOfActions()); 178 | 179 | try { 180 | bulk.execute().addListener(new Runnable() { 181 | @Override 182 | public void run() { 183 | logger.info("Processing done!"); 184 | } 185 | }); 186 | } catch (Exception e) { 187 | logger.error("failed to execute bulk", e); 188 | return false; 189 | } 190 | 191 | return true; 192 | } 193 | } 194 | } -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 
26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | --------------------------------------------------------------------------------