├── .gitignore ├── .gitmodules ├── .idea ├── .name ├── compiler.xml ├── copyright │ └── profiles_settings.xml ├── encodings.xml ├── libraries │ ├── Maven__ch_qos_logback_logback_classic_1_0_9.xml │ ├── Maven__ch_qos_logback_logback_core_1_0_9.xml │ ├── Maven__com_carrotsearch_randomizedtesting_junit4_ant_2_1_3.xml │ ├── Maven__com_carrotsearch_randomizedtesting_randomizedtesting_runner_2_1_3.xml │ ├── Maven__com_google_guava_guava_17_0.xml │ ├── Maven__com_google_protobuf_protobuf_java_2_5_0.xml │ ├── Maven__com_spatial4j_spatial4j_0_4_1.xml │ ├── Maven__log4j_log4j_1_2_16.xml │ ├── Maven__org_antlr_antlr_runtime_3_5.xml │ ├── Maven__org_apache_ant_ant_1_8_2.xml │ ├── Maven__org_apache_lucene_lucene_analyzers_common_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_codecs_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_core_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_grouping_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_highlighter_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_join_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_memory_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_misc_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_queries_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_queryparser_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_sandbox_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_spatial_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_suggest_4_9_0.xml │ ├── Maven__org_apache_lucene_lucene_test_framework_4_9_0.xml │ ├── Maven__org_elasticsearch_elasticsearch_1_3_2.xml │ ├── Maven__org_elasticsearch_elasticsearch_test_jar_tests_1_3_2.xml │ ├── Maven__org_ow2_asm_asm_4_1.xml │ ├── Maven__org_ow2_asm_asm_commons_4_1.xml │ └── Maven__org_slf4j_slf4j_api_1_6_6.xml ├── misc.xml ├── modules.xml ├── scopes │ └── scope_settings.xml └── vcs.xml ├── README.md ├── pom.xml └── src └── main ├── assemblies └── plugin.xml ├── java ├── com └── org │ └── elasticsearch │ ├── plugin │ └── river │ │ └── rethinkdb │ │ └── RethinkDBRiverPlugin.java │ └── river │ └── rethinkdb │ ├── ChangeRecord.java │ ├── ChangeRecords.java │ ├── FeedWorker.java │ ├── RethinkDBRiver.java │ └── RethinkDBRiverModule.java ├── proto └── resources └── es-plugin.properties /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .idea/workspace.xml 3 | .idea/tasks.xml 4 | .idea/dictionaries 5 | .idea/libraries 6 | /*.iml 7 | 8 | # Created by http://www.gitignore.io 9 | 10 | ### Maven ### 11 | target/ 12 | pom.xml.tag 13 | pom.xml.releaseBackup 14 | pom.xml.versionsBackup 15 | pom.xml.next 16 | release.properties 17 | 18 | 19 | ### Java ### 20 | *.class 21 | 22 | # Mobile Tools for Java (J2ME) 23 | .mtj.tmp/ 24 | 25 | # Package Files # 26 | *.jar 27 | *.war 28 | *.ear 29 | 30 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 31 | hs_err_pid* 32 | 33 | 34 | ### Emacs ### 35 | # -*- mode: gitignore; -*- 36 | *~ 37 | \#*\# 38 | /.emacs.desktop 39 | /.emacs.desktop.lock 40 | *.elc 41 | auto-save-list 42 | tramp 43 | .\#* 44 | 45 | # Org-mode 46 | .org-id-locations 47 | *_archive 48 | 49 | # flymake-mode 50 | *_flymake.* 51 | 52 | # eshell files 53 | /eshell/history 54 | /eshell/lastdir 55 | 56 | # elpa packages 57 | /elpa/ 58 | 59 | # reftex files 60 | *.rel 61 | 62 | # AUCTeX auto folder 63 | /auto/ 64 | 65 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "rethink-java-driver"] 2 | path = rethink-java-driver 3 | url = git@github.com:npiv/rethink-java-driver 4 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | elasticsearch-river-rethinkdb -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__ch_qos_logback_logback_classic_1_0_9.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__ch_qos_logback_logback_core_1_0_9.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_carrotsearch_randomizedtesting_junit4_ant_2_1_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_carrotsearch_randomizedtesting_randomizedtesting_runner_2_1_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_google_guava_guava_17_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_google_protobuf_protobuf_java_2_5_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_spatial4j_spatial4j_0_4_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__log4j_log4j_1_2_16.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_antlr_antlr_runtime_3_5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_ant_ant_1_8_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_analyzers_common_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_codecs_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_core_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_grouping_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_highlighter_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_join_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_memory_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_misc_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_queries_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_queryparser_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_sandbox_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_spatial_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_suggest_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_lucene_lucene_test_framework_4_9_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_elasticsearch_elasticsearch_1_3_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_elasticsearch_elasticsearch_test_jar_tests_1_3_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_ow2_asm_asm_4_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_ow2_asm_asm_commons_4_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_slf4j_slf4j_api_1_6_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 46 | 47 | 48 | 49 | 50 | 51 | 58 | 59 | 60 | 64 | 65 | 66 | 77 | 78 | 91 | 92 | 93 | 111 | 118 | 119 | 120 | localhost 121 | 5050 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/scopes/scope_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch RethinkDB River 2 | 3 | **NOTE: This river is deprecated and will no longer work with RethinkDB 2.2 and higher. It is recommended you use the [RethinkDB logstash input](https://github.com/rethinkdb/logstash-input-rethinkdb) instead.** 4 | 5 | This is a plugin for [Elasticsearch][] that pulls in documents from [RethinkDB][], then indexes new/updated/deleted documents in real time. 6 | Elasticsearch gives you the ability to do [full-text search][]. 7 | 8 | You might want this if you'd like to be able to search RethinkDB documents using queries like: 9 | - get all documents that contain the phrase X 10 | - retrieve the first 10 docs that roughly match the phrase X, ignoring common words like "the" and "a" 11 | 12 | [Elasticsearch]: http://www.elasticsearch.org 13 | [RethinkDB]: http://rethinkdb.com 14 | [full-text search]: http://en.wikipedia.org/wiki/Full_text_search 15 | 16 | ## Installation 17 | 18 | First off, you need [Elasticsearch 1.3 or 1.4][] running on [Java 8][] for this to work. 19 | Once that's in place, you can install the plugin with: 20 | 21 | [Elasticsearch 1.3 or 1.4]: http://www.elasticsearch.org/overview/elkdownloads/ 22 | [Java 8]: http://www.oracle.com/technetwork/java/javase/downloads/index.html 23 | 24 | ``` 25 | elasticsearch-plugin --install river-rethinkdb --url http://goo.gl/JmMwTf 26 | ``` 27 | 28 | 29 | __Note__: Depending on how you've installed Elasticsearch, you may need to become the elasticsearch user to install the plugin. 30 | 31 | ## Quickstart 32 | 33 | If you want to index the table `posts` in the database `blog`, this is all you need to do: 34 | 35 | ```bash 36 | $ curl -XPUT localhost:9200/_river/rethinkdb/_meta -d '{ 37 | "type":"rethinkdb", 38 | "rethinkdb": { 39 | "databases": {"blog": {"posts": {"backfill": true}}}, 40 | "host": "localhost", 41 | "port": 28015 42 | }}' 43 | ``` 44 | 45 | Now you'll have a new index called `blog` and a type called `posts` which you can query: 46 | 47 | ```bash 48 | $ curl localhost:9200/blog/posts/_search?q=*:* 49 | ``` 50 | 51 | Elasticsearch's default port is 9200. 52 | RethinkDB's default port is 28015. 53 | You may want to brush up on [how to query Elasticsearch][]. 54 | 55 | [how to query Elasticsearch]: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-search.html 56 | 57 | ## Details 58 | 59 | Rivers are a kind of plugin for Elasticsearch (ES) that sync external data sources with Elasticsearch's indexes. 60 | ES indexes are similar to RethinkDB's databases, and ES types are similar to RethinkDB's tables. 61 | Every index can have zero or more types, and every type can have zero or more documents. 62 | To configure the river, you create a document in the `_river` index, which is a magical index ES watches for configuration info. 63 | 64 | ```bash 65 | $ curl -XPUT localhost:9200/_river/rethinkdb/_meta 66 | ``` 67 | 68 | This creates a new document in the `rethinkdb` type with the id `_meta`. 69 | At a minimum, the `_meta` document needs a key with the `type` field set to `"rethinkdb"`. 70 | You'll also want to put a `rethinkdb` key with a document that contains these keys: 71 | 72 | - `host`: RethinkDB server hostname, defaults to `"localhost"` 73 | - `port`: RethinkDB server port, defaults to 28015, 74 | - `auth_key`: RethinkDB server auth key, defaults to the empty string 75 | - `databases`: A document containing one subdocument per database 76 | - ``: The name of a database in RethinkDB. Must have a table specified as well. 77 | - ``: The name of a RethinkDB table to watch 78 | - `backfill`: Whether to backfill existing documents or just watch for new ones, defaults to true. 79 | - `index`: What ES index to send documents from this table to, defaults to `` 80 | - `type`: What ES type to send documents from this table to, defaults to `` 81 | 82 | You can specify as many databases and tables to watch as you'd like. 83 | 84 | Here's a larger example that indexes `blog.posts` and `blog.comments` with the defaults plugged in: 85 | 86 | ```javascript 87 | // localhost:9200/_river/rethinkdb/_meta 88 | { 89 | "type": "rethinkdb", 90 | "rethinkdb": { 91 | "host": "localhost", 92 | "port": 28015, 93 | "auth_key": "", 94 | "databases": { 95 | "blog": { 96 | "posts": { 97 | "backfill": true, 98 | "index": "blog", 99 | "type": "posts", 100 | }, 101 | "comments": { 102 | "backfill": true, 103 | "index": "blog", 104 | "type": "comments", 105 | } 106 | } 107 | } 108 | } 109 | } 110 | ``` 111 | 112 | After the river backfills documents for a given table, it will change the `backfill` setting to `false`. 113 | This way, the next time the Elasticsearch server restarts, it won't re-pull all documents from RethinkDB again. 114 | 115 | ## OK, I've queried Elasticsearch, what do I do now? 116 | 117 | The documents are stored in Elasticsearch with the same id as the RethinkDB uses for it, so you can easily retrieve the original document. 118 | 119 | For example, if you query your lorem ipsum blog posts for any that have the word "cupiditate" in the body: 120 | 121 | ``` 122 | $ curl localhost:9200/blog/posts/_search?q=body:cupiditate 123 | ``` 124 | 125 | You'll get results that look like: 126 | 127 | ```javascript 128 | { 129 | "_shards": { 130 | "failed": 0, 131 | "successful": 1, 132 | "total": 1 133 | }, 134 | "hits": { 135 | "hits": [ 136 | { 137 | "_id": "261f4990-627b-4844-96ed-08b182121c5e", 138 | "_index": "blog", 139 | "_score": 1.0, 140 | "_source": { 141 | "body": "cupiditate quo est a modi nesciunt soluta\nipsa voluptas", 142 | "id": "261f4990-627b-4844-96ed-08b182121c5e", 143 | "title": "at nam consequatur ea labore ea harum", 144 | "userId": 10.0 145 | }, 146 | "_type": "posts" 147 | } 148 | ], 149 | "max_score": 1.0, 150 | "total": 1 151 | }, 152 | "timed_out": false, 153 | "took": 6 154 | } 155 | ``` 156 | 157 | Now, you can fetch the original document from RethinkDB using: 158 | 159 | ```python 160 | r.db('blog').table('posts').get('261f4990-627b-4844-96ed-08b182121c5e').run(conn) 161 | ``` 162 | 163 | 164 | 165 | ## Caveats 166 | 167 | Currently, there's no way to guarantee that no documents are lost if the river loses connection with the RethinkDB server. 168 | The only way to be sure is to backfill every time, and this will still miss deleted documents. 169 | In the future, RethinkDB will support changefeeds that accept a timestamp. 170 | When that is implemented, this plugin will be able to ensure no documents are lost during disconnections. 171 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | org.elasticsearch 7 | elasticsearch-river-rethinkdb 8 | 1.0.1 9 | jar 10 | Elasticsearch RethinkDB River plugin 11 | A River for indexing RethinkDB databases and staying synced 12 | https://github.com/rethinkdb/elasticsearch-river-rethinkdb/ 13 | 2014 14 | 15 | 16 | MIT 17 | http://www.opensource.org/licenses/mit-license.php 18 | repo 19 | 20 | 21 | 22 | https://github.com/rethinkdb/elasticsearch-river-rethinkdb/issues 23 | GitHub Issues 24 | 25 | 26 | scm:git:git@github.com:rethinkdb/elasticsearch-river-rethinkdb.git 27 | scm:git:git@github.com:rethinkdb/elasticsearch-river-rethinkdb.git 28 | http://github.com/rethinkdb/elasticsearch-river-rethinkdb 29 | 30 | 31 | 32 | josh@rethinkdb.com 33 | Josh Kuhn 34 | https://github.com/deontologician 35 | deontologician 36 | 37 | 38 | 39 | 40 | 1.3.2 41 | 0.3 42 | INFO 43 | 4.9.0 44 | github 45 | /usr/bin/protoc 46 | UTF-8 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | org.apache.lucene 59 | lucene-test-framework 60 | ${lucene.version} 61 | test 62 | 63 | 64 | 65 | org.elasticsearch 66 | elasticsearch 67 | ${elasticsearch.version} 68 | 69 | 70 | 71 | log4j 72 | log4j 73 | 1.2.27 74 | runtime 75 | 76 | 77 | org.elasticsearch 78 | elasticsearch 79 | ${elasticsearch.version} 80 | test-jar 81 | test 82 | 83 | 84 | com.google.protobuf 85 | protobuf-java 86 | 2.5.0 87 | 88 | 89 | org.slf4j 90 | slf4j-api 91 | 1.6.6 92 | 93 | 94 | 95 | com.google.guava 96 | guava 97 | 24.1.1 98 | 99 | 100 | 101 | 102 | ch.qos.logback 103 | logback-classic 104 | 1.0.9 105 | 106 | 107 | 108 | 109 | junit 110 | junit 111 | 4.11 112 | test 113 | 114 | 115 | 116 | org.easytesting 117 | fest-assert 118 | 1.4 119 | test 120 | 121 | 122 | 123 | 124 | 125 | 126 | src/main/resources 127 | true 128 | 129 | **/*.properties 130 | 131 | 132 | 133 | 134 | 135 | src/test/resources 136 | true 137 | 138 | elasticsearch.yml 139 | 140 | 141 | 142 | src/test/resources 143 | false 144 | 145 | log4j.xml 146 | 147 | 148 | 149 | 150 | 151 | 152 | org.codehaus.mojo 153 | failsafe-maven-plugin 154 | 2.4.3-alpha-1 155 | 156 | 157 | 158 | integration-test 159 | verify 160 | 161 | 162 | 163 | 164 | 165 | com.google.protobuf.tools 166 | maven-protoc-plugin 167 | 0.3.2 168 | 169 | true 170 | ${protoc.file} 171 | 172 | 173 | 174 | 175 | compile 176 | testCompile 177 | 178 | 179 | 180 | 181 | 182 | org.apache.maven.plugins 183 | maven-compiler-plugin 184 | 2.3.2 185 | 186 | 1.8 187 | 1.8 188 | 189 | 190 | 191 | org.apache.maven.plugins 192 | maven-source-plugin 193 | 2.1.2 194 | 195 | 196 | attach-sources 197 | 198 | jar 199 | 200 | 201 | 202 | 203 | 204 | maven-assembly-plugin 205 | 2.3 206 | 207 | false 208 | ${project.build.directory}/releases/ 209 | 210 | ${basedir}/src/main/assemblies/plugin.xml 211 | 212 | 213 | 214 | 215 | package 216 | 217 | single 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | -------------------------------------------------------------------------------- /src/main/assemblies/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | / 11 | true 12 | true 13 | 14 | org.elasticsearch:elasticsearch 15 | 16 | 17 | 18 | / 19 | true 20 | true 21 | runtime 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/main/java/com: -------------------------------------------------------------------------------- 1 | ../../../rethink-java-driver/src/main/java/com -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/plugin/river/rethinkdb/RethinkDBRiverPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Elasticsearch under one or more contributor 3 | * license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright 5 | * ownership. Elasticsearch licenses this file to you under 6 | * the Apache License, Version 2.0 (the "License"); you may 7 | * not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.elasticsearch.plugin.river.rethinkdb; 21 | 22 | import org.elasticsearch.common.inject.Inject; 23 | import org.elasticsearch.plugins.AbstractPlugin; 24 | import org.elasticsearch.river.RiversModule; 25 | import org.elasticsearch.river.rethinkdb.RethinkDBRiverModule; 26 | 27 | /** 28 | * 29 | */ 30 | public class RethinkDBRiverPlugin extends AbstractPlugin { 31 | 32 | @Inject 33 | public RethinkDBRiverPlugin() { 34 | } 35 | 36 | @Override 37 | public String name() { 38 | return "river-rethinkdb"; 39 | } 40 | 41 | @Override 42 | public String description() { 43 | return "River RethinkDB Plugin"; 44 | } 45 | 46 | public void onModule(RiversModule module) { 47 | module.registerRiver("rethinkdb", RethinkDBRiverModule.class); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/river/rethinkdb/ChangeRecord.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.river.rethinkdb; 2 | 3 | import com.rethinkdb.RethinkDB; 4 | import com.rethinkdb.ast.query.RqlQuery; 5 | 6 | import java.util.Map; 7 | 8 | 9 | public class ChangeRecord { 10 | public final String table; 11 | public final String db; 12 | public final boolean backfill; 13 | public final String targetIndex; 14 | public final String targetType; 15 | 16 | public ChangeRecord(String db, String table, Map options){ 17 | this.db = db; 18 | this.table = table; 19 | this.backfill = (boolean) options.getOrDefault("backfill", false); 20 | this.targetIndex = (String) options.getOrDefault("index", db); 21 | this.targetType = (String) options.getOrDefault("type", table); 22 | } 23 | 24 | @Override 25 | public String toString(){ 26 | return "ChangeRecord(" + db + "," + table + "," + 27 | (backfill ? "backfill,": "no backfill,") + 28 | (!targetIndex.equals(db) ? "index=" + targetIndex + ",": "") + 29 | (!targetType.equals(table) ? "type=" + targetType : ""); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/river/rethinkdb/ChangeRecords.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.river.rethinkdb; 2 | 3 | import java.util.*; 4 | 5 | public class ChangeRecords implements Iterable { 6 | 7 | public final int totalSize; 8 | private HashMap> hash; 9 | 10 | public ChangeRecords(Map>> dbs){ 11 | int size = 0; 12 | hash = new HashMap<>(); 13 | for(String dbName: dbs.keySet()){ 14 | for(String tableName: dbs.get(dbName).keySet()){ 15 | if (!hash.containsKey(dbName)){ 16 | hash.put(dbName, new HashMap<>()); 17 | } 18 | hash.get(dbName).put(tableName, new ChangeRecord(dbName, tableName, dbs.get(dbName).get(tableName))); 19 | size++; 20 | } 21 | } 22 | totalSize = size; 23 | } 24 | 25 | public ChangeRecord get(String db, String table){ 26 | return hash.get(db).get(table); 27 | } 28 | 29 | @Override 30 | public String toString(){ 31 | return "ChangeRecords(" + hash.toString() + ")"; 32 | } 33 | 34 | @Override 35 | public Iterator iterator(){ 36 | return getAll().iterator(); 37 | } 38 | 39 | public List getAll(){ 40 | ArrayList ret = new ArrayList<>(totalSize); 41 | for(HashMap tables: hash.values()){ 42 | ret.addAll(tables.values()); 43 | } 44 | return ret; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/river/rethinkdb/FeedWorker.java: -------------------------------------------------------------------------------- 1 | package org.elasticsearch.river.rethinkdb; 2 | 3 | import com.rethinkdb.Cursor; 4 | import com.rethinkdb.RethinkDB; 5 | import com.rethinkdb.RethinkDBConnection; 6 | import com.rethinkdb.RethinkDBException; 7 | import org.elasticsearch.action.bulk.BulkItemResponse; 8 | import org.elasticsearch.action.bulk.BulkRequestBuilder; 9 | import org.elasticsearch.action.bulk.BulkResponse; 10 | import org.elasticsearch.client.Client; 11 | import org.elasticsearch.common.logging.ESLogger; 12 | import org.elasticsearch.common.logging.ESLoggerFactory; 13 | import org.elasticsearch.common.xcontent.XContentBuilder; 14 | 15 | import java.io.IOException; 16 | import java.util.HashSet; 17 | import java.util.Map; 18 | 19 | import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; 20 | 21 | 22 | class FeedWorker implements Runnable { 23 | 24 | private RethinkDBRiver river; 25 | private final RethinkDB r = RethinkDB.r; 26 | private ChangeRecord changeRecord; 27 | private RethinkDBConnection connection; 28 | private Cursor> cursor; 29 | private String primaryKey; 30 | private boolean backfillRequired; 31 | private Client client; 32 | private final ESLogger logger; 33 | private int backoff = 250; 34 | 35 | public FeedWorker(RethinkDBRiver river, ChangeRecord changeRecord, Client client){ 36 | this.river = river; 37 | this.changeRecord = changeRecord; 38 | this.backfillRequired = changeRecord.backfill; 39 | this.client = client; 40 | logger = ESLoggerFactory.getLogger("[" + changeRecord.db + "." + changeRecord.table + "] ", "river.rethinkdb.feedworker"); 41 | } 42 | 43 | private RethinkDBConnection connect(){ 44 | RethinkDBConnection conn = r.connect(river.hostname, river.port, river.authKey); 45 | conn.use(changeRecord.db); 46 | return conn; 47 | } 48 | 49 | private void close(){ 50 | if (cursor != null && !cursor.isClosed()){ 51 | try { 52 | cursor.close(); 53 | }catch(Exception e){} 54 | } 55 | if (connection != null && !connection.isClosed()){ 56 | try { 57 | connection.close(); 58 | }catch(Exception e){} 59 | } 60 | cursor = null; 61 | connection = null; 62 | } 63 | 64 | @Override 65 | public void run() { 66 | try { 67 | connection = connect(); 68 | primaryKey = getPrimaryKey(); 69 | while (!river.closed) { 70 | try { 71 | cursor = r.table(changeRecord.table).changes().runForCursor(connection); 72 | if (backfillRequired) { 73 | backfill(); 74 | } 75 | int counter = 0; 76 | while (cursor.hasNext()) { 77 | Map change = cursor.next(); 78 | updateES(change); 79 | counter++; 80 | if(counter % 10 == 0){ 81 | logger.info("Synced {} documents", counter); 82 | } 83 | } 84 | } catch (RethinkDBException e) { 85 | logger.error("Worker has a problem: " + e.getMessage()); 86 | if (isRecoverableError(e)) { 87 | logger.info("I think this is recoverable. Hang on a second..."); 88 | reconnect(); 89 | } else { 90 | logger.info("This probably isn't recoverable, bailing."); 91 | throw e; 92 | } 93 | } 94 | } 95 | } catch (InterruptedException ie) { 96 | // We are just being shut down by the river, don't do anything 97 | } catch (Exception e){ 98 | if(!river.closed) { 99 | logger.error("failed due to exception", e); 100 | } 101 | } finally { 102 | logger.info("thread shutting down"); 103 | close(); 104 | } 105 | } 106 | 107 | private boolean updateES(Map change) { 108 | Map newVal = (Map) change.get("new_val"); 109 | Map oldVal = (Map) change.get("old_val"); 110 | if(newVal != null) { 111 | client.prepareIndex( 112 | changeRecord.targetIndex, 113 | changeRecord.targetType, 114 | newVal.get(primaryKey).toString()) 115 | .setSource(newVal) 116 | .execute(); 117 | return false; 118 | }else{ 119 | client.prepareDelete( 120 | changeRecord.targetIndex, 121 | changeRecord.targetType, 122 | oldVal.get(primaryKey).toString()) 123 | .execute(); 124 | return true; 125 | } 126 | } 127 | 128 | private int synchronizeBulk(BulkRequestBuilder bulkRequest, HashSet failureReasons) { 129 | int failed = 0; 130 | BulkResponse response = bulkRequest.get(); 131 | if (response.hasFailures()) { 132 | logger.error("Encountered errors backfilling"); 133 | logger.error(response.buildFailureMessage()); 134 | for(BulkItemResponse ir : response.getItems()){ 135 | if (ir.isFailed()) { 136 | failed++; 137 | failureReasons.add(ir.getFailureMessage()); 138 | } 139 | } 140 | } 141 | return failed; 142 | } 143 | 144 | private void backfill() throws IOException { 145 | RethinkDBConnection backfillConnection = r.connect(river.hostname, river.port, river.authKey); 146 | backfillConnection.use(changeRecord.db); 147 | try { 148 | logger.info("Beginning backfill of documents"); 149 | // totalSize is purely for the purposes of printing progress, and may be inaccurate since documents can be 150 | // inserted while we're backfilling 151 | int totalSize = r.table(changeRecord.table).count().run(backfillConnection).intValue(); 152 | BulkRequestBuilder bulkRequest = client.prepareBulk(); 153 | int attempted = 0, failed = 0; 154 | HashSet failureReasons = new HashSet<>(); 155 | int oldDecile = 0, newDecile; 156 | Cursor cursor = r.table(changeRecord.table).runForCursor(backfillConnection); 157 | while (cursor.hasNext()){ 158 | Map doc = (Map) cursor.next(); 159 | newDecile = (attempted * 100) / totalSize / 10; 160 | if (newDecile != oldDecile) { 161 | logger.info("backfill {}0% complete ({} documents)", newDecile, attempted); 162 | oldDecile = newDecile; 163 | } 164 | if (attempted > 0 && attempted % 1000 == 0) { 165 | failed += synchronizeBulk(bulkRequest, failureReasons); 166 | bulkRequest = client.prepareBulk(); 167 | } 168 | bulkRequest.add(client.prepareIndex( 169 | changeRecord.targetIndex, 170 | changeRecord.targetType, 171 | doc.get(primaryKey).toString()) 172 | .setSource(doc) 173 | ); 174 | attempted += 1; 175 | } 176 | if (bulkRequest.numberOfActions() > 0) { 177 | failed += synchronizeBulk(bulkRequest, failureReasons); 178 | } 179 | if (failed > 0) { 180 | logger.info("Attempted to backfill {} items, {} succeeded and {} failed.", 181 | attempted, attempted - failed, failed); 182 | logger.info("Unique failure reasons were: {}", failureReasons.toString()); 183 | backfillRequired = true; 184 | } else { 185 | logger.info("Backfilled {} items. Turning off backfill in settings", attempted); 186 | backfillRequired = false; 187 | } 188 | XContentBuilder builder = jsonBuilder() 189 | .startObject() 190 | .startObject("rethinkdb") 191 | .startObject("databases") 192 | .startObject(changeRecord.db) 193 | .startObject(changeRecord.table) 194 | .field("backfill", backfillRequired) 195 | .endObject() 196 | .endObject() 197 | .endObject() 198 | .endObject() 199 | .endObject(); 200 | client.prepareUpdate("_river", river.riverName().name(), "_meta") 201 | .setRetryOnConflict(river.changeRecords.totalSize + 1) // only other backfilling threads should conflict 202 | .setDoc(builder) 203 | .execute(); 204 | backfillConnection.close(); 205 | } finally { 206 | backfillConnection.close(); 207 | } 208 | } 209 | 210 | @SuppressWarnings("unchecked") 211 | private String getPrimaryKey() { 212 | Map tableInfo = (Map) r.db(changeRecord.db).table(changeRecord.table).info().run(connection); 213 | return tableInfo.get("primary_key").toString(); 214 | } 215 | 216 | private boolean isRecoverableError(RethinkDBException exc){ 217 | String msg = exc.getMessage(); 218 | return !river.closed && (// Don't try to recover if the River is shutting down 219 | msg.matches(".*?Master for shard \\[.*\\) not available.*") // happens immediately after the db starts up, temporary 220 | || msg.matches(".*?Error receiving data.*") // happens when the database shuts down while we're waiting 221 | || msg.matches(".*?Query interrupted.*") // happens when a query is killed? maybe. 222 | || msg.matches(".*?Broken pipe.*") 223 | ); 224 | } 225 | 226 | private void reconnect() throws InterruptedException { 227 | Thread.sleep(backoff); 228 | logger.info("Attempting to reconnect to {}:{}", river.hostname, river.port); 229 | for(backoff = Math.min(backoff*2, 30000);;backoff = Math.min(backoff*2, 30000)){ 230 | try { 231 | close(); 232 | connection = connect(); 233 | }catch(RethinkDBException e){ 234 | logger.error("Reconnect failed, waiting {}ms before trying again", backoff); 235 | Thread.sleep(backoff); 236 | continue; 237 | } 238 | logger.info("Reconnection successful."); 239 | break; 240 | } 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/river/rethinkdb/RethinkDBRiver.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Elasticsearch under one or more contributor 3 | * license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright 5 | * ownership. Elasticsearch licenses this file to you under 6 | * the Apache License, Version 2.0 (the "License"); you may 7 | * not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.elasticsearch.river.rethinkdb; 21 | 22 | import com.rethinkdb.RethinkDB; 23 | import org.elasticsearch.client.Client; 24 | import org.elasticsearch.common.inject.Inject; 25 | import org.elasticsearch.common.util.concurrent.EsExecutors; 26 | import org.elasticsearch.river.AbstractRiverComponent; 27 | import org.elasticsearch.river.River; 28 | import org.elasticsearch.river.RiverName; 29 | import org.elasticsearch.river.RiverSettings; 30 | 31 | import java.util.*; 32 | 33 | import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; 34 | 35 | /** 36 | * 37 | */ 38 | public class RethinkDBRiver extends AbstractRiverComponent implements River { 39 | 40 | private static final RethinkDB r = RethinkDB.r; 41 | private final Client client; 42 | 43 | public final String hostname; 44 | public final int port; 45 | public final String authKey; 46 | public final ChangeRecords changeRecords; 47 | 48 | private volatile List threads; 49 | public volatile boolean closed; 50 | 51 | @SuppressWarnings({"unchecked"}) 52 | @Inject 53 | public RethinkDBRiver( 54 | RiverName riverName, 55 | RiverSettings settings, 56 | Client client) { 57 | super(riverName, settings); 58 | this.client = client; 59 | 60 | // Get ye settings from the document: 61 | // Expected settings document (angle brackets indicate it stands in for a real name) 62 | // "rethinkdb": { 63 | // "host": , 64 | // "port": , 65 | // "auth_key": , 66 | // "databases": { 67 | // : { 68 | // : { 69 | // "backfill": , 70 | // "index": )>, 71 | // "type": )> 72 | // }, 73 | // ... more tables in 74 | // } 75 | // ... more databases 76 | // } 77 | // } 78 | try { 79 | Map rdbSettings = jsonGet( 80 | settings.settings(), "rethinkdb", new HashMap<>()); 81 | hostname = jsonGet(rdbSettings, "host", "localhost"); 82 | port = jsonGet(rdbSettings, "port", 28015); 83 | authKey = jsonGet(rdbSettings, "auth_key", ""); 84 | Map>> tables = jsonGet( 85 | rdbSettings, "databases", new HashMap<>()); 86 | 87 | // Create thy changeRecords to hold the configuration 88 | changeRecords = new ChangeRecords(tables); 89 | logger.info("ChangeRecords: {}", changeRecords); 90 | threads = new ArrayList(); 91 | closed = false; 92 | }catch(Exception e){ 93 | logger.error("Initializing the RethinkDB River failed. " + 94 | " Is your configuration in the right format?" + e.getMessage()); 95 | throw e; 96 | } 97 | } 98 | 99 | @Override 100 | public void start() { 101 | // Start up all the listening threads 102 | logger.info("Starting up RethinkDB River for {}:{}", hostname, port); 103 | for(ChangeRecord changeRecord : changeRecords.getAll()) { 104 | Thread thread = EsExecutors 105 | .daemonThreadFactory(settings.globalSettings(), "rethinkdb_river") 106 | .newThread(new FeedWorker(this, changeRecord, client)); 107 | thread.start(); 108 | logger.info("Starting feed watcher for {}.{}", changeRecord.db, changeRecord.table); 109 | threads.add(thread); 110 | } 111 | } 112 | 113 | @Override 114 | public void close() { 115 | if(closed){ 116 | return; 117 | } 118 | logger.info("Closing RethinkDB River"); 119 | closed = true; 120 | for(Thread thread: threads){ 121 | thread.interrupt(); 122 | } 123 | 124 | } 125 | 126 | @SuppressWarnings({"unchecked"}) 127 | public static V jsonGet(Map map, String key, V defaultValue){ 128 | return map.containsKey(key) ? (V) map.get(key) : defaultValue; 129 | } 130 | 131 | } 132 | -------------------------------------------------------------------------------- /src/main/java/org/elasticsearch/river/rethinkdb/RethinkDBRiverModule.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Elasticsearch under one or more contributor 3 | * license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright 5 | * ownership. Elasticsearch licenses this file to you under 6 | * the Apache License, Version 2.0 (the "License"); you may 7 | * not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.elasticsearch.river.rethinkdb; 21 | 22 | import org.elasticsearch.common.inject.AbstractModule; 23 | import org.elasticsearch.river.River; 24 | 25 | /** 26 | * 27 | */ 28 | public class RethinkDBRiverModule extends AbstractModule { 29 | 30 | @Override 31 | protected void configure() { 32 | bind(River.class).to(RethinkDBRiver.class).asEagerSingleton(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/proto: -------------------------------------------------------------------------------- 1 | ../../rethink-java-driver/src/main/proto -------------------------------------------------------------------------------- /src/main/resources/es-plugin.properties: -------------------------------------------------------------------------------- 1 | plugin=org.elasticsearch.plugin.river.rethinkdb.RethinkDBRiverPlugin 2 | version=${project.version} 3 | --------------------------------------------------------------------------------