├── .gitignore ├── LICENSE ├── README.md ├── changes.md ├── circle.yml ├── project.clj ├── scripts ├── release.sh └── start_elasticsearch.sh ├── src └── onyx │ └── plugin │ └── elasticsearch.clj └── test └── onyx ├── plugin ├── input_test.clj └── output_test.clj └── util └── helper.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .idea 11 | *.iml 12 | *.log 13 | scripts/release-scripts 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 
27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. 
Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 
97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. 
In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement, including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. 
DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. 
However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of Washington and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 
215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## onyx-elasticsearch 2 | 3 | [Onyx](https://github.com/onyx-platform/onyx) plugin providing query and write for batch processing an ElasticSearch 1.x cluster. For more details on ElasticSearch please read the [official documentation](https://www.elastic.co/guide/index.html). 4 | 5 | **Note on the Version Number Format**: The first three numbers in the version correspond to the latest Onyx platform release. The final digit increments when there are changes to the plugin within an Onyx release. 6 | 7 | ### Installation 8 | 9 | In your project file: 10 | 11 | ```clojure 12 | [org.onyxplatform/onyx-elasticsearch "0.9.15.0"] 13 | ``` 14 | 15 | In your peer boot-up namespace: 16 | 17 | ```clojure 18 | (:require [onyx.plugin.elasticsearch]) 19 | ``` 20 | 21 | ### Functions 22 | 23 | #### read-messages 24 | 25 | Reads documents from an ElasticSearch cluster with a specified query and submits them to the Onyx workflow for processing. 
26 | 27 | **Catalog entry**: 28 | 29 | ```clojure 30 | {:onyx/name :read-messages 31 | :onyx/plugin :onyx.plugin.elasticsearch/read-messages 32 | :onyx/type :input 33 | :onyx/medium :elasticsearch 34 | :elasticsearch/host "127.0.0.1" 35 | :elasticsearch/port 9200 36 | :elasticsearch/cluster-name "my-cluster-name" 37 | :elasticsearch/client-type :http 38 | :elasticsearch/http-ops {:basic-auth ["user" "pass"]} 39 | :elasticsearch/index "my-index-name" 40 | :elasticsearch/mapping "my-mapping-name" 41 | :elasticsearch/query {:term {:foo "bar"}} 42 | :elasticsearch/sort {:foo "desc"} 43 | :elasticsearch/restart-on-fail false 44 | :onyx/batch-size batch-size 45 | :onyx/max-peers 1 46 | :onyx/doc "Read documents from an ElasticSearch Query"} 47 | ``` 48 | 49 | **Lifecycle entry**: 50 | 51 | ```clojure 52 | {:lifecycle/task :read-messages 53 | :lifecycle/calls :onyx.plugin.elasticsearch/read-messages-calls} 54 | ``` 55 | 56 | **Attributes** 57 | 58 | | key | type | default | description 59 | |--------------------------------|-----------|-------------|------------- 60 | |`:elasticsearch/host` | `string` | | ElasticSearch Host. Required. 61 | |`:elasticsearch/port` | `number` | | ElasticSearch Port. Required. 62 | |`:elasticsearch/protocol` | `keyword` | `:http` | Protocol to use when connecting to ElasticSearch. Should be either `:http` or `:https`. Only applies when using `:client-type` of `:http`. 63 | |`:elasticsearch/cluster-name` | `string` | | ElasticSearch Cluster Name. Required for native connections. 64 | |`:elasticsearch/client-type` | `keyword` | `:http` | Type of client to create. Should be either `:http` or `:native`. 65 | |`:elasticsearch/http-ops` | `map` | | Additional, optional HTTP Options used by the HTTP Client for connections. Includes any options allowed by the [clj-http library](https://github.com/dakrone/clj-http#usage). 66 | |`:elasticsearch/index` | `string` | | The index to search for the document in. If not provided, all indexes will be searched. 
67 | |`:elasticsearch/mapping` | `string` | | The name of the ElasticSearch mapping to search for documents in. If not provided, all mappings will be searched. 68 | |`:elasticsearch/query` | `map` | | A Clojure map with the same structure as an [ElasticSearch JSON Query](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html). If not provided, all documents will be returned. 69 | |`:elasticsearch/sort` | `map` | `"_score"` | A Clojure map with the same structure as an [ElasticSearch JSON Sort](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-sort.html). 70 | |`:elasticsearch/restart-on-fail`| `boolean` | `false` | If `true` the entire query will be run again from scratch in the event of a peer failure. Otherwise, will re-run from last offset. See below section: "Message Guarantees" 71 | 72 | 73 | **Response Segment**: 74 | 75 | If the query does not match any documents, then the `:done` sentinel will be returned signaling that there is no further processing for that query. Otherwise, the response segment will be a clojure map with data and meta-data from ElasticSearch. An example segment resulting from a search for a document `{:foo "bar"}`: 76 | 77 | ```clojure 78 | {:_index "idx-id" 79 | :_type "mapping-type" 80 | :_id "doc-id" 81 | :_score 0.30685282 82 | :_source {:foo "bar"}} 83 | ``` 84 | 85 | **Message Guarantees & Fault Tolerance** 86 | 87 | In general, Onyx offers an "at-least-once" message processing guarantee. This same guarantee should extend to input plugins as well including situations where there is a peer failure processing the input from source. However, because of the nature of ElasticSearch, there are situations where this guarantee is not possible with this plugin if a peer fails and there are updates to messages involved in the query concurrent to the processing job. If there are no concurrent modifications (or a peer does not fail), then the "at-least-once" guarantee stands. 
88 | 89 | There is also variation on how this plugin supports a "[happened-before](https://en.wikipedia.org/wiki/Happened-before)" relationship when updates to documents are being made concurrent to workflow execution and a failure occurs. Without failures, the plugin will only process documents that were created/modified prior to the execution of the query regardless of concurrent activity. However, in a failure, the query must be re-run and so there is variation on this guarantee. 90 | 91 | There are two options available to handle fault tolerance using the `:elasticsearch/restart-on-fail` catalog parameter as well as some additional manual steps that can be taken. This section is intended to help the workflow designer decide how to best configure fault tolerance to meet the message processing requirements of their application. 92 | 93 | `:elasticsearch/restart-on-fail = TRUE` 94 | 95 | When `true` the entire query will be restarted from scratch in the event of a failure. This will guarantee that all messages will be processed at least once, however depending on how many messages processed before the peer failure, there could be a large number of duplicate messages processed from the query. For concurrent creates and updates all messages processed prior to the last restarted query will be seen. The exception is concurrent deletes, which can cause non-deterministic results if a message is deleted after the original processing, but before a restart query. 96 | 97 | This setting is recommended when the query does not return a prohibitively large number of results and every message needs to be processed (assuming, of course, that processing is idempotent, which should be the case anyway in an "at-least-once" system). 98 | 99 | Additionally, the user can leverage the [Onyx State Management](https://onyx-platform.gitbooks.io/onyx/content/doc/user-guide/aggregation-state-management.html) functionality to only process messages once. 
This can improve the message processing guarantee to "exactly-once" and a "happened-before" the original query message relationship (the exception being deletes). 100 | 101 | `:elasticsearch/restart-on-fail = FALSE` 102 | 103 | When `false` the query will be restarted from the last acked offset. This can provide significant savings when the query is large since, in the event of a failure, the already processed messages do not get re-triggered. If there are no concurrent modifications to documents in the query or only creates and updates are being processed, then this will guarantee at least once message processing. When there are concurrent deletes, the results are non-deterministic since messages removed before the current offset can affect the order on re-queries. In all cases of concurrent modification, happened-before is non-deterministic with this setting. 104 | 105 | This setting is recommended when the query is large and/or there are either no concurrent modifications or precise processing isn't necessary (E.G.: calculating statistics on a large data set may have tolerance for a couple of lost messages in the event of a failure). 106 | 107 | Additionally, the user can add an ascending [ElasticSearch sort criteria](https://www.elastic.co/guide/en/elasticsearch/reference/master/search-request-sort.html) on a field that increments per message such as a timestamp or incrementing counter so that any new messages created will be returned last in a query response providing a "happened-before" the original query relationship when the only concurrent operation is creates. 108 | 109 | 110 | #### write-messages 111 | 112 | Creates, Updates, or Deletes documents from an ElasticSearch cluster. 
113 | 114 | **Catalog entry**: 115 | 116 | ```clojure 117 | {:onyx/name :write-messages 118 | :onyx/plugin :onyx.plugin.elasticsearch/write-messages 119 | :onyx/type :output 120 | :onyx/medium :elasticsearch 121 | :elasticsearch/host "127.0.0.1" 122 | :elasticsearch/port 9200 123 | :elasticsearch/cluster-name "my-cluster-name" 124 | :elasticsearch/client-type :http 125 | :elasticsearch/http-ops {:basic-auth ["user" "pass"]} 126 | :elasticsearch/index "my-index-name" 127 | :elasticsearch/mapping "my-mapping-name" 128 | :elasticsearch/doc-id "my-id" 129 | :elasticsearch/write-type :insert 130 | :onyx/batch-size batch-size 131 | :onyx/doc "Writes documents to elasticsearch"} 132 | ``` 133 | 134 | **Lifecycle entry**: 135 | 136 | ```clojure 137 | [{:lifecycle/task :write-messages 138 | :lifecycle/calls :onyx.plugin.elasticsearch/write-messages-calls}] 139 | ``` 140 | 141 | Segments supplied to a write-messages task should be a Clojure map and can take one of two forms: 142 | * Map containing the message in the following form: `{:elasticsearch/message message-body}` where `message-body` is a Clojure map representing the document to send to ElasticSearch. The map can also contain the following optional attributes (defined in detail in the table below), which overwrite those specified in the catalog: 143 | * `:elasticsearch/index` 144 | * `:elasticsearch/mapping` 145 | * `:elasticsearch/doc-id` 146 | * `:elasticsearch/write-type` 147 | * If the map does NOT contain an `:elasticsearch/message` key, then the entire input segment will be treated as the document for ElasticSearch and the default settings from the catalog will be used. 148 | 149 | **Attributes** 150 | 151 | | key | type | default | description 152 | |-----------------------------|-----------|-------------|------------- 153 | |`:elasticsearch/host` | `string` | | ElasticSearch Host. Required. 154 | |`:elasticsearch/port` | `number` | | ElasticSearch Port. Required. 
155 | |`:elasticsearch/cluster-name`| `string` | | ElasticSearch Cluster Name. Required for native connections. 156 | |`:elasticsearch/client-type` | `keyword` |`:http` | Type of client to create. Should be either `:http` or `:native`. 157 | |`:elasticsearch/http-ops` | `map` | | Additional, optional HTTP Options used by the HTTP Client for connections. Includes any options allowed by the [clj-http library](https://github.com/dakrone/clj-http#usage). 158 | |`:elasticsearch/index` | `string` | | The index to store the document in. Required in either Catalog or Segment. 159 | |`:elasticsearch/mapping` | `string` |`"_default_"`| The name of the ElasticSearch mapping to use. 160 | |`:elasticsearch/doc-id` | `string` | | Unique id of the document. Required only for delete, otherwise ElasticSearch will generate a unique id if not supplied for insert/upsert operations. 161 | |`:elasticsearch/write-type` | `keyword` |`:insert` | Type of write to perform. Should be one of `:insert`, `:upsert`, `:delete`. The difference between `:insert` and `:upsert` is that `:insert` will fail if document already exists, while `:upsert` will update the version in ElasticSearch with the submitted document. 162 | 163 | ### Acknowledgements 164 | 165 | This plugin leverages the [clojurewerkz/elastisch](https://github.com/clojurewerkz/elastisch) library for all ElasticSearch communication. 166 | 167 | ### Contributing 168 | 169 | Pull requests into the master branch are welcomed. 170 | 171 | ### License 172 | 173 | Copyright © 2015 Matt Anderson 174 | 175 | Distributed under the Eclipse Public License, the same as Clojure. 176 | -------------------------------------------------------------------------------- /changes.md: -------------------------------------------------------------------------------- 1 | #### 0.9.9.1 2 | 3 | - Added support for `:elasticsearch/protocol` configuration to specify either 4 | `:http` or `:https`. 
5 | 6 | #### 0.8.3.0 7 | 8 | - Added project to automatic release infrastructure. 9 | -------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | override: 3 | - echo '{:user {:plugins [[lein-voom "0.1.0-20150822_000839-g763d315"]]}}' > ~/.lein/profiles.clj 4 | - lein voom build-deps 5 | - docker run -d -p 9200:9200 -p 9300:9300 elasticsearch:1.7.5 6 | 7 | test: 8 | override: 9 | - lein with-profile dev,circle-ci test: 10 | timeout: 480 11 | 12 | machine: 13 | java: 14 | version: oraclejdk8 15 | services: 16 | - docker 17 | 18 | notify: 19 | webhooks: 20 | - url: https://webhooks.gitter.im/e/7f6cadb429def50c94a2 21 | 22 | deployment: 23 | update-projects: 24 | branch: master 25 | commands: 26 | - lein deploy 27 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject org.onyxplatform/onyx-elasticsearch "0.9.15.0" 2 | :description "Onyx plugin for Elasticsearch" 3 | :url "https://github.com/onyx-platform/onyx-elasticsearch" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :repositories {"snapshots" {:url "https://clojars.org/repo" 7 | :username :env 8 | :password :env 9 | :sign-releases false} 10 | "releases" {:url "https://clojars.org/repo" 11 | :username :env 12 | :password :env 13 | :sign-releases false}} 14 | :dependencies [[org.clojure/clojure "1.7.0"] 15 | ^{:voom {:repo "git@github.com:onyx-platform/onyx.git" :branch "master"}} 16 | [org.onyxplatform/onyx "0.9.15"] 17 | [clojurewerkz/elastisch "2.2.0"]] 18 | :profiles {:dev {:dependencies [[http-kit "2.1.19"] 19 | [org.clojure/data.json "0.2.6"]] 20 | :plugins [[lein-set-version "0.4.1"] 21 | [lein-update-dependency "0.1.2"] 22 | [lein-pprint "1.1.1"]]} 23 | :circle-ci {:jvm-opts 
["-Xmx4g"]}}) 24 | -------------------------------------------------------------------------------- /scripts/release.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 | set -o nounset 6 | set -o xtrace 7 | 8 | # Make sure we're in onyx-kafka's directory first 9 | cd "$(dirname "$0")/.." 10 | 11 | REPO_SRC="https://github.com/onyx-platform/onyx-release-scripts.git" 12 | LOCAL_REPO="scripts/release-scripts" 13 | LOCAL_REPO_VC_DIR=$LOCAL_REPO/.git 14 | 15 | pushd . 16 | 17 | if [ ! -d $LOCAL_REPO_VC_DIR ] 18 | then 19 | git clone $REPO_SRC $LOCAL_REPO 20 | else 21 | cd $LOCAL_REPO 22 | git pull $REPO_SRC 23 | popd 24 | fi 25 | 26 | bash "$LOCAL_REPO/release_plugin.sh" "$@" 27 | -------------------------------------------------------------------------------- /scripts/start_elasticsearch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | docker run -d -p 9200:9200 -p 9300:9300 elasticsearch:1.7.5 3 | -------------------------------------------------------------------------------- /src/onyx/plugin/elasticsearch.clj: -------------------------------------------------------------------------------- 1 | (ns onyx.plugin.elasticsearch 2 | (:require [onyx.peer.function :as function] 3 | [onyx.extensions :as extensions] 4 | [onyx.peer.pipeline-extensions :as p-ext] 5 | [onyx.static.default-vals :refer [default-vals arg-or-default]] 6 | [onyx.types :as t] 7 | [clojure.core.async :refer [chan go timeout !! alts!! sliding-buffer go-loop close! poll! offer!]] 8 | [clojurewerkz.elastisch.native :as es] 9 | [clojurewerkz.elastisch.rest :as esr] 10 | [clojurewerkz.elastisch.native.document] 11 | [clojurewerkz.elastisch.rest.document] 12 | [taoensso.timbre :as log])) 13 | 14 | (defn- contains-some? 15 | [col & keys] 16 | (some true? (map #(contains? 
col %) keys))) 17 | 18 | (defn- create-es-client 19 | [client-type protocol host port cluster-name http-ops] 20 | (if 21 | (= client-type :http) 22 | (esr/connect (str (name protocol) "://" host ":" port) http-ops) 23 | (es/connect [[host port]] {"cluster.name" cluster-name}))) 24 | 25 | (defn- run-as 26 | [type op & args] 27 | (let [nsp (if (= type :native) "clojurewerkz.elastisch.native.document/" "clojurewerkz.elastisch.rest.document/")] 28 | (apply (resolve (symbol (str nsp (name op)))) (flatten args)))) 29 | 30 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 31 | ;; Reader 32 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 33 | 34 | (defn- query-es 35 | [client-type conn index mapping query sort start-index scroll] 36 | (let [query-list (if query [:query query] []) 37 | sort-list (if sort [:sort sort] [])] 38 | (-> 39 | (cond 40 | (and index mapping) (run-as client-type :search conn index mapping query-list sort-list :from start-index :scroll scroll) 41 | (not (nil? index)) (run-as client-type :search-all-types conn index query-list sort-list :from start-index :scroll scroll) 42 | :else (run-as client-type :search-all-indexes-and-types conn query-list sort-list :from start-index :scroll scroll))))) 43 | 44 | (defn- start-commit-loop! [write-chunk? commit-ch log k] 45 | (go-loop [] 46 | (when-let [content (!! read-ch (t/input (java.util.UUID/randomUUID) :done)) 88 | {}) 89 | (do 90 | (log/info (str "Creating ElasticSearch " client-type " client for " host ":" port)) 91 | (let [_ (start-commit-loop! (not restart-on-fail) commit-ch log job-task-id) 92 | conn (create-es-client client-type protocol host port cluster-name http-ops) 93 | start-index (:chunk-index content) 94 | scroll-time "1m" 95 | res (query-es client-type conn index mapping query sort (inc start-index) scroll-time)] 96 | (loop [rs (run-as client-type :scroll-seq conn res) 97 | chunk-idx (inc start-index)] 98 | (when-let [msg (first rs)] 99 | (when-not (offer! 
read-ch (assoc (t/input (java.util.UUID/randomUUID) msg) :chunk-index chunk-idx)) 100 | (throw (ex-info "Error placing message onto read-ch, which has static size. To be fixed in a future release: https://github.com/onyx-platform/onyx-elasticsearch/issues/1" {}))) 101 | (recur (next rs) (inc chunk-idx)))) 102 | (>!! read-ch (t/input (java.util.UUID/randomUUID) :done)) 103 | {:elasticsearch/connection conn 104 | :elasticsearch/read-ch read-ch 105 | :elasticsearch/retry-ch retry-ch 106 | :elasticsearch/commit-ch commit-ch 107 | :elasticsearch/doc-defaults {:elasticsearch/index index 108 | :elasticsearch/mapping mapping 109 | :elasticsearch/query query 110 | :elasticsearch/client-type client-type}}))))) 111 | 112 | (defn close-read-resources 113 | [{:keys [elasticsearch/producer-ch elasticsearch/commit-ch elasticsearch/read-ch elasticsearch/retry-ch] :as event} lifecycle] 114 | (close! read-ch) 115 | (close! retry-ch) 116 | (while (poll! read-ch)) 117 | (while (poll! retry-ch)) 118 | (close! commit-ch) 119 | {}) 120 | 121 | (def read-messages-calls 122 | {:lifecycle/before-task-start inject-reader 123 | :lifecycle/after-task-stop close-read-resources}) 124 | 125 | (defn- highest-acked-chunk [starting-index max-index pending-chunk-indices] 126 | (loop [max-pending starting-index] 127 | (if (or (pending-chunk-indices (inc max-pending)) 128 | (= max-index max-pending)) 129 | max-pending 130 | (recur (inc max-pending))))) 131 | 132 | (defn- all-done? [messages] 133 | (empty? (remove #(= :done (:message %)) 134 | messages))) 135 | 136 | (defrecord ElasticsearchRead [max-pending batch-size batch-timeout pending-messages drained? 
137 | top-chunk-index top-acked-chunk-index pending-chunk-indices 138 | read-ch retry-ch commit-ch] 139 | p-ext/Pipeline 140 | (write-batch 141 | [_ event] 142 | (function/write-batch event)) 143 | 144 | (read-batch [_ _] 145 | (let [pending (count (keys @pending-messages)) 146 | max-segments (min (- max-pending pending) batch-size) 147 | timeout-ch (timeout batch-timeout) 148 | batch (if (zero? max-segments) 149 | (> (range max-segments) 151 | (keep (fn [_] 152 | (let [[result ch] (alts!! [retry-ch read-ch timeout-ch] :priority true)] 153 | result)))))] 154 | (doseq [m batch] 155 | (when-let [chunk-index (:chunk-index m)] 156 | (swap! top-chunk-index max chunk-index) 157 | (swap! pending-chunk-indices conj chunk-index)) 158 | (swap! pending-messages assoc (:id m) m)) 159 | (when (and (all-done? (vals @pending-messages)) 160 | (all-done? batch) 161 | (zero? (count (.buf read-ch))) 162 | (zero? (count (.buf retry-ch))) 163 | (or (not (empty? @pending-messages)) 164 | (not (empty? batch)))) 165 | (>!! commit-ch {:status :complete}) 166 | (reset! drained? true)) 167 | {:onyx.core/batch batch})) 168 | 169 | (seal-resource [_ _]) 170 | 171 | p-ext/PipelineInput 172 | (ack-segment [_ _ segment-id] 173 | (let [chunk-index (:chunk-index (@pending-messages segment-id))] 174 | (swap! pending-chunk-indices disj chunk-index) 175 | (let [new-top-acked (highest-acked-chunk @top-acked-chunk-index @top-chunk-index @pending-chunk-indices)] 176 | (>!! commit-ch {:chunk-index new-top-acked :status :incomplete}) 177 | (reset! top-acked-chunk-index new-top-acked)) 178 | (swap! pending-messages dissoc segment-id))) 179 | 180 | (retry-segment 181 | [_ _ segment-id] 182 | (when-let [msg (get @pending-messages segment-id)] 183 | (swap! pending-messages dissoc segment-id) 184 | (>!! retry-ch (t/input (java.util.UUID/randomUUID) 185 | (:message msg))))) 186 | 187 | (pending? 188 | [_ _ segment-id] 189 | (get @pending-messages segment-id)) 190 | 191 | (drained? 
192 | [_ _] 193 | @drained?)) 194 | 195 | (defn read-messages 196 | [event] 197 | (let [task-map (:onyx.core/task-map event) 198 | max-pending (arg-or-default :onyx/max-pending task-map) 199 | batch-size (:onyx/batch-size task-map) 200 | batch-timeout (arg-or-default :onyx/batch-timeout task-map) 201 | pending-messages (atom {}) 202 | drained? (atom false) 203 | read-ch (chan 10000) 204 | retry-ch (chan (* 2 max-pending)) 205 | commit-ch (chan (sliding-buffer 1))] 206 | (->ElasticsearchRead max-pending batch-size batch-timeout pending-messages drained? 207 | (atom -1) (atom -1) (atom #{}) read-ch retry-ch commit-ch))) 208 | 209 | 210 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 211 | ;; Writer 212 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 213 | 214 | (defn- write-elasticsearch [cxn doc settings] 215 | (let [client-type (:elasticsearch/client-type settings) 216 | index (:elasticsearch/index settings) 217 | mapping (:elasticsearch/mapping settings) 218 | doc-id (:elasticsearch/doc-id settings) 219 | write-type (if doc-id 220 | (:elasticsearch/write-type settings) 221 | (keyword (str (name (:elasticsearch/write-type settings)) "-noid")))] 222 | (case write-type 223 | :insert (run-as client-type :create cxn index mapping doc :id doc-id) 224 | :insert-noid (run-as client-type :create cxn index mapping doc) 225 | :upsert (run-as client-type :put cxn index mapping doc-id doc) 226 | :upsert-noid (run-as client-type :create cxn index mapping doc) 227 | :delete (run-as client-type :delete cxn index mapping doc-id) 228 | :default (throw (Exception. 
(str "Invalid write type: " write-type)))))) 229 | 230 | (defn inject-writer 231 | [{{protocol :elasticsearch/protocol 232 | host :elasticsearch/host 233 | port :elasticsearch/port 234 | cluster-name :elasticsearch/cluster-name 235 | http-ops :elasticsearch/http-ops 236 | client-type :elasticsearch/client-type 237 | index :elasticsearch/index 238 | doc-id :elasticsearch/doc-id 239 | mapping :elasticsearch/mapping 240 | write-type :elasticsearch/write-type 241 | :or {http-ops {} 242 | protocol :http 243 | client-type :http 244 | mapping "_default_" 245 | write-type :insert}} :onyx.core/task-map} _] 246 | {:pre [(not (empty? host)) 247 | (and (number? port) (< 0 port 65536)) 248 | (some #{client-type} [:http :native]) 249 | (or (= client-type :http) (not (empty? cluster-name))) 250 | (some #{write-type} [:insert :upsert :delete]) 251 | (or (not= write-type :delete) (not (empty? doc-id)))]} 252 | (log/info (str "Creating ElasticSearch " client-type " client for " host ":" port)) 253 | {:elasticsearch/connection (create-es-client client-type protocol host port cluster-name http-ops) 254 | :elasticsearch/doc-defaults {:elasticsearch/index index 255 | :elasticsearch/doc-id doc-id 256 | :elasticsearch/mapping mapping 257 | :elasticsearch/write-type write-type 258 | :elasticsearch/client-type client-type}}) 259 | 260 | (def write-messages-calls 261 | {:lifecycle/before-task-start inject-writer}) 262 | 263 | (defrecord ElasticsearchWrite [] 264 | p-ext/Pipeline 265 | (read-batch 266 | [_ event] 267 | (function/read-batch event)) 268 | 269 | (write-batch 270 | [_ {results :onyx.core/results 271 | connection :elasticsearch/connection 272 | default-vals :elasticsearch/doc-defaults}] 273 | (doseq [msg (mapcat :leaves (:tree results))] 274 | (let [document (or (:elasticsearch/message (:message msg)) (:message msg)) 275 | settings (if 276 | (or (= :delete (:elasticsearch/write-type default-vals)) 277 | (contains-some? 
(:message msg) :elasticsearch/message :elasticsearch/write-type)) 278 | (merge default-vals (select-keys 279 | (:message msg) [:elasticsearch/index 280 | :elasticsearch/doc-id 281 | :elasticsearch/mapping 282 | :elasticsearch/write-type])) 283 | default-vals)] 284 | (log/debug (str "Message Settings: " settings)) 285 | (write-elasticsearch connection document settings))) 286 | {}) 287 | 288 | (seal-resource 289 | [_ _] 290 | {})) 291 | 292 | (defn write-messages [_] 293 | (->ElasticsearchWrite)) 294 | -------------------------------------------------------------------------------- /test/onyx/plugin/input_test.clj: -------------------------------------------------------------------------------- 1 | (ns onyx.plugin.input-test 2 | (:require [clojure.core.async :refer [chan >!! result first :_source :foo))))))) 160 | 161 | 162 | (testing "Successful Query for Native with Query, Map, and Index defined" 163 | (with-test-env [test-env [2 env-config peer-config]] 164 | (reset! out-chan (chan (sliding-buffer (inc n-messages)))) 165 | (let [job {:catalog catalog-native-q&map&idx 166 | :workflow workflow 167 | :lifecycles lifecycles 168 | :task-scheduler :onyx.task-scheduler/balanced} 169 | {:keys [job-id]} (onyx.api/submit-job peer-config job)] 170 | (onyx.api/await-job-completion peer-config job-id) 171 | (let [result (take-segments! @out-chan 5000)] 172 | (is (= "bar" (-> result first :_source :foo))))))) 173 | 174 | (testing "Successful Query for Native with Query defined" 175 | (with-test-env [test-env [2 env-config peer-config]] 176 | (reset! out-chan (chan (sliding-buffer (inc n-messages)))) 177 | (let [job {:catalog catalog-native-q&all 178 | :workflow workflow 179 | :lifecycles lifecycles 180 | :task-scheduler :onyx.task-scheduler/balanced} 181 | {:keys [job-id]} (onyx.api/submit-job peer-config job)] 182 | (onyx.api/await-job-completion peer-config job-id) 183 | (let [result (take-segments! 
@out-chan 5000)] 184 | (is (= "bar" (-> result first :_source :foo))))))) 185 | 186 | (testing "Successful Query for Native for all" 187 | (with-test-env [test-env [2 env-config peer-config]] 188 | (reset! out-chan (chan (sliding-buffer (inc n-messages)))) 189 | (let [job {:catalog catalog-native-idx 190 | :workflow workflow 191 | :lifecycles lifecycles 192 | :task-scheduler :onyx.task-scheduler/balanced} 193 | {:keys [job-id]} (onyx.api/submit-job peer-config job)] 194 | (onyx.api/await-job-completion peer-config job-id) 195 | (let [result (take-segments! @out-chan 5000)] 196 | (is (= "bar" (-> result first :_source :foo)))))))) 197 | 198 | (deftest http-test 199 | (def conn (u/connect-rest-client)) 200 | (esrd/create conn (.toString id) "_default_" {:foo "bar"}) 201 | (Thread/sleep 5000) 202 | 203 | (testing "Successful Query for HTTP with Query, Map, and Index defined" 204 | (with-test-env [test-env [2 env-config peer-config]] 205 | (reset! out-chan (chan (sliding-buffer (inc n-messages)))) 206 | (let [job {:catalog catalog-http-q&map&idx 207 | :workflow workflow 208 | :lifecycles lifecycles 209 | :task-scheduler :onyx.task-scheduler/balanced} 210 | {:keys [job-id]} (onyx.api/submit-job peer-config job)] 211 | (onyx.api/await-job-completion peer-config job-id) 212 | (let [result (take-segments! @out-chan 5000)] 213 | (is (= "bar" (-> result first :_source :foo))))))) 214 | 215 | (testing "Successful Query for HTTP with Query and Index defined" 216 | (with-test-env [test-env [2 env-config peer-config]] 217 | (reset! out-chan (chan (sliding-buffer (inc n-messages)))) 218 | (let [job {:catalog catalog-http-q&idx 219 | :workflow workflow 220 | :lifecycles lifecycles 221 | :task-scheduler :onyx.task-scheduler/balanced} 222 | {:keys [job-id]} (onyx.api/submit-job peer-config job)] 223 | (onyx.api/await-job-completion peer-config job-id) 224 | (let [result (take-segments! 
@out-chan 5000)] 225 | (is (= "bar" (-> result first :_source :foo))))))) 226 | 227 | (testing "Successful Query for HTTP with Query defined" 228 | (with-test-env [test-env [2 env-config peer-config]] 229 | (reset! out-chan (chan (sliding-buffer (inc n-messages)))) 230 | (let [job {:catalog catalog-http-q&all 231 | :workflow workflow 232 | :lifecycles lifecycles 233 | :task-scheduler :onyx.task-scheduler/balanced} 234 | {:keys [job-id]} (onyx.api/submit-job peer-config job)] 235 | (onyx.api/await-job-completion peer-config job-id) 236 | (let [result (take-segments! @out-chan 5000)] 237 | (is (= "bar" (-> result first :_source :foo))))))) 238 | 239 | (testing "Successful Query for HTTP for all" 240 | (with-test-env [test-env [2 env-config peer-config]] 241 | (reset! out-chan (chan (sliding-buffer (inc n-messages)))) 242 | (let [job {:catalog catalog-http-idx 243 | :workflow workflow 244 | :lifecycles lifecycles 245 | :task-scheduler :onyx.task-scheduler/balanced} 246 | {:keys [job-id]} (onyx.api/submit-job peer-config job)] 247 | (onyx.api/await-job-completion peer-config job-id) 248 | (let [result (take-segments! @out-chan 5000)] 249 | (is (= "bar" (-> result first :_source :foo)))))))) 250 | 251 | (deftest fault-logic 252 | (u/delete-indexes (.toString id)) 253 | (doseq [n (range n-messages)] 254 | (esrd/create conn (.toString id) "_default_" {:foo "bar"} :id (str n))) 255 | (Thread/sleep 5000) 256 | 257 | (testing "Successfully processed all messages no failure" 258 | (with-test-env [test-env [2 env-config peer-config]] 259 | (reset! out-chan (chan (sliding-buffer (inc n-messages)))) 260 | (let [job {:catalog catalog-http-q&map&idx 261 | :workflow workflow 262 | :lifecycles lifecycles 263 | :task-scheduler :onyx.task-scheduler/balanced} 264 | {:keys [job-id]} (onyx.api/submit-job peer-config job)] 265 | (onyx.api/await-job-completion peer-config job-id) 266 | (let [result (take-segments! 
@out-chan 5000) 267 | task-chunk-offset (extensions/read-chunk (:log (:env test-env)) :chunk (str job-id "#" :read-messages))] 268 | (is (= 11 (count result))) 269 | (is (= :complete (:status task-chunk-offset)))))) 270 | 271 | (with-test-env [test-env [2 env-config peer-config]] 272 | (reset! out-chan (chan (sliding-buffer (inc n-messages)))) 273 | (let [job {:catalog (update-in catalog-http-q&map&idx [0] assoc :elasticsearch/restart-on-fail true) 274 | :workflow workflow 275 | :lifecycles lifecycles 276 | :task-scheduler :onyx.task-scheduler/balanced} 277 | {:keys [job-id]} (onyx.api/submit-job peer-config job)] 278 | (onyx.api/await-job-completion peer-config job-id) 279 | (let [task-chunk-restart (extensions/read-chunk (:log (:env test-env)) :chunk (str job-id "#" :read-messages))] 280 | (is (= -1 (:chunk-index task-chunk-restart)))))) 281 | 282 | (with-test-env [test-env [2 env-config peer-config]] 283 | (reset! out-chan (chan (sliding-buffer (inc n-messages)))) 284 | (let [job {:catalog catalog-http-q&map&idx 285 | :workflow workflow 286 | :lifecycles lifecycles-fail 287 | :task-scheduler :onyx.task-scheduler/balanced} 288 | {:keys [job-id]} (onyx.api/submit-job peer-config job)] 289 | (onyx.api/await-job-completion peer-config job-id) 290 | (let [result (take-segments! @out-chan 5000)] 291 | (is (= 11 (count result)))))))) 292 | -------------------------------------------------------------------------------- /test/onyx/plugin/output_test.clj: -------------------------------------------------------------------------------- 1 | (ns onyx.plugin.output-test 2 | (:require [clojure.core.async :refer [chan >!! !! ch seg)) 123 | (>!! 
ch :done) 124 | (let [job-info (onyx.api/submit-job 125 | peer-config 126 | {:catalog catalog 127 | :workflow workflow 128 | :lifecycles lc 129 | :task-scheduler :onyx.task-scheduler/balanced})] 130 | (info (str "Awaiting job completion for " name)) 131 | (onyx.api/await-job-completion peer-config (:job-id job-info)))) 132 | 133 | (run-job 134 | "HTTP Client Job with Explicit Write Type" 135 | in-chan-http 136 | lifecycles-http 137 | catalog-http&write 138 | {:name "http:insert_plain-msg_noid" :index "one"} 139 | {:elasticsearch/message {:name "http:insert_detail-msg_id"} :elasticsearch/doc-id "1"} 140 | {:elasticsearch/message {:name "http:insert_detail-msg_id" :new "new"} :elasticsearch/doc-id "1" :elasticsearch/write-type :upsert} 141 | {:elasticsearch/message {:name "http:upsert_detail-msg_id"} :elasticsearch/doc-id "2" :elasticsearch/write-type :upsert} 142 | {:elasticsearch/message {:name "http:upsert_detail-msg_noid" :index "two"} :elasticsearch/write-type :upsert} 143 | {:elasticsearch/message {:name "http:insert-to-be-deleted"} :elasticsearch/doc-id "3"} 144 | {:elasticsearch/doc-id "3" :elasticsearch/write-type :delete}) 145 | 146 | (run-job 147 | "Native Client Job with No Default Write Type" 148 | in-chan-native 149 | lifecycles-native 150 | catalog-native-no-write 151 | {:elasticsearch/message {:name "native:insert_detail-msg_id"} :elasticsearch/doc-id "4" :elasticsearch/write-type :insert} 152 | {:elasticsearch/message {:name "native:insert_detail-msg_id" :new "new"} :elasticsearch/doc-id "4" :elasticsearch/write-type :upsert} 153 | {:elasticsearch/message {:name "native:upsert_detail-msg_id"} :elasticsearch/doc-id "5" :elasticsearch/write-type :upsert} 154 | {:elasticsearch/message {:name "native:insert-to-be-deleted"} :elasticsearch/doc-id "6" :elasticsearch/write-type :insert} 155 | {:elasticsearch/doc-id "6" :elasticsearch/write-type :delete}) 156 | 157 | ;; Give ElasticSearch time to Update 158 | (Thread/sleep 7000) 159 | 160 | (doseq [v-peer 
v-peers] 161 | (onyx.api/shutdown-peer v-peer)) 162 | 163 | (onyx.api/shutdown-peer-group peer-group) 164 | 165 | (onyx.api/shutdown-env env) 166 | 167 | (use-fixtures 168 | :once (fn [f] 169 | (f) 170 | (u/delete-indexes (.toString id)))) 171 | 172 | (let [conn (u/connect-rest-client)] 173 | 174 | (deftest check-http&write-job 175 | (testing "Insert: plain message with no id defined" 176 | (let [res (esrd/search conn id "_default_" :query (q/match :index "one"))] 177 | (is (= 1 (esrsp/total-hits res))) 178 | (is (not-empty (first (esrsp/ids-from res)))))) 179 | (let [res (esrd/search conn id "_default_" :query (q/term :_id "1"))] 180 | (testing "Insert: detail message with id defined" 181 | (is (= 1 (esrsp/total-hits res)))) 182 | (testing "Update: detail message with id defined" 183 | (is (= "new" (-> (esrsp/hits-from res) first :_source :new))))) 184 | (testing "Upsert: detail message with id defined" 185 | (let [res (esrd/search conn id "_default_" :query (q/term :_id "2"))] 186 | (is (= 1 (esrsp/total-hits res))))) 187 | (testing "Upsert: detail message with no id defined" 188 | (let [res (esrd/search conn id "_default_" :query (q/match :index "two"))] 189 | (is (= 1 (esrsp/total-hits res))) 190 | (is (not-empty (first (esrsp/ids-from res)))))) 191 | (testing "Delete: detail defined" 192 | (let [res (esrd/search conn id "_default_" :query (q/term :_id "3"))] 193 | (is (= 0 (esrsp/total-hits res)))))) 194 | 195 | (deftest check-native-no-write-job 196 | (let [res (esrd/search conn id "_default_" :query (q/term :_id "4"))] 197 | (testing "Insert: detail message with id defined" 198 | (is (= 1 (esrsp/total-hits res)))) 199 | (testing "Update: detail message with id defined" 200 | (is (= "new" (-> (esrsp/hits-from res) first :_source :new))))) 201 | (testing "Upsert: detail message with id defined" 202 | (let [res (esrd/search conn id "_default_" :query (q/term :_id "5"))] 203 | (is (= 1 (esrsp/total-hits res))))) 204 | (testing "Delete: detail defined" 205 | (let 
[res (esrd/search conn id "_default_" :query (q/term :_id "6"))] 206 | (is (= 0 (esrsp/total-hits res))))))) 207 | -------------------------------------------------------------------------------- /test/onyx/util/helper.clj: -------------------------------------------------------------------------------- 1 | (ns onyx.util.helper 2 | (:require [clojure.data.json :as json] 3 | [org.httpkit.client :as http] 4 | [clojurewerkz.elastisch.rest :as es] 5 | [clojurewerkz.elastisch.rest.index :as idx])) 6 | 7 | (defn es-cluster-name 8 | "Returns the name of an ElasticSearch cluster, default returns local cluster name" 9 | ([] 10 | (es-cluster-name "127.0.0.1" 9200)) 11 | ([host port] 12 | (let [{:keys [body error]} @(http/get (str "http://" host ":" port) {:timeout 5000})] 13 | (if error 14 | (throw (Exception. "Failed to connect to ElasticSearch cluster. Please ensure it is runnning locally prior to running tests")) 15 | (get (json/read-str body) "cluster_name"))))) 16 | 17 | (defn connect-rest-client 18 | "Returns a connection to Elastic Search for the http client" 19 | ([] 20 | (connect-rest-client "127.0.0.1" 9200)) 21 | ([host port] 22 | (es/connect (str "http://" host ":" port)))) 23 | 24 | (defn delete-indexes 25 | "Deletes the specified index. If no index is provided, will delete all indexes. 26 | Optionally can specify the cluster host and port. If not, will default to local. 27 | Used to clean up after testing." 28 | ([] 29 | (delete-indexes "127.0.0.1" 9200)) 30 | ([idx] 31 | (delete-indexes "127.0.0.1" 9200 idx)) 32 | ([host port] 33 | (let [conn (connect-rest-client host port)] 34 | (idx/delete conn))) 35 | ([host port idx] 36 | (let [conn (connect-rest-client host port)] 37 | (idx/delete conn idx)))) --------------------------------------------------------------------------------