├── .gitignore ├── LICENSE ├── README.md ├── example └── vectorscoring.sh ├── pom.xml └── src ├── main ├── assemblies │ └── plugin.xml ├── java │ └── com │ │ └── github │ │ └── mlnick │ │ └── elasticsearch │ │ ├── plugin │ │ └── VectorScoringPlugin.java │ │ └── script │ │ └── PayloadVectorScoreScript.java └── resources │ └── plugin-descriptor.properties └── test └── resources └── log4j.properties /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | .idea/ 3 | *.iml 4 | 5 | *.class 6 | 7 | # Mobile Tools for Java (J2ME) 8 | .mtj.tmp/ 9 | 10 | # Package Files # 11 | *.jar 12 | *.war 13 | *.ear 14 | 15 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 16 | hs_err_pid* 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Vector Scoring Plugin for Elasticsearch 2 | 3 | **Note:** most of the functionality of this plugin is now supported natively in ES: https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html 4 | 5 | Also see https://www.elastic.co/blog/text-similarity-search-with-vectors-in-elasticsearch. 6 | 7 | **This plugin is no longer maintained** 8 | 9 | ---- 10 | 11 | 12 | This plugin allows you to score documents based on arbitrary raw vectors, 13 | using dot product or cosine similarity. 14 | 15 | ### Releases 16 | 17 | Master branch targets Elasticsearch 5.4. **Note that version 5.5+ is _not_ supported as Elasticsearch changed their plugin mechanism**. An update for 5.5+ will be developed soon (PRs welcome). 
18 | 19 | [Branch es-2.4](https://github.com/MLnick/elasticsearch-vector-scoring/tree/es-2.4) targets Elasticsearch 2.4.x 20 | 21 | ## Overview 22 | 23 | The aim of this plugin is to enable real-time scoring of vector-based 24 | models, in particular factor-based recommendation models. 25 | 26 | In this case, user and item factor vectors are indexed using 27 | the [Delimited Payload Token Filter](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-delimited-payload-tokenfilter.html), 28 | e.g. the vector `[1.2, 0.1, 0.4, -0.2, 0.3]` is indexed as a string: 29 | `0|1.2 1|0.1 2|0.4 3|-0.2 4|0.3`. 30 | 31 | This stores the vector indices as "terms" and the vector values as 32 | "payloads". 33 | 34 | ## Scoring 35 | 36 | This plugin provides a native script `payload_vector_score` for use 37 | in `function_score` queries. 38 | 39 | The script computes the dot product between the query vector and the 40 | document vector. In pseudo-code: 41 | 42 | ```java 43 | for (i : vector_indices_terms) { 44 | payload = indexTermField(i).getPayload() 45 | score += payload * queryVector(i) 46 | } 47 | ``` 48 | 49 | ## Plugin installation 50 | 51 | Targets Elasticsearch `5.4.0` and Java `1.8`. 52 | 53 | ### Simple installation 54 | 55 | `ELASTIC_HOME/bin/elasticsearch-plugin install https://github.com/MLnick/elasticsearch-vector-scoring/releases/download/v5.4.0/elasticsearch-vector-scoring-5.4.0.zip` 56 | 57 | ### Build from source 58 | 59 | 1. Build: `mvn package` 60 | 2. Install plugin in Elasticsearch: `ELASTIC_HOME/bin/elasticsearch-plugin install file:///PROJECT_HOME/target/releases/elasticsearch-vector-scoring-5.4.0.zip` (stop ES first). 61 | 62 | 63 | Start Elasticsearch: `ELASTIC_HOME/bin/elasticsearch`. You should see the plugin registered at Elasticsearch startup: 64 | ``` 65 | ... 66 | [2017-03-29T13:46:57,804][INFO ][o.e.p.PluginsService ] [2Zs8kW3] loaded plugin [elasticsearch-vector-scoring] 67 | ... 
68 | ``` 69 | 70 | ## Example usage 71 | 72 | Below are examples illustrating basic usage. For a more complete usage example, including training a recommender model with Apache Spark, see the [Elasticsearch Spark Recommender on IBM Code](https://github.com/IBM/elasticsearch-spark-recommender). 73 | 74 | ### Index setup 75 | 76 | ```sh 77 | curl -s -XPUT 'http://localhost:9200/test?pretty' -d '{ 78 | "settings" : { 79 | "analysis": { 80 | "analyzer": { 81 | "payload_analyzer": { 82 | "type": "custom", 83 | "tokenizer":"whitespace", 84 | "filter":"delimited_payload_filter" 85 | } 86 | } 87 | } 88 | } 89 | }' 90 | 91 | curl -s -XPUT 'http://localhost:9200/test/_mapping/movies?pretty' -d ' 92 | { 93 | "movies" : { 94 | "properties" : { 95 | "@model_factor": { 96 | "type": "text", 97 | "term_vector": "with_positions_offsets_payloads", 98 | "analyzer" : "payload_analyzer" 99 | } 100 | } 101 | } 102 | }' 103 | 104 | curl -s -XPUT 'http://localhost:9200/test/movies/1?pretty' -d ' 105 | { 106 | "@model_factor":"0|1.2 1|0.1 2|0.4 3|-0.2 4|0.3", 107 | "name": "Test 1" 108 | }' 109 | 110 | curl -s -XPUT 'http://localhost:9200/test/movies/2?pretty' -d ' 111 | { 112 | "@model_factor":"0|0.1 1|2.3 2|-1.6 3|0.7 4|-1.3", 113 | "name": "Test 2" 114 | }' 115 | 116 | curl -s -XPUT 'http://localhost:9200/test/movies/3?pretty' -d ' 117 | { 118 | "@model_factor":"0|-0.5 1|1.6 2|1.1 3|0.9 4|0.7", 119 | "name": "Test 3" 120 | }' 121 | 122 | curl -s -XGET 'http://localhost:9200/test/movies/1/_termvector?pretty' -d ' 123 | { 124 | "fields" : ["@model_factor"], 125 | "payloads" : true, 126 | "positions" : true 127 | }' 128 | ``` 129 | 130 | ### Scoring example 131 | 132 | ```sh 133 | curl -s -XPOST 'http://localhost:9200/test/movies/_search?pretty' -d ' 134 | { 135 | "query": { 136 | "function_score": { 137 | "query" : { 138 | "query_string": { 139 | "query": "*" 140 | } 141 | }, 142 | "script_score": { 143 | "script": { 144 | "inline": "payload_vector_score", 145 | "lang": "native", 146 | 
"params": { 147 | "field": "@model_factor", 148 | "vector": [0.1,2.3,-1.6,0.7,-1.3], 149 | "cosine" : true 150 | } 151 | } 152 | }, 153 | "boost_mode": "replace" 154 | } 155 | } 156 | }' 157 | ``` 158 | 159 | This query returns results sorted by cosine similarity (including the document 160 | itself). For "similar item" style recommendations, you can filter the 161 | query item from the returned results. 162 | 163 | ``` 164 | { 165 | "took" : 3, 166 | "timed_out" : false, 167 | "_shards" : { 168 | "total" : 5, 169 | "successful" : 5, 170 | "failed" : 0 171 | }, 172 | "hits" : { 173 | "total" : 3, 174 | "max_score" : 0.99999994, 175 | "hits" : [ { 176 | "_index" : "test", 177 | "_type" : "movies", 178 | "_id" : "2", 179 | "_score" : 0.99999994, 180 | "_source" : { 181 | "@model_factor" : "0|0.1 1|2.3 2|-1.6 3|0.7 4|-1.3", 182 | "name" : "Test 2" 183 | } 184 | }, { 185 | "_index" : "test", 186 | "_type" : "movies", 187 | "_id" : "3", 188 | "_score" : 0.2175577, 189 | "_source" : { 190 | "@model_factor" : "0|-0.5 1|1.6 2|1.1 3|0.9 4|0.7", 191 | "name" : "Test 3" 192 | } 193 | }, { 194 | "_index" : "test", 195 | "_type" : "movies", 196 | "_id" : "1", 197 | "_score" : -0.19618797, 198 | "_source" : { 199 | "@model_factor" : "0|1.2 1|0.1 2|0.4 3|-0.2 4|0.3", 200 | "name" : "Test 1" 201 | } 202 | } ] 203 | } 204 | } 205 | ``` 206 | 207 | ## TODO 208 | 209 | 1. 
Tests 210 | -------------------------------------------------------------------------------- /example/vectorscoring.sh: --------------------------------------------------------------------------------
#!/bin/sh
#
# End-to-end example for the vector scoring plugin: creates a "test" index
# with a payload-aware analyzer, indexes three documents whose vectors are
# stored as term payloads, dumps the term vector of one document and finally
# runs a function_score query using the "payload_vector_score" native script.

# Base URL of the Elasticsearch node. Override through the environment, e.g.
#   ES_URL=http://es-host:9200 ./vectorscoring.sh
ES_URL="${ES_URL:-http://localhost:9200}"

# Every request body below is JSON. Without an explicit header curl labels
# "-d" payloads as application/x-www-form-urlencoded and Elasticsearch has to
# guess the real content type, so state it explicitly.
CONTENT_TYPE='Content-Type: application/json'

# Init an index with custom analyzer

curl -s -XPUT "$ES_URL/test?pretty" -H "$CONTENT_TYPE" -d '{
    "settings" : {
        "analysis": {
            "analyzer": {
                "payload_analyzer": {
                    "type": "custom",
                    "tokenizer":"whitespace",
                    "filter":"delimited_payload_filter"
                }
            }
        }
    }
}'

# Map the vector field so that term vectors keep the payloads

curl -s -XPUT "$ES_URL/test/_mapping/movies?pretty" -H "$CONTENT_TYPE" -d '
{
    "movies" : {
        "properties" : {
            "@model_factor": {
                "type": "text",
                "term_vector": "with_positions_offsets_payloads",
                "analyzer" : "payload_analyzer"
            }
        }
    }
}
'

# Index three documents; each vector is encoded as "index|value" tokens

curl -s -XPUT "$ES_URL/test/movies/1?pretty" -H "$CONTENT_TYPE" -d '
{
    "@model_factor":"0|1.2 1|0.1 2|0.4 3|-0.2 4|0.3",
    "name": "Test 1"
}
'

curl -s -XPUT "$ES_URL/test/movies/2?pretty" -H "$CONTENT_TYPE" -d '
{
    "@model_factor":"0|0.1 1|2.3 2|-1.6 3|0.7 4|-1.3",
    "name": "Test 2"
}
'

curl -s -XPUT "$ES_URL/test/movies/3?pretty" -H "$CONTENT_TYPE" -d '
{
    "@model_factor":"0|-0.5 1|1.6 2|1.1 3|0.9 4|0.7",
    "name": "Test 3"
}
'

# Show the stored term vector (terms, positions and payloads) of document 1

curl -s -XGET "$ES_URL/test/movies/1/_termvector?pretty" -H "$CONTENT_TYPE" -d '
{
    "fields" : ["@model_factor"],
    "payloads" : true,
    "positions" : true
}'

# Score all documents by cosine similarity against the query vector

curl -s -XPOST "$ES_URL/test/movies/_search?pretty" -H "$CONTENT_TYPE" -d '
{
    "query": {
        "function_score": {
            "query" : {
                "query_string": {
                    "query": "*"
                }
            },
            "script_score": {
                "script": {
                    "inline": "payload_vector_score",
                    "lang": "native",
                    "params": {
                        "field": "@model_factor",
                        "vector": [0.1,2.3,-1.6,0.7,-1.3],
                        "cosine" : true
                    }
                }
            },
            "boost_mode": "replace"
        }
    }
}
'
-------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | elasticsearch-vector-scoring 8 | com.github.mlnick 9 | elasticsearch-vector-scoring 10 | 5.4.0 11 | ElasticSearch Plugin for Vector Scoring 12 | 13 | 14 | 15 | The Apache Software License, Version 2.0 16 | http://www.apache.org/licenses/LICENSE-2.0.txt 17 | repo 18 | 19 | 20 | 21 | 22 | scm:git:git@github.com:MLnick/elasticsearch-vector-scoring.git 23 | scm:git:git@github.com:MLnick/elasticsearch-vector-scoring.git 24 | http://github.com/MLnick/elasticsearch-vector-scoring 25 | 26 | 27 | 28 | UTF-8 29 | 30 | com.github.mlnick.elasticsearch.plugin.VectorScoringPlugin 31 | 5.4.0 32 | 33 | 34 | 35 | 36 | org.elasticsearch 37 | elasticsearch 38 | ${elasticsearch.version} 39 | provided 40 | 41 | 42 | 43 | 44 | 45 | 46 | oss-snapshots 47 | Sonatype OSS Snapshots 48 | https://oss.sonatype.org/content/repositories/snapshots/ 49 | 50 | 51 | 52 | 53 | 54 | 55 | src/main/resources 56 | true 57 | 58 | plugin-descriptor.properties 59 | 60 | 61 | 62 | 63 | 64 | maven-compiler-plugin 65 | 2.3.2 66 | 67 | 1.8 68 | 1.8 69 | UTF-8 70 | 71 | 72 | 73 | org.apache.maven.plugins 74 | maven-assembly-plugin 75 | 76 | false 77 | ${project.build.directory}/releases/ 78 | ${basedir}/src/main/assemblies/plugin.xml 79 | 80 | 81 | 82 | 83 | package 84 | 85 | single 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /src/main/assemblies/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | true 8 | elasticsearch 9 | 10 | 11 | / 12 | true 13 | true 14 | 15 | org.elasticsearch:elasticsearch 16 | org.apache.lucene:* 17 | 18 | 19 | 20 | 21 | 22 | src/main/resources 23 | / 24 | true 25 | 26 | plugin-descriptor.properties 27
| 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/main/java/com/github/mlnick/elasticsearch/plugin/VectorScoringPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | package com.github.mlnick.elasticsearch.plugin; 15 | 16 | import com.github.mlnick.elasticsearch.script.PayloadVectorScoreScript; 17 | import org.elasticsearch.plugins.Plugin; 18 | import org.elasticsearch.plugins.ScriptPlugin; 19 | import org.elasticsearch.script.NativeScriptFactory; 20 | 21 | import java.util.Collections; 22 | import java.util.List; 23 |
/**
 * Plugin entry point. Its only job is to hand Elasticsearch the factory of the
 * native vector scoring script implemented by {@link PayloadVectorScoreScript}.
 */
public class VectorScoringPlugin extends Plugin implements ScriptPlugin {

    /**
     * Returns the native script factories contributed by this plugin.
     *
     * @return an immutable single-element list holding a new
     *         {@link PayloadVectorScoreScript.Factory}
     */
    @Override
    public List<NativeScriptFactory> getNativeScripts() {
        // Typed return (instead of a raw List) keeps the override free of
        // unchecked warnings and lets the compiler verify the element type.
        return Collections.singletonList(new PayloadVectorScoreScript.Factory());
    }

}
-------------------------------------------------------------------------------- /src/main/java/com/github/mlnick/elasticsearch/script/PayloadVectorScoreScript.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License.
4 | * You may obtain a copy of the License at 5 | * 6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | * 8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package com.github.mlnick.elasticsearch.script; 16 | 17 | import java.util.ArrayList; 18 | import java.util.Iterator; 19 | import java.util.List; 20 | import java.util.Map; 21 | 22 | import org.elasticsearch.script.ScriptException; 23 | 24 | import org.elasticsearch.common.Nullable; 25 | import org.elasticsearch.script.AbstractSearchScript; 26 | import org.elasticsearch.script.ExecutableScript; 27 | import org.elasticsearch.script.NativeScriptFactory; 28 | import org.elasticsearch.search.lookup.IndexField; 29 | import org.elasticsearch.search.lookup.IndexFieldTerm; 30 | import org.elasticsearch.search.lookup.IndexLookup; 31 | import org.elasticsearch.search.lookup.TermPosition; 32 |
/**
 * Script that scores documents based on term vector payloads. Dot product and cosine similarity
 * are supported.
 *
 * <p>The document vector is read from the field named by the {@code field} parameter: the term
 * {@code "i"} carries component {@code i} of the vector as a float payload. The query vector is
 * supplied through the {@code vector} parameter. A term that is missing from a document
 * contributes {@code 0} to the score.
 */
public class PayloadVectorScoreScript extends AbstractSearchScript {

    /**
     * Factory that is registered in
     * {@link com.github.mlnick.elasticsearch.plugin.VectorScoringPlugin#getNativeScripts()}
     * method when the plugin is loaded.
     */
    public static class Factory implements NativeScriptFactory {

        /**
         * This method is called for every search on every shard.
         *
         * @param params
         *            script parameters passed with the query; must contain {@code field} and
         *            {@code vector}, may contain {@code cosine}
         * @return new native script
         * @throws IllegalArgumentException
         *             if {@code params} is {@code null} or a required parameter is missing
         */
        @Override
        public ExecutableScript newScript(@Nullable Map<String, Object> params) {
            return new PayloadVectorScoreScript(params);
        }

        /**
         * Indicates if document scores may be needed by the produced scripts.
         *
         * @return always {@code false}; the script does not read the query score
         */
        @Override
        public boolean needsScores() {
            return false;
        }

        /**
         * @return the name under which the script is registered, {@link #SCRIPT_NAME}
         */
        @Override
        public String getName() {
            return SCRIPT_NAME;
        }

    }

    // the field containing the vectors to be scored against
    String field = null;
    // terms ("0", "1", ...) addressing the components of the document vector
    List<String> index = null;
    // components of the query vector
    List<Double> vector = null;
    // whether to score cosine similarity (true) or dot product (false)
    boolean cosine = false;
    // sum of squares of the query vector; only computed when cosine is true
    double queryVectorNorm = 0;

    final static public String SCRIPT_NAME = "payload_vector_score";

    /**
     * Reads the script parameters and precomputes everything that does not depend on the
     * document being scored.
     *
     * @param params
     *            script parameters: {@code field} (String, required), {@code vector} (list of
     *            numbers, required) and {@code cosine} (boolean, optional, defaults to
     *            {@code false})
     * @throws IllegalArgumentException
     *             if {@code params} is {@code null} or {@code field} / {@code vector} is missing
     */
    private PayloadVectorScoreScript(Map<String, Object> params) {
        // The factory declares params as @Nullable, so report a missing map the same way as a
        // missing parameter instead of failing with a NullPointerException.
        if (params == null) {
            throw new IllegalArgumentException("cannot initialize " + SCRIPT_NAME + ": field or vector parameter missing!");
        }
        // get field to score
        field = (String) params.get("field");
        // get query vector
        List<?> queryVector = (List<?>) params.get("vector");
        // cosine flag
        Object cosineParam = params.get("cosine");
        if (cosineParam != null) {
            cosine = (Boolean) cosineParam;
        }
        if (field == null || queryVector == null) {
            throw new IllegalArgumentException("cannot initialize " + SCRIPT_NAME + ": field or vector parameter missing!");
        }
        // Copy the query vector, converting each component through Number: parsed request
        // parameters may hold integers (e.g. "vector": [1, 2, 3]) as well as doubles, and a
        // blind cast to a list of doubles would fail on the first integer component.
        int dimensions = queryVector.size();
        vector = new ArrayList<>(dimensions);
        // index holds the term for each component: "0", "1", ..., one per vector entry
        index = new ArrayList<>(dimensions);
        for (int i = 0; i < dimensions; i++) {
            vector.add(((Number) queryVector.get(i)).doubleValue());
            index.add(String.valueOf(i));
        }
        if (cosine) {
            // compute query vector norm (as a sum of squares) once
            for (double v : vector) {
                queryVectorNorm += Math.pow(v, 2.0);
            }
        }
    }

    /**
     * Scores the current document against the query vector.
     *
     * @return the dot product as a {@code Float} when {@code cosine} is {@code false}; otherwise
     *         the cosine similarity as a {@code Double}, or {@code 0f} when either vector has
     *         zero norm
     */
    @Override
    public Object run() {
        float score = 0;
        // first, get the per-field lookup for the field holding the document vector
        IndexField indexField = this.indexLookup().get(field);
        double docVectorNorm = 0.0f;
        for (int i = 0; i < index.size(); i++) {
            // get the vector value stored in the term payload
            IndexFieldTerm indexTermField = indexField.get(index.get(i), IndexLookup.FLAG_PAYLOADS);
            float payload = 0f;
            if (indexTermField != null) {
                Iterator<TermPosition> iter = indexTermField.iterator();
                // only the first position of the term is read
                if (iter.hasNext()) {
                    payload = iter.next().payloadAsFloat(0f);
                    if (cosine) {
                        // doc vector norm
                        docVectorNorm += Math.pow(payload, 2.0);
                    }
                }
            }
            // dot product
            score += payload * vector.get(i);
        }
        if (cosine) {
            // cosine similarity score; guard against division by zero
            if (docVectorNorm == 0 || queryVectorNorm == 0) {
                return 0f;
            }
            return score / (Math.sqrt(docVectorNorm) * Math.sqrt(queryVectorNorm));
        } else {
            // dot product score
            return score;
        }
    }

}
-------------------------------------------------------------------------------- /src/main/resources/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | name=${project.name} 2 | description=${project.description} 3 | version=${project.version} 4 | jvm=true 5 | classname=${elasticsearch.plugin.classname} 6 | java.version=1.8 7 | elasticsearch.version=${elasticsearch.version} -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, out 2 | 3 | log4j.appender.out=org.apache.log4j.ConsoleAppender 4 | log4j.appender.out.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c] %m%n 6 | --------------------------------------------------------------------------------