├── .gitignore ├── .travis.yml ├── .travis └── run.sh ├── LICENSE ├── README.md ├── pom.xml └── src ├── main ├── assemblies │ └── plugin.xml ├── java │ └── org │ │ └── codelibs │ │ └── elasticsearch │ │ └── dynarank │ │ ├── DynamicRankingPlugin.java │ │ ├── filter │ │ └── SearchActionFilter.java │ │ ├── painless │ │ └── DynaRankWhitelistExtension.java │ │ ├── ranker │ │ ├── DynamicRanker.java │ │ └── RetrySearchException.java │ │ └── script │ │ ├── DiversitySortScriptEngine.java │ │ ├── DynaRankScript.java │ │ └── bucket │ │ ├── Bucket.java │ │ ├── BucketFactory.java │ │ ├── Buckets.java │ │ └── impl │ │ ├── MinhashBucket.java │ │ ├── MinhashBucketFactory.java │ │ ├── MinhashBuckets.java │ │ ├── StandardBucket.java │ │ ├── StandardBucketFactory.java │ │ └── StandardBuckets.java ├── plugin-metadata │ ├── plugin-descriptor.properties │ └── plugin-security.policy └── resources │ ├── META-INF │ └── services │ │ └── org.elasticsearch.painless.spi.PainlessExtension │ └── org │ └── codelibs │ └── elasticsearch │ └── dynarank │ └── painless │ └── dynarank_whitelist.txt └── test └── java └── org └── codelibs └── elasticsearch └── dynarank └── DynamicRankingPluginTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | .settings 2 | .classpath 3 | .project 4 | .idea 5 | /target 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | language: java 4 | 5 | jdk: 6 | - openjdk8 7 | 8 | cache: 9 | directories: 10 | - $HOME/.m2/repository 11 | 12 | before_script: 13 | - java -version 14 | 15 | script: 16 | - mvn package 17 | - bash .travis/run.sh 18 | 19 | -------------------------------------------------------------------------------- /.travis/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd `dirname $0` 4 | cd .. 
5 | 6 | BASE_DIR=`pwd` 7 | TEST_DIR=$BASE_DIR/target 8 | ES_VERSION=`grep '' $BASE_DIR/pom.xml | sed -e "s/.*>\(.*\)<.*/\1/"` 9 | ES_HOST=localhost 10 | ES_PORT=9200 11 | TMP_FILE=$TEST_DIR/tmp.$$ 12 | 13 | ZIP_FILE=$HOME/.m2/repository/elasticsearch-$ES_VERSION.zip 14 | if [ ! -f $ZIP_FILE ] ; then 15 | curl -o $ZIP_FILE -L https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-$ES_VERSION.zip 16 | fi 17 | 18 | mkdir -p $TEST_DIR 19 | cd $TEST_DIR 20 | 21 | echo "Installing Elasticsearch..." 22 | rm -rf elasticsearch-$ES_VERSION > /dev/null 23 | unzip $ZIP_FILE 24 | ./elasticsearch-$ES_VERSION/bin/elasticsearch-plugin install org.codelibs:elasticsearch-minhash:6.7.0 -b 25 | #./elasticsearch-$ES_VERSION/bin/elasticsearch-plugin install file:`ls $BASE_DIR/../elasticsearch-minhash/target/releases/elasticsearch-*.zip` -b 26 | ./elasticsearch-$ES_VERSION/bin/elasticsearch-plugin install file:`ls $BASE_DIR/target/releases/elasticsearch-*.zip` -b 27 | echo "dynarank.cache.clean_interval: 1s" >> ./elasticsearch-$ES_VERSION/config/elasticsearch.yml 28 | 29 | echo "Starting Elasticsearch..." 30 | ./elasticsearch-$ES_VERSION/bin/elasticsearch & 31 | ES_PID=`echo $!` 32 | 33 | RET=-1 34 | COUNT=0 35 | while [ $RET != 0 -a $COUNT -lt 60 ] ; do 36 | echo "Waiting for ${ES_HOST}..." 37 | curl --connect-timeout 60 --retry 10 -s "$ES_HOST:$ES_PORT/_cluster/health?wait_for_status=green&timeout=3m" 38 | RET=$? 
39 | COUNT=`expr $COUNT + 1` 40 | sleep 1 41 | done 42 | curl "$ES_HOST:$ES_PORT" 43 | 44 | echo "=== Start Testing ===" 45 | 46 | curl -s -H "Content-Type: application/json" -XPUT "$ES_HOST:$ES_PORT/sample" -d ' 47 | { 48 | "mappings" : { 49 | "_doc" : { 50 | "properties" : { 51 | "counter" : { 52 | "type" : "long" 53 | }, 54 | "id" : { 55 | "type" : "keyword" 56 | }, 57 | "msg" : { 58 | "type" : "text" 59 | } 60 | } 61 | } 62 | }, 63 | "settings" : { 64 | "index" : { 65 | "number_of_shards" : "5", 66 | "number_of_replicas" : "0", 67 | "dynarank" : { 68 | "reorder_size" : 100, 69 | "script_sort" : { 70 | "lang" : "painless", 71 | "script" : "def l = Arrays.asList(params.searchHits); l.sort((s1,s2) -> s2.getSourceAsMap().get(\"counter\") - s1.getSourceAsMap().get(\"counter\")); l.toArray(new org.elasticsearch.search.SearchHit[l.size()])", 72 | "params" : { 73 | "foo" : "bar" 74 | } 75 | } 76 | } 77 | } 78 | } 79 | } 80 | ' 81 | 82 | count=1 83 | while [ $count -le 1000 ] ; do 84 | curl -s -H "Content-Type: application/json" -XPOST "$ES_HOST:$ES_PORT/sample/_doc/$count" -d "{\"id\":\"$count\",\"msg\":\"test $count\",\"counter\":$count}" > /dev/null 85 | count=`expr $count + 1` 86 | done 87 | curl -s -H "Content-Type: application/json" -XPOST "$ES_HOST:$ES_PORT/_refresh" > /dev/null 88 | curl -s "$ES_HOST:$ES_PORT/_cat/indices?v" 89 | 90 | echo "sort by script" 91 | curl -s -o $TMP_FILE -H "Content-Type: application/json" -XPOST "$ES_HOST:$ES_PORT/sample/_doc/_search" \ 92 | -d '{"query":{"match_all":{}},"sort":[{"counter":{"order":"asc"}}]}' 93 | cat $TMP_FILE | jq '.' 94 | RET=`cat $TMP_FILE | jq '.hits.total'` 95 | if [ "x$RET" != "x1000" ] ; then 96 | echo "[ERROR] hits.total is not 100." 97 | kill $ES_PID 98 | exit 1 99 | fi 100 | RET=`cat $TMP_FILE | jq '.hits.hits[0]._id' | sed -e "s/\"//g"` 101 | if [ "x$RET" != "x100" ] ; then 102 | echo "[ERROR] hits.total is not 100." 
103 | kill $ES_PID 104 | exit 1 105 | fi 106 | 107 | echo "=== Finish Testing ===" 108 | 109 | echo "Stopping Elasticsearch..." 110 | kill $ES_PID 111 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Elasticsearch DynaRank Plugin 2 | ======================= 3 | 4 | ## Overview 5 | 6 | DynaRank Plugin provides dynamic ranking at search time. 7 | You can re-order the top N documents in the search result with your own re-ordering algorithm. 8 | Elasticsearch has [rescoring](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-request-rescore.html "rescoring"), but DynaRank differs as follows: 9 | 10 | * DynaRank's reranking is executed only on the node that receives the request, not on each shard. 11 | * DynaRank uses a scripting language for reranking. 12 | 13 | 14 | ## Version 15 | 16 | [Versions in Maven Repository](https://repo1.maven.org/maven2/org/codelibs/elasticsearch-dynarank/) 17 | 18 | ### Issues/Questions 19 | 20 | Please file an [issue](https://github.com/codelibs/elasticsearch-dynarank/issues "issue"). 
21 | 22 | ## Installation 23 | 24 | $ $ES_HOME/bin/elasticsearch-plugin install org.codelibs:elasticsearch-dynarank:7.16.0 25 | 26 | ## Getting Started 27 | 28 | ### Create Sample Data 29 | 30 | Create the "sample" index by inserting documents: 31 | 32 | $ COUNT=1;while [ $COUNT -le 100 ] ; do curl -XPOST -H 'Content-Type: application/json' 'localhost:9200/sample/_doc/' -d "{\"message\":\"Hello $COUNT\",\"counter\":$COUNT}";COUNT=`expr $COUNT + 1`; done 33 | 34 | 100 documents are inserted. You can see 10 documents in ascending order of the "counter" field: 35 | 36 | $ curl -XPOST -H 'Content-Type: application/json' "http://127.0.0.1:9200/sample/_search" -d' 37 | { 38 | "query": { 39 | "match_all": {} 40 | }, 41 | "sort": [ 42 | { 43 | "counter": { 44 | "order": "asc" 45 | } 46 | } 47 | ] 48 | }' 49 | 50 | ### Enable Reranking 51 | 52 | The DynaRank plugin is enabled for an index once a re-order script is set on that index: 53 | 54 | ``` 55 | $ curl -s -XPUT -H 'Content-Type: application/json' "localhost:9200/sample/_settings" -d" 56 | { 57 | \"index\" : { 58 | \"dynarank\":{ 59 | \"script_sort\":{ 60 | \"lang\": \"painless\", 61 | \"script\": \"def l=new ArrayList();for(def h:searchHits){l.add(h);}return l.stream().sorted((s1,s2)->s2.getSourceAsMap().get('counter')-s1.getSourceAsMap().get('counter')).toArray(n->new org.elasticsearch.search.SearchHit[n])\" 62 | }, 63 | \"reorder_size\": 5 64 | } 65 | } 66 | }" 67 | ``` 68 | 69 | With this setting, the top 5 documents (5 is given by reorder\_size) are sorted in descending order of the "counter" field, while the remaining documents stay in ascending order. 70 | 71 | ### Disable Reranking 72 | 73 | Set index.dynarank.script\_sort.script to an empty value: 74 | 75 | ``` 76 | $ curl -s -XPUT -H 'Content-Type: application/json' "localhost:9200/sample/_settings" -d" 77 | { 78 | \"index\" : { 79 | \"dynarank\":{ 80 | \"script_sort\":{ 81 | \"script\": \"\" 82 | } 83 | } 84 | } 85 | }" 86 | ``` 87 | 88 | ## References 89 | 90 | ### dynarank\_diversity\_sort Script Sort 91 | 92 | The DynaRank plugin provides a sort feature that addresses the result-diversity problem. 
93 | The sort script is dynarank\_diversity\_sort. 94 | It is configured as follows: 95 | 96 | curl -XPUT -H 'Content-Type: application/json' 'localhost:9200/sample/_settings' -d ' 97 | { 98 | "index" : { 99 | "dynarank":{ 100 | "script_sort":{ 101 | "lang":"dynarank_diversity_sort", 102 | "params":{ 103 | "bucket_factory": "standard", 104 | "diversity_fields":["fieldname1", "fieldname2"], 105 | "diversity_thresholds":[0.95, 1] 106 | } 107 | }, 108 | "reorder_size":100 109 | } 110 | } 111 | }' 112 | 113 | bucket\_factory is the bucket type. To sort on a minhash-type field, specify "minhash" (default: standard). 114 | diversity\_fields is the list of fields used for diversification. 115 | diversity\_thresholds is the list of similarity thresholds used when comparing documents. 116 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | elasticsearch-dynarank 4 | 4.0.0 5 | org.codelibs 6 | elasticsearch-dynarank 7 | 7.16.1-SNAPSHOT 8 | jar 9 | This plugin re-orders top N documents in a search results. 
10 | 2011 11 | https://github.com/codelibs/elasticsearch-dynarank 12 | 13 | 14 | The Apache Software License, Version 2.0 15 | http://www.apache.org/licenses/LICENSE-2.0.txt 16 | repo 17 | 18 | 19 | 20 | scm:git:git@github.com:codelibs/elasticsearch-dynarank.git 21 | scm:git:git@github.com:codelibs/elasticsearch-dynarank.git 22 | git@github.com:codelibs/elasticsearch-dynarank.git 23 | 24 | 25 | org.sonatype.oss 26 | oss-parent 27 | 9 28 | 29 | 30 | 7.16.3 31 | org.codelibs.elasticsearch.dynarank.DynamicRankingPlugin 32 | 1.8 33 | 1.8 34 | 35 | 36 | 37 | 38 | maven-compiler-plugin 39 | 3.8.1 40 | 41 | ${maven.compiler.source} 42 | ${maven.compiler.target} 43 | UTF-8 44 | 45 | 46 | 47 | maven-surefire-plugin 48 | 2.22.2 49 | 50 | 51 | **/*Test.java 52 | 53 | 54 | 55 | 56 | maven-source-plugin 57 | 3.1.0 58 | 59 | 60 | attach-sources 61 | 62 | jar 63 | 64 | 65 | 66 | 67 | 68 | maven-javadoc-plugin 69 | 3.2.0 70 | 71 | 8 72 | UTF-8 73 | UTF-8 74 | UTF-8 75 | 76 | 77 | 78 | maven-assembly-plugin 79 | 3.1.1 80 | 81 | false 82 | ${project.build.directory}/releases/ 83 | 84 | ${basedir}/src/main/assemblies/plugin.xml 85 | 86 | 87 | 88 | 89 | package 90 | 91 | single 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | central 101 | https://repo1.maven.org/maven2/ 102 | 103 | true 104 | 105 | 106 | true 107 | 108 | 109 | 110 | codelibs.org 111 | CodeLibs Repository 112 | https://maven.codelibs.org 113 | 114 | 115 | 116 | 117 | org.elasticsearch 118 | elasticsearch 119 | ${elasticsearch.version} 120 | provided 121 | 122 | 123 | org.codelibs 124 | minhash 125 | 0.2.0 126 | 127 | 128 | com.google.guava 129 | guava 130 | 29.0-jre 131 | 132 | 133 | org.codehaus.mojo 134 | animal-sniffer-annotations 135 | 136 | 137 | com.google.j2objc 138 | j2objc-annotations 139 | 140 | 141 | com.google.errorprone 142 | error_prone_annotations 143 | 144 | 145 | com.google.code.findbugs 146 | jsr305 147 | 148 | 149 | 150 | 151 | org.codelibs.elasticsearch.module 152 | scripting-painless-spi 153 | 
${elasticsearch.version} 154 | provided 155 | 156 | 157 | org.codelibs 158 | elasticsearch-minhash 159 | 7.16.0 160 | test 161 | 162 | 163 | org.codelibs 164 | elasticsearch-cluster-runner 165 | ${elasticsearch.version}.0 166 | test 167 | 168 | 169 | junit 170 | junit 171 | 4.13.1 172 | test 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- /src/main/assemblies/plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | plugin 4 | 5 | zip 6 | 7 | false 8 | 9 | 10 | ${basedir}/src/main/plugin-metadata/plugin-descriptor.properties 11 | true 12 | 13 | 14 | ${basedir}/src/main/plugin-metadata/plugin-security.policy 15 | false 16 | 17 | 18 | 19 | 20 | true 21 | true 22 | 23 | org.elasticsearch:elasticsearch 24 | org.apache.lucene:lucene-core 25 | org.apache.lucene:lucene-analyzers-common 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/DynamicRankingPlugin.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.Collection; 6 | import java.util.List; 7 | 8 | import org.codelibs.elasticsearch.dynarank.filter.SearchActionFilter; 9 | import org.codelibs.elasticsearch.dynarank.ranker.DynamicRanker; 10 | import org.codelibs.elasticsearch.dynarank.script.DiversitySortScriptEngine; 11 | import org.codelibs.elasticsearch.dynarank.script.DynaRankScript; 12 | import org.elasticsearch.action.support.ActionFilter; 13 | import org.elasticsearch.common.component.LifecycleComponent; 14 | import org.elasticsearch.common.settings.Setting; 15 | import org.elasticsearch.common.settings.Settings; 16 | import org.elasticsearch.plugins.ActionPlugin; 17 | import org.elasticsearch.plugins.Plugin; 18 | import 
org.elasticsearch.plugins.ScriptPlugin; 19 | import org.elasticsearch.rest.RestHeaderDefinition; 20 | import org.elasticsearch.script.ScriptContext; 21 | import org.elasticsearch.script.ScriptEngine; 22 | 23 | /** 24 | * Elasticsearch plugin entry point for DynaRank. It contributes the search action filter, 25 | * the diversity sort script engine, the DynaRank script context, the DynaRank REST headers, 26 | * the DynamicRanker service class and the plugin settings. 27 | */ 28 | public class DynamicRankingPlugin extends Plugin implements ActionPlugin, ScriptPlugin { 29 | 30 | /** Settings received at construction time; handed to the search action filter. */ 31 | private final Settings settings; 32 | 33 | public DynamicRankingPlugin(final Settings settings) { 34 | this.settings = settings; 35 | } 36 | 37 | /** Returns a new diversity sort script engine built from the given settings. */ 38 | @Override 39 | public ScriptEngine getScriptEngine(Settings settings, Collection<ScriptContext<?>> contexts) { 40 | return new DiversitySortScriptEngine(settings); 41 | } 42 | 43 | /** Returns the single action filter of this plugin, a SearchActionFilter. */ 44 | @Override 45 | public List<ActionFilter> getActionFilters() { 46 | return Arrays.asList(new SearchActionFilter(settings)); 47 | } 48 | 49 | /** Declares the Dynarank-Rerank and Dynarank-Min-Total-Hits REST headers. */ 50 | @Override 51 | public Collection<RestHeaderDefinition> getRestHeaders() { 52 | return Arrays.asList( 53 | new RestHeaderDefinition(DynamicRanker.DYNARANK_RERANK_ENABLE, 54 | false), 55 | new RestHeaderDefinition(DynamicRanker.DYNARANK_MIN_TOTAL_HITS, 56 | false)); 57 | } 58 | 59 | /** Returns DynamicRanker as the plugin's Guice service class. */ 60 | @Override 61 | public Collection<Class<? extends LifecycleComponent>> getGuiceServiceClasses() { 62 | final Collection<Class<? extends LifecycleComponent>> services = new ArrayList<>(); 63 | services.add(DynamicRanker.class); 64 | return services; 65 | } 66 | 67 | /** Returns the script context used by DynaRank scripts. */ 68 | @Override 69 | public List<ScriptContext<?>> getContexts() { 70 | return Arrays.asList(DynaRankScript.CONTEXT); 71 | } 72 | 73 | /** 74 | * Returns every setting the plugin reads, including the filter order consumed by 75 | * SearchActionFilter, so that each of them can be configured. 76 | */ 77 | @Override 78 | public List<Setting<?>> getSettings() { 79 | return Arrays.asList(// 80 | DynamicRanker.SETTING_INDEX_DYNARANK_SCRIPT, // 81 | DynamicRanker.SETTING_INDEX_DYNARANK_LANG, // 82 | DynamicRanker.SETTING_INDEX_DYNARANK_TYPE, // 83 | DynamicRanker.SETTING_INDEX_DYNARANK_PARAMS, // 84 | DynamicRanker.SETTING_INDEX_DYNARANK_REORDER_SIZE, // 85 | DynamicRanker.SETTING_INDEX_DYNARANK_KEEP_TOPN, // 86 | DynamicRanker.SETTING_DYNARANK_CACHE_CLEAN_INTERVAL, // 87 | DynamicRanker.SETTING_DYNARANK_CACHE_EXPIRE, // 88 | SearchActionFilter.SETTING_DYNARANK_FILTER_ORDER // 89 | ); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- 
/src/main/java/org/codelibs/elasticsearch/dynarank/filter/SearchActionFilter.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.filter; 2 | 3 | import org.codelibs.elasticsearch.dynarank.ranker.DynamicRanker; 4 | import org.elasticsearch.action.ActionListener; 5 | import org.elasticsearch.action.ActionRequest; 6 | import org.elasticsearch.action.ActionResponse; 7 | import org.elasticsearch.action.search.SearchAction; 8 | import org.elasticsearch.action.search.SearchRequest; 9 | import org.elasticsearch.action.support.ActionFilter; 10 | import org.elasticsearch.action.support.ActionFilterChain; 11 | import org.elasticsearch.common.settings.Setting; 12 | import org.elasticsearch.common.settings.Setting.Property; 13 | import org.elasticsearch.common.settings.Settings; 14 | import org.elasticsearch.tasks.Task; 15 | 16 | /** 17 | * Action filter that lets DynamicRanker wrap the response listener of search actions; 18 | * every other action is passed down the chain untouched. 19 | */ 20 | public class SearchActionFilter implements ActionFilter { 21 | 22 | /** Node-scoped setting "dynarank.filter.order" (default 10) whose value is returned by order(). */ 23 | public static final Setting<Integer> SETTING_DYNARANK_FILTER_ORDER = Setting.intSetting("dynarank.filter.order", 10, Property.NodeScope); 24 | 25 | private final int order; 26 | 27 | public SearchActionFilter(final Settings settings) { 28 | order = SETTING_DYNARANK_FILTER_ORDER.get(settings); 29 | } 30 | 31 | @Override 32 | public int order() { 33 | return order; 34 | } 35 | 36 | /** 37 | * Proceeds with the chain, substituting the listener returned by DynamicRanker for search actions. 38 | * The original listener is kept when the action is not a search, when no DynamicRanker 39 | * instance is available, or when the ranker returns no wrapper. 40 | */ 41 | @Override 42 | public <Request extends ActionRequest, Response extends ActionResponse> void apply(final Task task, final String action, 43 | final Request request, final ActionListener<Response> listener, final ActionFilterChain<Request, Response> chain) { 44 | if (!SearchAction.INSTANCE.name().equals(action)) { 45 | chain.proceed(task, action, request, listener); 46 | return; 47 | } 48 | 49 | final SearchRequest searchRequest = (SearchRequest) request; 50 | // getInstance() returns a static field that starts out null; do not fail the search in that case 51 | final DynamicRanker ranker = DynamicRanker.getInstance(); 52 | final ActionListener<Response> wrappedListener = 53 | ranker == null ? null : ranker.wrapActionListener(action, searchRequest, listener); 54 | chain.proceed(task, action, request, wrappedListener == null ? listener : wrappedListener); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- 
/src/main/java/org/codelibs/elasticsearch/dynarank/painless/DynaRankWhitelistExtension.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.painless; 2 | 3 | import java.util.Collections; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import org.codelibs.elasticsearch.dynarank.script.DynaRankScript; 8 | import org.elasticsearch.painless.spi.PainlessExtension; 9 | import org.elasticsearch.painless.spi.Whitelist; 10 | import org.elasticsearch.painless.spi.WhitelistLoader; 11 | import org.elasticsearch.script.ScriptContext; 12 | 13 | /** 14 | * Painless extension that contributes the whitelist loaded from dynarank_whitelist.txt 15 | * to the DynaRank script context. 16 | */ 17 | public class DynaRankWhitelistExtension implements PainlessExtension { 18 | 19 | private static final Whitelist WHITELIST = 20 | WhitelistLoader.loadFromResourceFiles(DynaRankWhitelistExtension.class, "dynarank_whitelist.txt"); 21 | 22 | /** Maps the DynaRank script context to the single whitelist loaded above. */ 23 | @Override 24 | public Map<ScriptContext<?>, List<Whitelist>> getContextWhitelists() { 25 | return Collections.singletonMap(DynaRankScript.CONTEXT, Collections.singletonList(WHITELIST)); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/ranker/DynamicRanker.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.ranker; 2 | 3 | import static org.elasticsearch.action.search.ShardSearchFailure.readShardSearchFailure; 4 | 5 | import java.util.ArrayList; 6 | import java.util.Arrays; 7 | import java.util.HashMap; 8 | import java.util.List; 9 | import java.util.Map; 10 | import java.util.concurrent.TimeUnit; 11 | import java.util.function.Function; 12 | import java.util.Objects; 13 | 14 | import org.apache.logging.log4j.LogManager; 15 | import org.apache.logging.log4j.Logger; 16 | import 
org.codelibs.elasticsearch.dynarank.script.DiversitySortScriptEngine; 17 | import org.codelibs.elasticsearch.dynarank.script.DynaRankScript; 18 | import org.codelibs.elasticsearch.dynarank.script.DynaRankScript.Factory; 19 | import org.elasticsearch.ElasticsearchException; 20 | import org.elasticsearch.Version; 21 | import org.elasticsearch.action.ActionListener; 22 | import org.elasticsearch.action.ActionResponse; 23 | import org.elasticsearch.action.search.SearchRequest; 24 | import org.elasticsearch.action.search.SearchResponse; 25 | import org.elasticsearch.action.search.SearchResponse.Clusters; 26 | import org.elasticsearch.action.search.SearchResponseSections; 27 | import org.elasticsearch.action.search.ShardSearchFailure; 28 | import org.elasticsearch.action.support.ActionFilters; 29 | import org.elasticsearch.client.Client; 30 | import org.elasticsearch.cluster.metadata.IndexAbstraction; 31 | import org.elasticsearch.cluster.metadata.IndexMetadata; 32 | import org.elasticsearch.cluster.metadata.MappingMetadata; 33 | import org.elasticsearch.cluster.metadata.Metadata; 34 | import org.elasticsearch.cluster.service.ClusterService; 35 | import org.elasticsearch.common.component.AbstractLifecycleComponent; 36 | import org.elasticsearch.common.inject.Inject; 37 | import org.elasticsearch.common.io.stream.BytesStreamOutput; 38 | import org.elasticsearch.common.io.stream.NamedWriteableAwareStreamInput; 39 | import org.elasticsearch.common.io.stream.NamedWriteableRegistry; 40 | import org.elasticsearch.common.io.stream.StreamInput; 41 | import org.elasticsearch.common.settings.Setting; 42 | import org.elasticsearch.common.settings.Setting.Property; 43 | import org.elasticsearch.common.settings.Settings; 44 | import org.elasticsearch.common.util.concurrent.ThreadContext; 45 | import org.elasticsearch.core.TimeValue; 46 | import org.elasticsearch.script.Script; 47 | import org.elasticsearch.script.ScriptService; 48 | import org.elasticsearch.script.ScriptType; 49 | 
import org.elasticsearch.search.SearchHit; 50 | import org.elasticsearch.search.SearchHits; 51 | import org.elasticsearch.search.aggregations.InternalAggregations; 52 | import org.elasticsearch.search.builder.SearchSourceBuilder; 53 | import org.elasticsearch.search.internal.InternalSearchResponse; 54 | import org.elasticsearch.search.profile.SearchProfileResults; 55 | import org.elasticsearch.search.suggest.Suggest; 56 | import org.elasticsearch.threadpool.ThreadPool; 57 | 58 | import com.google.common.cache.Cache; 59 | import com.google.common.cache.CacheBuilder; 60 | 61 | public class DynamicRanker extends AbstractLifecycleComponent { 62 | 63 | private static final Logger logger = LogManager.getLogger(DynamicRanker.class); 64 | 65 | private static DynamicRanker instance = null; 66 | 67 | public static final String DEFAULT_SCRIPT_TYPE = "inline"; 68 | 69 | public static final String DEFAULT_SCRIPT_LANG = "painless"; 70 | 71 | public static final Setting SETTING_INDEX_DYNARANK_SCRIPT = 72 | Setting.simpleString("index.dynarank.script_sort.script", Property.IndexScope, Property.Dynamic); 73 | 74 | public static final Setting SETTING_INDEX_DYNARANK_LANG = 75 | Setting.simpleString("index.dynarank.script_sort.lang", Property.IndexScope, Property.Dynamic); 76 | 77 | public static final Setting SETTING_INDEX_DYNARANK_TYPE = new Setting<>("index.dynarank.script_sort.type", 78 | s -> DEFAULT_SCRIPT_TYPE, Function.identity(), Property.IndexScope, Property.Dynamic); 79 | 80 | public static final Setting SETTING_INDEX_DYNARANK_PARAMS = 81 | Setting.groupSetting("index.dynarank.script_sort.params.", Property.IndexScope, Property.Dynamic); 82 | 83 | public static final Setting SETTING_INDEX_DYNARANK_REORDER_SIZE = 84 | Setting.intSetting("index.dynarank.reorder_size", 100, Property.IndexScope, Property.Dynamic); 85 | 86 | public static final Setting SETTING_INDEX_DYNARANK_KEEP_TOPN = 87 | Setting.intSetting("index.dynarank.keep_topn", 0, Property.IndexScope, 
Property.Dynamic); 88 | 89 | public static final Setting SETTING_DYNARANK_CACHE_EXPIRE = 90 | Setting.timeSetting("dynarank.cache.expire", TimeValue.MINUS_ONE, Property.NodeScope); 91 | 92 | public static final Setting SETTING_DYNARANK_CACHE_CLEAN_INTERVAL = 93 | Setting.timeSetting("dynarank.cache.clean_interval", TimeValue.timeValueSeconds(60), Property.NodeScope); 94 | 95 | public static final String DYNARANK_RERANK_ENABLE = "Dynarank-Rerank"; 96 | 97 | public static final String DYNARANK_MIN_TOTAL_HITS = "Dynarank-Min-Total-Hits"; 98 | 99 | private final ClusterService clusterService; 100 | 101 | private final ScriptService scriptService; 102 | 103 | private final Cache scriptInfoCache; 104 | 105 | private final ThreadPool threadPool; 106 | 107 | private final NamedWriteableRegistry namedWriteableRegistry; 108 | 109 | private final TimeValue cleanInterval; 110 | 111 | private Reaper reaper; 112 | 113 | private final Client client; 114 | 115 | public static DynamicRanker getInstance() { 116 | return instance; 117 | } 118 | 119 | @Inject 120 | public DynamicRanker(final Settings settings, final Client client, final ClusterService clusterService, 121 | final ScriptService scriptService, final ThreadPool threadPool, final ActionFilters filters, 122 | final NamedWriteableRegistry namedWriteableRegistry) { 123 | this.client = client; 124 | this.clusterService = clusterService; 125 | this.scriptService = scriptService; 126 | this.threadPool = threadPool; 127 | this.namedWriteableRegistry = namedWriteableRegistry; 128 | 129 | logger.info("Initializing DynamicRanker"); 130 | 131 | final TimeValue expire = SETTING_DYNARANK_CACHE_EXPIRE.get(settings); 132 | cleanInterval = SETTING_DYNARANK_CACHE_CLEAN_INTERVAL.get(settings); 133 | 134 | final CacheBuilder builder = CacheBuilder.newBuilder().concurrencyLevel(16); 135 | if (expire.millis() >= 0) { 136 | builder.expireAfterAccess(expire.millis(), TimeUnit.MILLISECONDS); 137 | } 138 | scriptInfoCache = builder.build(); 139 | 
} 140 | 141 | @Override 142 | protected void doStart() throws ElasticsearchException { 143 | instance = this; 144 | reaper = new Reaper(); 145 | threadPool.schedule(reaper, cleanInterval, ThreadPool.Names.SAME); 146 | } 147 | 148 | @Override 149 | protected void doStop() throws ElasticsearchException { 150 | } 151 | 152 | @Override 153 | protected void doClose() throws ElasticsearchException { 154 | reaper.close(); 155 | scriptInfoCache.invalidateAll(); 156 | } 157 | 158 | public ActionListener wrapActionListener(final String action, final SearchRequest request, 159 | final ActionListener listener) { 160 | switch (request.searchType()) { 161 | case DFS_QUERY_THEN_FETCH: 162 | case QUERY_THEN_FETCH: 163 | break; 164 | default: 165 | return null; 166 | } 167 | 168 | if (request.scroll() != null) { 169 | return null; 170 | } 171 | 172 | final ThreadContext threadContext = threadPool.getThreadContext(); 173 | final String isRerank = threadContext.getHeader(DYNARANK_RERANK_ENABLE); 174 | if (isRerank != null && !Boolean.valueOf(isRerank)) { 175 | return null; 176 | } 177 | 178 | final SearchSourceBuilder source = request.source(); 179 | if (source == null) { 180 | return null; 181 | } 182 | 183 | final String[] indices = request.indices(); 184 | if (indices == null || indices.length != 1) { 185 | return null; 186 | } 187 | 188 | final String index = indices[0]; 189 | final ScriptInfo scriptInfo = getScriptInfo(index); 190 | if (scriptInfo == null || scriptInfo.getScript() == null) { 191 | return null; 192 | } 193 | 194 | final long startTime = System.nanoTime(); 195 | 196 | final int size = getInt(source.size(), 10); 197 | final int from = getInt(source.from(), 0); 198 | if (size < 0 || from < 0) { 199 | return null; 200 | } 201 | 202 | if (from >= scriptInfo.getReorderSize()) { 203 | return null; 204 | } 205 | 206 | int maxSize = scriptInfo.getReorderSize(); 207 | if (from + size > scriptInfo.getReorderSize()) { 208 | maxSize = from + size; 209 | } 210 | 
source.size(maxSize); 211 | source.from(0); 212 | 213 | if (logger.isDebugEnabled()) { 214 | logger.debug("Rewrite query: from:{}->{} size:{}->{}", from, 0, size, maxSize); 215 | } 216 | 217 | final ActionListener searchResponseListener = 218 | createSearchResponseListener(request, listener, from, size, startTime, scriptInfo); 219 | return new ActionListener() { 220 | @Override 221 | public void onResponse(final Response response) { 222 | try { 223 | searchResponseListener.onResponse(response); 224 | } catch (final RetrySearchException e) { 225 | threadPool.getThreadContext().putHeader(DYNARANK_RERANK_ENABLE, Boolean.FALSE.toString()); 226 | source.size(size); 227 | source.from(from); 228 | source.toString(); 229 | final SearchSourceBuilder newSource = e.rewrite(source); 230 | if (newSource == null) { 231 | throw new ElasticsearchException("Failed to rewrite source: " + source); 232 | } 233 | if (logger.isDebugEnabled()) { 234 | logger.debug("Original Query: \n{}\nRewrited Query: \n{}", source, newSource); 235 | } 236 | request.source(newSource); 237 | @SuppressWarnings("unchecked") 238 | final ActionListener actionListener = (ActionListener) listener; 239 | client.search(request, actionListener); 240 | } 241 | } 242 | 243 | @Override 244 | public void onFailure(final Exception e) { 245 | searchResponseListener.onFailure(e); 246 | } 247 | }; 248 | } 249 | 250 | public ScriptInfo getScriptInfo(final String index) { 251 | try { 252 | return scriptInfoCache.get(index, () -> { 253 | final Metadata metaData = clusterService.state().getMetadata(); 254 | IndexAbstraction indexAbstraction = metaData.getIndicesLookup().get(index); 255 | if (indexAbstraction == null) { 256 | return ScriptInfo.NO_SCRIPT_INFO; 257 | } 258 | 259 | final ScriptInfo[] scriptInfos = indexAbstraction.getIndices().stream() 260 | .map(metaData::index) 261 | .filter(idx -> SETTING_INDEX_DYNARANK_LANG.get(idx.getSettings()).length() > 0) 262 | .map(idx -> 263 | new 
ScriptInfo(SETTING_INDEX_DYNARANK_SCRIPT.get(idx.getSettings()), SETTING_INDEX_DYNARANK_LANG.get(idx.getSettings()), 264 | SETTING_INDEX_DYNARANK_TYPE.get(idx.getSettings()), SETTING_INDEX_DYNARANK_PARAMS.get(idx.getSettings()), 265 | SETTING_INDEX_DYNARANK_REORDER_SIZE.get(idx.getSettings()), SETTING_INDEX_DYNARANK_KEEP_TOPN.get(idx.getSettings()), 266 | idx.mapping()) 267 | ) 268 | .toArray(n -> new ScriptInfo[n]); 269 | 270 | if (scriptInfos.length == 0) { 271 | return ScriptInfo.NO_SCRIPT_INFO; 272 | } else if (scriptInfos.length == 1) { 273 | return scriptInfos[0]; 274 | } else { 275 | for (final ScriptInfo scriptInfo : scriptInfos) { 276 | if (!scriptInfo.getLang().equals(DiversitySortScriptEngine.SCRIPT_NAME)) { 277 | return ScriptInfo.NO_SCRIPT_INFO; 278 | } 279 | } 280 | return scriptInfos[0]; 281 | } 282 | }); 283 | } catch (final Exception e) { 284 | logger.warn("Failed to load ScriptInfo for {}.", e, index); 285 | return null; 286 | } 287 | } 288 | 289 | private ActionListener createSearchResponseListener(final SearchRequest request, 290 | final ActionListener listener, final int from, final int size, final long startTime, 291 | final ScriptInfo scriptInfo) { 292 | return new ActionListener() { 293 | @Override 294 | public void onResponse(final Response response) { 295 | final SearchResponse searchResponse = (SearchResponse) response; 296 | final long totalHits = searchResponse.getHits().getTotalHits().value; 297 | if (totalHits == 0) { 298 | if (logger.isDebugEnabled()) { 299 | logger.debug("totalHits is {}. 
No reranking results: {}", totalHits, searchResponse); 300 | } 301 | listener.onResponse(response); 302 | return; 303 | } 304 | 305 | final String minTotalHitsValue = threadPool.getThreadContext().getHeader(DYNARANK_MIN_TOTAL_HITS); 306 | if (minTotalHitsValue != null) { 307 | final long minTotalHits = Long.parseLong(minTotalHitsValue); 308 | if (totalHits < minTotalHits) { 309 | if (logger.isDebugEnabled()) { 310 | logger.debug("totalHits is {} < {}. No reranking results: {}", totalHits, minTotalHits, searchResponse); 311 | } 312 | listener.onResponse(response); 313 | return; 314 | } 315 | } 316 | 317 | if (logger.isDebugEnabled()) { 318 | logger.debug("Reranking results: {}", searchResponse); 319 | } 320 | 321 | try { 322 | final BytesStreamOutput out = new BytesStreamOutput(); 323 | searchResponse.writeTo(out); 324 | 325 | if (logger.isDebugEnabled()) { 326 | logger.debug("Reading headers..."); 327 | } 328 | final StreamInput in = new NamedWriteableAwareStreamInput(out.bytes().streamInput(), namedWriteableRegistry); 329 | if (logger.isDebugEnabled()) { 330 | logger.debug("Reading hits..."); 331 | } 332 | // BEGIN: SearchResponse#writeTo 333 | // BEGIN: InternalSearchResponse#writeTo 334 | final SearchHits hits = new SearchHits(in); 335 | final SearchHits newHits = doReorder(hits, from, size, scriptInfo); 336 | if (logger.isDebugEnabled()) { 337 | logger.debug("Reading aggregations..."); 338 | } 339 | final InternalAggregations aggregations = in.readBoolean() ? InternalAggregations.readFrom(in) : null; 340 | if (logger.isDebugEnabled()) { 341 | logger.debug("Reading suggest..."); 342 | } 343 | final Suggest suggest = in.readBoolean() ? 
new Suggest(in) : null; 344 | final boolean timedOut = in.readBoolean(); 345 | final Boolean terminatedEarly = in.readOptionalBoolean(); 346 | final SearchProfileResults profileResults = in.readOptionalWriteable(SearchProfileResults::new); 347 | final int numReducePhases = in.readVInt(); 348 | 349 | final SearchResponseSections internalResponse = new InternalSearchResponse(newHits, aggregations, suggest, 350 | profileResults, timedOut, terminatedEarly, numReducePhases); 351 | // END: InternalSearchResponse 352 | 353 | final int totalShards = in.readVInt(); 354 | final int successfulShards = in.readVInt(); 355 | final int size = in.readVInt(); 356 | final ShardSearchFailure[] shardFailures; 357 | if (size == 0) { 358 | shardFailures = ShardSearchFailure.EMPTY_ARRAY; 359 | } else { 360 | shardFailures = new ShardSearchFailure[size]; 361 | for (int i = 0; i < shardFailures.length; i++) { 362 | shardFailures[i] = readShardSearchFailure(in); 363 | } 364 | } 365 | final Clusters clusters; 366 | if (in.getVersion().onOrAfter(Version.V_6_1_0)) { 367 | clusters = new Clusters(in.readVInt(), in.readVInt(), in.readVInt()); 368 | } else { 369 | clusters = Clusters.EMPTY; 370 | } 371 | final String scrollId = in.readOptionalString(); 372 | /* tookInMillis = */ in.readVLong(); 373 | final int skippedShards = in.readVInt(); 374 | // END: SearchResponse 375 | 376 | final long tookInMillis = (System.nanoTime() - startTime) / 1000000; 377 | 378 | if (logger.isDebugEnabled()) { 379 | logger.debug("Creating new SearchResponse..."); 380 | } 381 | @SuppressWarnings("unchecked") 382 | final Response newResponse = (Response) new SearchResponse(internalResponse, scrollId, totalShards, successfulShards, 383 | skippedShards, tookInMillis, shardFailures, clusters); 384 | listener.onResponse(newResponse); 385 | 386 | if (logger.isDebugEnabled()) { 387 | logger.debug("Rewriting overhead time: {} - {} = {}ms", tookInMillis, searchResponse.getTook().getMillis(), 388 | tookInMillis - 
searchResponse.getTook().getMillis()); 389 | } 390 | } catch (final RetrySearchException e) { 391 | throw e; 392 | } catch (final Exception e) { 393 | if (logger.isDebugEnabled()) { 394 | logger.debug("Failed to parse a search response.", e); 395 | } 396 | throw new ElasticsearchException("Failed to parse a search response.", e); 397 | } 398 | } 399 | 400 | @Override 401 | public void onFailure(final Exception e) { 402 | listener.onFailure(e); 403 | } 404 | }; 405 | } 406 | 407 | private SearchHits doReorder(final SearchHits hits, final int from, final int size, 408 | final ScriptInfo scriptInfo) { 409 | final SearchHit[] searchHits = hits.getHits(); 410 | SearchHit[] newSearchHits; 411 | if (logger.isDebugEnabled()) { 412 | logger.debug("searchHits.length <= reorderSize: {}", searchHits.length <= scriptInfo.getReorderSize()); 413 | } 414 | if (searchHits.length <= scriptInfo.getReorderSize()) { 415 | final SearchHit[] targets = onReorder(searchHits, scriptInfo); 416 | if (from >= targets.length) { 417 | newSearchHits = new SearchHit[0]; 418 | if (logger.isDebugEnabled()) { 419 | logger.debug("Invalid argument: {} >= {}", from, targets.length); 420 | } 421 | } else { 422 | int end = from + size; 423 | if (end > targets.length) { 424 | end = targets.length; 425 | } 426 | newSearchHits = Arrays.copyOfRange(targets, from, end); 427 | } 428 | } else { 429 | SearchHit[] targets = Arrays.copyOfRange(searchHits, 0, scriptInfo.getReorderSize()); 430 | targets = onReorder(targets, scriptInfo); 431 | final List list = new ArrayList<>(size); 432 | for (int i = from; i < targets.length; i++) { 433 | list.add(targets[i]); 434 | } 435 | for (int i = targets.length; i < searchHits.length; i++) { 436 | list.add(searchHits[i]); 437 | } 438 | newSearchHits = list.toArray(new SearchHit[list.size()]); 439 | } 440 | return new SearchHits(newSearchHits, hits.getTotalHits(), hits.getMaxScore()); 441 | } 442 | 443 | private SearchHit[] onReorder(final SearchHit[] searchHits, 444 | final 
ScriptInfo scriptInfo) { 445 | final int keepTopN = scriptInfo.getKeepTopN(); 446 | if (searchHits.length <= keepTopN) { 447 | return searchHits; 448 | } 449 | final Factory factory = scriptService.compile( 450 | new Script(scriptInfo.getScriptType(), scriptInfo.getLang(), 451 | scriptInfo.getScript(), scriptInfo.getSettings()), 452 | DynaRankScript.CONTEXT); 453 | if (keepTopN == 0) { 454 | return factory.newInstance(scriptInfo.getSettings()) 455 | .execute(searchHits); 456 | } 457 | final SearchHit[] hits = Arrays.copyOfRange(searchHits, keepTopN, 458 | searchHits.length); 459 | final SearchHit[] reordered = factory 460 | .newInstance(scriptInfo.getSettings()).execute(hits); 461 | for (int i = keepTopN; i < searchHits.length; i++) { 462 | searchHits[i] = reordered[i - keepTopN]; 463 | } 464 | return searchHits; 465 | } 466 | 467 | private int getInt(final Object value, final int defaultValue) { 468 | if (value instanceof Number) { 469 | final int v = ((Number) value).intValue(); 470 | if (v < 0) { 471 | return defaultValue; 472 | } 473 | return v; 474 | } else if (value instanceof String) { 475 | return Integer.parseInt(value.toString()); 476 | } 477 | return defaultValue; 478 | } 479 | 480 | public static class ScriptInfo { 481 | protected final static ScriptInfo NO_SCRIPT_INFO = new ScriptInfo(); 482 | 483 | private String script; 484 | 485 | private String lang; 486 | 487 | private ScriptType scriptType; 488 | 489 | private Map settings; 490 | 491 | private int reorderSize; 492 | 493 | private int keepTopN; 494 | 495 | ScriptInfo() { 496 | // nothing 497 | } 498 | 499 | ScriptInfo(final String script, final String lang, final String scriptType, final Settings settings, final int reorderSize, final int keepTopN, final MappingMetadata mappingMetadata) { 500 | this.script = script; 501 | this.lang = lang; 502 | this.reorderSize = reorderSize; 503 | this.keepTopN=keepTopN; 504 | this.settings = new HashMap<>(); 505 | for (final String name : settings.keySet()) { 
506 | final List list = settings.getAsList(name); 507 | this.settings.put(name, list.toArray(new String[list.size()])); 508 | } 509 | this.settings.put("source_as_map", mappingMetadata.getSourceAsMap()); 510 | if ("STORED".equalsIgnoreCase(scriptType)) { 511 | this.scriptType = ScriptType.STORED; 512 | } else { 513 | this.scriptType = ScriptType.INLINE; 514 | } 515 | } 516 | 517 | public String getScript() { 518 | return script; 519 | } 520 | 521 | public String getLang() { 522 | return lang; 523 | } 524 | 525 | public ScriptType getScriptType() { 526 | return scriptType; 527 | } 528 | 529 | public Map getSettings() { 530 | return settings; 531 | } 532 | 533 | 534 | public int getReorderSize() { 535 | return reorderSize; 536 | } 537 | 538 | public int getKeepTopN() { 539 | return keepTopN; 540 | } 541 | 542 | @Override 543 | public String toString() { 544 | return "ScriptInfo [script=" + script + ", lang=" + lang + ", scriptType=" + scriptType + ", settings=" + settings 545 | + ", reorderSize=" + reorderSize + ", keepTopN=" + keepTopN + "]"; 546 | } 547 | } 548 | 549 | private class Reaper implements Runnable { 550 | private volatile boolean closed; 551 | 552 | void close() { 553 | closed = true; 554 | } 555 | 556 | @Override 557 | public void run() { 558 | if (closed) { 559 | return; 560 | } 561 | 562 | try { 563 | for (final Map.Entry entry : scriptInfoCache.asMap().entrySet()) { 564 | final String index = entry.getKey(); 565 | 566 | final IndexMetadata indexMD = clusterService.state().getMetadata().index(index); 567 | if (indexMD == null) { 568 | scriptInfoCache.invalidate(index); 569 | if (logger.isDebugEnabled()) { 570 | logger.debug("Invalidate cache for {}", index); 571 | } 572 | continue; 573 | } 574 | 575 | final Settings indexSettings = indexMD.getSettings(); 576 | final String script = SETTING_INDEX_DYNARANK_SCRIPT.get(indexSettings); 577 | if (script == null || script.length() == 0) { 578 | scriptInfoCache.invalidate(index); 579 | if 
(logger.isDebugEnabled()) { 580 | logger.debug("Invalidate cache for {}", index); 581 | } 582 | continue; 583 | } 584 | 585 | final ScriptInfo scriptInfo = new ScriptInfo(script, SETTING_INDEX_DYNARANK_LANG.get(indexSettings), 586 | SETTING_INDEX_DYNARANK_TYPE.get(indexSettings), SETTING_INDEX_DYNARANK_PARAMS.get(indexSettings), 587 | SETTING_INDEX_DYNARANK_REORDER_SIZE.get(indexSettings), SETTING_INDEX_DYNARANK_KEEP_TOPN.get(indexSettings), 588 | indexMD.mapping()); 589 | if (logger.isDebugEnabled()) { 590 | logger.debug("Reload cache for {} => {}", index, scriptInfo); 591 | } 592 | scriptInfoCache.put(index, scriptInfo); 593 | } 594 | } catch (final Exception e) { 595 | logger.warn("Failed to update a cache for ScriptInfo.", e); 596 | } finally { 597 | threadPool.schedule(reaper, cleanInterval, ThreadPool.Names.GENERIC); 598 | } 599 | 600 | } 601 | 602 | } 603 | 604 | } 605 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/ranker/RetrySearchException.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.ranker; 2 | 3 | import java.io.Serializable; 4 | 5 | import org.elasticsearch.search.builder.SearchSourceBuilder; 6 | 7 | public class RetrySearchException extends RuntimeException { 8 | 9 | private static final long serialVersionUID = 1L; 10 | 11 | private final QueryRewriter rewriter; 12 | 13 | public RetrySearchException(final QueryRewriter rewriter) { 14 | super(); 15 | this.rewriter = rewriter; 16 | } 17 | 18 | @Override 19 | public synchronized Throwable fillInStackTrace() { 20 | return null; 21 | } 22 | 23 | public SearchSourceBuilder rewrite(final SearchSourceBuilder source) { 24 | return rewriter.rewrite(source); 25 | } 26 | 27 | public interface QueryRewriter extends Serializable { 28 | SearchSourceBuilder rewrite(SearchSourceBuilder source); 29 | } 30 | } 31 | 
-------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/DiversitySortScriptEngine.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script; 2 | 3 | import java.io.IOException; 4 | import java.lang.reflect.Constructor; 5 | import java.security.AccessController; 6 | import java.security.PrivilegedAction; 7 | import java.util.Collections; 8 | import java.util.HashMap; 9 | import java.util.Map; 10 | import java.util.Set; 11 | 12 | import org.apache.logging.log4j.LogManager; 13 | import org.apache.logging.log4j.Logger; 14 | import org.codelibs.elasticsearch.dynarank.script.bucket.BucketFactory; 15 | import org.codelibs.elasticsearch.dynarank.script.bucket.Buckets; 16 | import org.codelibs.elasticsearch.dynarank.script.bucket.impl.MinhashBucketFactory; 17 | import org.codelibs.elasticsearch.dynarank.script.bucket.impl.StandardBucketFactory; 18 | import org.elasticsearch.ElasticsearchException; 19 | import org.elasticsearch.common.settings.Setting; 20 | import org.elasticsearch.common.settings.Setting.Property; 21 | import org.elasticsearch.common.settings.Settings; 22 | import org.elasticsearch.script.ScriptContext; 23 | import org.elasticsearch.script.ScriptEngine; 24 | import org.elasticsearch.search.SearchHit; 25 | 26 | public class DiversitySortScriptEngine implements ScriptEngine { 27 | private static final Logger logger = LogManager.getLogger(DiversitySortScriptEngine.class); 28 | 29 | public static final String SCRIPT_NAME = "dynarank_diversity_sort"; 30 | 31 | private static final String STANDARD = "standard"; 32 | 33 | private static final String MINHASH = "minhash"; 34 | 35 | public static final Setting SETTING_SCRIPT_DYNARANK_BUCKET = 36 | Setting.groupSetting("script.dynarank.bucket.", Property.NodeScope); 37 | 38 | private Map bucketFactories; 39 | 40 | public DiversitySortScriptEngine(final 
Settings settings) { 41 | 42 | final Settings bucketSettings = SETTING_SCRIPT_DYNARANK_BUCKET.get(settings); 43 | 44 | bucketFactories = new HashMap<>(); 45 | bucketFactories.put(STANDARD, new StandardBucketFactory(settings)); 46 | bucketFactories.put(MINHASH, new MinhashBucketFactory(settings)); 47 | 48 | for (final String name : bucketSettings.names()) { 49 | try { 50 | bucketFactories.put(name, AccessController.doPrivileged((PrivilegedAction) () -> { 51 | try { 52 | @SuppressWarnings("unchecked") 53 | final Class clazz = (Class) Class.forName(bucketSettings.get(name)); 54 | final Class[] types = new Class[] { Settings.class }; 55 | final Constructor constructor = clazz.getConstructor(types); 56 | 57 | final Object[] args = new Object[] { settings }; 58 | return constructor.newInstance(args); 59 | } catch (final Exception e) { 60 | throw new ElasticsearchException(e); 61 | } 62 | })); 63 | } catch (final Exception e) { 64 | logger.warn("BucketFactory {} is not found.", e, name); 65 | } 66 | } 67 | } 68 | 69 | @Override 70 | public void close() throws IOException { 71 | // no-op 72 | } 73 | 74 | @Override 75 | public String getType() { 76 | return SCRIPT_NAME; 77 | } 78 | 79 | @Override 80 | public T compile(String name, String code, ScriptContext context, Map options) { 81 | DynaRankScript.Factory compiled = params -> new DiversitySortExecutableScript(params, bucketFactories); 82 | return context.factoryClazz.cast(compiled); 83 | } 84 | 85 | private static class DiversitySortExecutableScript extends DynaRankScript { 86 | private final Map bucketFactories; 87 | 88 | public DiversitySortExecutableScript(final Map vars, final Map bucketFactories) { 89 | super(vars); 90 | this.bucketFactories = bucketFactories; 91 | } 92 | 93 | @Override 94 | public SearchHit[] execute(SearchHit[] searchHit) { 95 | if (logger.isDebugEnabled()) { 96 | logger.debug("Starting DiversitySortScript..."); 97 | } 98 | Object bucketFactoryName = STANDARD; 99 | if (params.get("bucket_factory") 
!= null) { 100 | bucketFactoryName = ((String[]) params.get("bucket_factory"))[0]; 101 | } 102 | final BucketFactory bucketFactory = bucketFactories.get(bucketFactoryName); 103 | if (bucketFactory == null) { 104 | throw new ElasticsearchException("bucket_factory is invalid: " + bucketFactoryName); 105 | } 106 | 107 | final Buckets buckets = bucketFactory.createBucketList(params); 108 | return buckets.getHits(searchHit); 109 | } 110 | 111 | } 112 | 113 | @Override 114 | public Set> getSupportedContexts() { 115 | return Collections.singleton(DynaRankScript.CONTEXT); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/DynaRankScript.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script; 2 | 3 | import java.util.Map; 4 | 5 | import org.elasticsearch.script.ScriptContext; 6 | import org.elasticsearch.search.SearchHit; 7 | 8 | public abstract class DynaRankScript { 9 | 10 | protected final Map params; 11 | 12 | public DynaRankScript(final Map params) { 13 | this.params = params; 14 | } 15 | 16 | public Map getParams() { 17 | return params; 18 | } 19 | 20 | public abstract SearchHit[] execute(final SearchHit[] searchHits); 21 | 22 | public interface Factory { 23 | DynaRankScript newInstance(final Map params); 24 | } 25 | 26 | public static final String[] PARAMETERS = {"searchHits"}; 27 | public static final ScriptContext CONTEXT = new ScriptContext<>("dynarank", Factory.class); 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/bucket/Bucket.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script.bucket; 2 | 3 | import org.elasticsearch.search.SearchHit; 4 | 5 | public interface Bucket { 6 | 7 | 
boolean contains(Object value); 8 | 9 | SearchHit get(); 10 | 11 | void add(Object... args); 12 | 13 | void consume(); 14 | 15 | int size(); 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/bucket/BucketFactory.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script.bucket; 2 | 3 | import java.util.Map; 4 | 5 | public interface BucketFactory { 6 | 7 | Buckets createBucketList(Map params); 8 | 9 | Bucket createBucket(Object... args); 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/bucket/Buckets.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script.bucket; 2 | 3 | import org.elasticsearch.search.SearchHit; 4 | 5 | public interface Buckets { 6 | 7 | SearchHit[] getHits(final SearchHit[] searchHit); 8 | 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/bucket/impl/MinhashBucket.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script.bucket.impl; 2 | 3 | import org.codelibs.elasticsearch.dynarank.script.bucket.Bucket; 4 | import org.codelibs.minhash.MinHash; 5 | import org.elasticsearch.search.SearchHit; 6 | 7 | import java.util.LinkedList; 8 | import java.util.Queue; 9 | 10 | public class MinhashBucket implements Bucket { 11 | protected Queue queue = new LinkedList<>(); 12 | 13 | protected Object hash; 14 | 15 | private final float threshold; 16 | 17 | private final boolean isMinhash; 18 | 19 | public MinhashBucket(final SearchHit hit, final Object hash, final float threshold, final boolean isMinhash) { 20 | 
this.hash = hash; 21 | this.threshold = threshold; 22 | this.isMinhash = isMinhash; 23 | queue.add(hit); 24 | } 25 | 26 | @Override 27 | public void consume() { 28 | queue.poll(); 29 | } 30 | 31 | @Override 32 | public SearchHit get() { 33 | return queue.peek(); 34 | } 35 | 36 | @Override 37 | public boolean contains(final Object value) { 38 | if (hash == null) { 39 | return value == null; 40 | } 41 | 42 | if (value == null) { 43 | return false; 44 | } 45 | 46 | if (!hash.getClass().equals(value.getClass())) { 47 | return false; 48 | } 49 | 50 | if (value instanceof String) { 51 | if (isMinhash) { 52 | return MinHash.compare(hash.toString(), value.toString()) >= threshold; 53 | } 54 | return value.toString().equals(hash); 55 | } else if (value instanceof Number) { 56 | return Math.abs(((Number) value).doubleValue() - ((Number) hash).doubleValue()) < threshold; 57 | } else if (value instanceof byte[]) { 58 | final byte[] target = (byte[]) value; 59 | return MinHash.compare((byte[]) hash, target) >= threshold; 60 | } 61 | return false; 62 | } 63 | 64 | @Override 65 | public void add(final Object... 
args) { 66 | queue.add((SearchHit) args[0]); 67 | } 68 | 69 | @Override 70 | public int size() { 71 | return queue.size(); 72 | } 73 | 74 | @Override 75 | public String toString() { 76 | return "MinhashBucket [queue=" + queue + ", hash=" + hash + ", threshold=" + threshold + ", isMinhash=" + isMinhash + "]"; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/bucket/impl/MinhashBucketFactory.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script.bucket.impl; 2 | 3 | import org.codelibs.elasticsearch.dynarank.script.bucket.Bucket; 4 | import org.codelibs.elasticsearch.dynarank.script.bucket.BucketFactory; 5 | import org.codelibs.elasticsearch.dynarank.script.bucket.Buckets; 6 | import org.elasticsearch.common.settings.Settings; 7 | import org.elasticsearch.search.SearchHit; 8 | 9 | import java.util.Map; 10 | 11 | public class MinhashBucketFactory implements BucketFactory { 12 | 13 | protected Settings settings; 14 | 15 | public MinhashBucketFactory(final Settings settings) { 16 | this.settings = settings; 17 | } 18 | 19 | @Override 20 | public Buckets createBucketList(final Map params) { 21 | return new MinhashBuckets(this, params); 22 | } 23 | 24 | @Override 25 | public Bucket createBucket(final Object... 
args) { 26 | return new MinhashBucket((SearchHit) args[0], args[1], (float) args[2], (boolean) args[3]); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/bucket/impl/MinhashBuckets.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script.bucket.impl; 2 | 3 | import org.apache.logging.log4j.LogManager; 4 | import org.apache.logging.log4j.Logger; 5 | import org.codelibs.elasticsearch.dynarank.ranker.RetrySearchException; 6 | import org.codelibs.elasticsearch.dynarank.script.bucket.Bucket; 7 | import org.codelibs.elasticsearch.dynarank.script.bucket.BucketFactory; 8 | import org.codelibs.elasticsearch.dynarank.script.bucket.Buckets; 9 | import org.elasticsearch.ElasticsearchException; 10 | import org.elasticsearch.common.bytes.BytesArray; 11 | import org.elasticsearch.common.bytes.BytesReference; 12 | import org.elasticsearch.common.document.DocumentField; 13 | import org.elasticsearch.common.lucene.search.function.CombineFunction; 14 | import org.elasticsearch.index.query.QueryBuilders; 15 | import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; 16 | import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; 17 | import org.elasticsearch.search.SearchHit; 18 | import org.elasticsearch.search.builder.SearchSourceBuilder; 19 | 20 | import java.util.ArrayList; 21 | import java.util.Arrays; 22 | import java.util.List; 23 | import java.util.Map; 24 | 25 | public class MinhashBuckets implements Buckets { 26 | 27 | private static final Logger logger = LogManager.getLogger(MinhashBuckets.class); 28 | 29 | protected BucketFactory bucketFactory; 30 | 31 | protected Map params; 32 | 33 | public MinhashBuckets(final BucketFactory bucketFactory, final Map params) { 34 | this.bucketFactory = bucketFactory; 35 | this.params = params; 36 | } 37 | 38 | 
@Override 39 | public SearchHit[] getHits(final SearchHit[] searchHits) { 40 | SearchHit[] hits = searchHits; 41 | final int length = hits.length; 42 | final String[] diversityFields = (String[]) params.get("diversity_fields"); 43 | if (diversityFields == null) { 44 | throw new ElasticsearchException("diversity_fields is null."); 45 | } 46 | final String[] thresholds = (String[]) params.get("diversity_thresholds"); 47 | if (thresholds == null) { 48 | throw new ElasticsearchException("diversity_thresholds is null."); 49 | } 50 | final Object sourceAsMap = params.get("source_as_map"); 51 | final float[] diversityThresholds = parseFloats(thresholds); 52 | final Object[][] ignoredObjGroups = new Object[diversityFields.length][]; 53 | final String[] minhashFields = new String[diversityFields.length]; 54 | for (int i = 0; i < diversityFields.length; i++) { 55 | ignoredObjGroups[i] = (String[]) params.get(diversityFields[i] + "_ignored_objects"); 56 | if (isMinhashFields(sourceAsMap, diversityFields[i])) { 57 | minhashFields[i] = diversityFields[i]; 58 | } 59 | } 60 | 61 | if (logger.isDebugEnabled()) { 62 | logger.debug("diversity_fields: {}, : diversity_thresholds{}", diversityFields, thresholds); 63 | } 64 | int maxNumOfBuckets = 0; 65 | int minNumOfBuckets = Integer.MAX_VALUE; 66 | for (int i = diversityFields.length - 1; i >= 0; i--) { 67 | final String diversityField = diversityFields[i]; 68 | final boolean isMinhash = Arrays.asList(minhashFields).contains(diversityField); 69 | final float diversityThreshold = diversityThresholds[i]; 70 | final Object[] ignoredObjs = ignoredObjGroups[i]; 71 | final List bucketList = new ArrayList<>(); 72 | for (int j = 0; j < length; j++) { 73 | boolean insert = false; 74 | final SearchHit hit = hits[j]; 75 | final Object value = getFieldValue(hit, diversityField); 76 | if (value == null) { 77 | if (logger.isDebugEnabled()) { 78 | logger.debug("diversityField {} does not exist. 
Reranking is skipped.", diversityField); 79 | } 80 | return hits; 81 | } 82 | if (ignoredObjs != null) { 83 | for (final Object ignoredObj : ignoredObjs) { 84 | if (ignoredObj.equals(value)) { 85 | bucketList.add(bucketFactory.createBucket(hit, value, diversityThreshold, isMinhash)); 86 | insert = true; 87 | break; 88 | } 89 | } 90 | } 91 | if (!insert) { 92 | for (final Bucket bucket : bucketList) { 93 | if (bucket.contains(value)) { 94 | bucket.add(hit, value); 95 | insert = true; 96 | break; 97 | } 98 | } 99 | if (!insert) { 100 | bucketList.add(bucketFactory.createBucket(hit, value, diversityThreshold, isMinhash)); 101 | } 102 | } 103 | } 104 | if (bucketList.size() > maxNumOfBuckets) { 105 | maxNumOfBuckets = bucketList.size(); 106 | } 107 | if (bucketList.size() < minNumOfBuckets) { 108 | minNumOfBuckets = bucketList.size(); 109 | } 110 | hits = createHits(length, bucketList); 111 | } 112 | 113 | int minBucketThreshold = 0; 114 | int maxBucketThreshold = 0; 115 | 116 | final Object minBucketThresholdStr = params.get("min_bucket_threshold"); 117 | if (minBucketThresholdStr instanceof String) { 118 | try { 119 | minBucketThreshold = Integer.parseInt(minBucketThresholdStr.toString()); 120 | } catch (final NumberFormatException e) { 121 | throw new ElasticsearchException("Invalid value of min_bucket_threshold: " + minBucketThresholdStr.toString(), e); 122 | } 123 | } else if (minBucketThresholdStr instanceof Number) { 124 | minBucketThreshold = ((Number) minBucketThresholdStr).intValue(); 125 | } 126 | 127 | final Object maxBucketThresholdStr = params.get("max_bucket_threshold"); 128 | if (maxBucketThresholdStr instanceof String) { 129 | try { 130 | maxBucketThreshold = Integer.parseInt(maxBucketThresholdStr.toString()); 131 | } catch (final NumberFormatException e) { 132 | throw new ElasticsearchException("Invalid value of max_bucket_threshold: " + maxBucketThresholdStr.toString(), e); 133 | } 134 | } else if (maxBucketThresholdStr instanceof Number) { 135 | 
maxBucketThreshold = ((Number) maxBucketThresholdStr).intValue(); 136 | } 137 | 138 | if (logger.isDebugEnabled()) { 139 | logger.debug("searchHits: {}, minNumOfBuckets: {}, maxNumOfBuckets: {}, minBucketSize: {}, maxBucketThreshold: {}", 140 | hits.length, minNumOfBuckets, maxNumOfBuckets, minBucketThreshold, maxBucketThreshold); 141 | } 142 | 143 | if ((minBucketThreshold > 0 && minBucketThreshold >= minNumOfBuckets) 144 | || (maxBucketThreshold > 0 && maxBucketThreshold >= maxNumOfBuckets)) { 145 | final Object shuffleSeed = params.get("shuffle_seed"); 146 | if (shuffleSeed != null) { 147 | if (logger.isDebugEnabled()) { 148 | logger.debug("minBucketSize: {}", shuffleSeed); 149 | } 150 | throw new RetrySearchException(new RetrySearchException.QueryRewriter() { 151 | private static final long serialVersionUID = 1L; 152 | 153 | @Override 154 | public SearchSourceBuilder rewrite(final SearchSourceBuilder source) { 155 | float shuffleWeight = 1; 156 | if (params.get("shuffle_weight") instanceof Number) { 157 | shuffleWeight = ((Number) params.get("shuffle_weight")).floatValue(); 158 | } 159 | final Object shuffleBoostMode = params.get("shuffle_boost_mode"); 160 | 161 | final FunctionScoreQueryBuilder functionScoreQuery = QueryBuilders.functionScoreQuery(source.query(), 162 | new FunctionScoreQueryBuilder.FilterFunctionBuilder[] { new FunctionScoreQueryBuilder.FilterFunctionBuilder( 163 | ScoreFunctionBuilders.randomFunction().seed(shuffleSeed.toString()).setWeight(shuffleWeight)) }); 164 | if (shuffleBoostMode != null) { 165 | functionScoreQuery.boostMode(CombineFunction.fromString(shuffleBoostMode.toString())); 166 | } 167 | source.query(functionScoreQuery); 168 | return source; 169 | } 170 | }); 171 | } 172 | } 173 | 174 | return hits; 175 | } 176 | 177 | private Object getFieldValue(final SearchHit hit, final String fieldName) { 178 | final DocumentField field = hit.getFields().get(fieldName); 179 | if (field == null) { 180 | final Map source = 
hit.getSourceAsMap(); 181 | // TODO nested 182 | final Object object = source.get(fieldName); 183 | if (object instanceof String) { 184 | return object; 185 | } else if (object instanceof Number) { 186 | return object; 187 | } 188 | return null; 189 | } 190 | final Object object = field.getValue(); 191 | if (object instanceof BytesReference) { 192 | return BytesReference.toBytes((BytesReference) object); 193 | } else if (object instanceof String) { 194 | return object; 195 | } else if (object instanceof Number) { 196 | return object; 197 | } else if (object instanceof BytesArray) { 198 | return ((BytesArray) object).array(); 199 | } 200 | return null; 201 | } 202 | 203 | private float[] parseFloats(final String[] strings) { 204 | final float[] values = new float[strings.length]; 205 | for (int i = 0; i < strings.length; i++) { 206 | values[i] = Float.parseFloat(strings[i]); 207 | } 208 | return values; 209 | } 210 | 211 | protected SearchHit[] createHits(final int size, final List bucketList) { 212 | if (logger.isDebugEnabled()) { 213 | logger.debug("{} docs -> {} buckets", size, bucketList.size()); 214 | for (int i = 0; i < bucketList.size(); i++) { 215 | final Bucket bucket = bucketList.get(i); 216 | logger.debug(" bucket[{}] -> {} docs", i, bucket.size()); 217 | } 218 | } 219 | 220 | int pos = 0; 221 | final SearchHit[] newSearchHits = new SearchHit[size]; 222 | while (pos < size) { 223 | for (final Bucket bucket : bucketList) { 224 | final SearchHit hit = bucket.get(); 225 | if (hit != null) { 226 | newSearchHits[pos] = hit; 227 | pos++; 228 | bucket.consume(); 229 | } 230 | } 231 | } 232 | 233 | return newSearchHits; 234 | } 235 | 236 | @SuppressWarnings("unchecked") 237 | private boolean isMinhashFields(Object sourceAsMap, String field) { 238 | if (sourceAsMap instanceof Map) { 239 | Object propertiesMap = ((Map) sourceAsMap).get("properties"); 240 | if (propertiesMap instanceof Map) { 241 | Object fieldMap = ((Map) propertiesMap).get(field); 242 | if 
(fieldMap instanceof Map) { 243 | Object fieldType = ((Map) fieldMap).get("type"); 244 | return fieldType != null && fieldType.toString().equals("minhash"); 245 | } 246 | } 247 | } 248 | return false; 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/bucket/impl/StandardBucket.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script.bucket.impl; 2 | 3 | import java.util.LinkedList; 4 | import java.util.Queue; 5 | 6 | import org.codelibs.elasticsearch.dynarank.script.bucket.Bucket; 7 | import org.codelibs.minhash.MinHash; 8 | import org.elasticsearch.search.SearchHit; 9 | 10 | public class StandardBucket implements Bucket { 11 | protected Queue queue = new LinkedList<>(); 12 | 13 | protected Object hash; 14 | 15 | private final float threshold; 16 | 17 | public StandardBucket(final SearchHit hit, final Object hash, final float threshold) { 18 | this.hash = hash; 19 | this.threshold = threshold; 20 | queue.add(hit); 21 | } 22 | 23 | @Override 24 | public void consume() { 25 | queue.poll(); 26 | } 27 | 28 | @Override 29 | public SearchHit get() { 30 | return queue.peek(); 31 | } 32 | 33 | @Override 34 | public boolean contains(final Object value) { 35 | if (hash == null) { 36 | return value == null; 37 | } 38 | 39 | if (value == null) { 40 | return false; 41 | } 42 | 43 | if (!hash.getClass().equals(value.getClass())) { 44 | return false; 45 | } 46 | 47 | if (value instanceof String) { 48 | return value.toString().equals(hash); 49 | } else if (value instanceof Number) { 50 | return Math.abs(((Number) value).doubleValue() - ((Number) hash).doubleValue()) < threshold; 51 | } else if (value instanceof byte[]) { 52 | final byte[] target = (byte[]) value; 53 | return MinHash.compare((byte[]) hash, target) >= threshold; 54 | } 55 | return false; 56 | } 57 | 58 | @Override 59 | 
public void add(final Object... args) { 60 | queue.add((SearchHit) args[0]); 61 | } 62 | 63 | @Override 64 | public int size() { 65 | return queue.size(); 66 | } 67 | 68 | @Override 69 | public String toString() { 70 | return "StandardBucket [queue=" + queue + ", hash=" + hash + ", threshold=" + threshold + "]"; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/bucket/impl/StandardBucketFactory.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script.bucket.impl; 2 | 3 | import java.util.Map; 4 | 5 | import org.codelibs.elasticsearch.dynarank.script.bucket.Bucket; 6 | import org.codelibs.elasticsearch.dynarank.script.bucket.BucketFactory; 7 | import org.codelibs.elasticsearch.dynarank.script.bucket.Buckets; 8 | import org.elasticsearch.common.settings.Settings; 9 | import org.elasticsearch.search.SearchHit; 10 | 11 | public class StandardBucketFactory implements BucketFactory { 12 | 13 | protected Settings settings; 14 | 15 | public StandardBucketFactory(final Settings settings) { 16 | this.settings = settings; 17 | } 18 | 19 | @Override 20 | public Buckets createBucketList(final Map params) { 21 | return new StandardBuckets(this, params); 22 | } 23 | 24 | @Override 25 | public Bucket createBucket(final Object... 
args) { 26 | return new StandardBucket((SearchHit) args[0], args[1], (float) args[2]); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/org/codelibs/elasticsearch/dynarank/script/bucket/impl/StandardBuckets.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank.script.bucket.impl; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import org.apache.logging.log4j.LogManager; 8 | import org.apache.logging.log4j.Logger; 9 | import org.codelibs.elasticsearch.dynarank.ranker.RetrySearchException; 10 | import org.codelibs.elasticsearch.dynarank.script.bucket.Bucket; 11 | import org.codelibs.elasticsearch.dynarank.script.bucket.BucketFactory; 12 | import org.codelibs.elasticsearch.dynarank.script.bucket.Buckets; 13 | import org.elasticsearch.ElasticsearchException; 14 | import org.elasticsearch.common.bytes.BytesArray; 15 | import org.elasticsearch.common.bytes.BytesReference; 16 | import org.elasticsearch.common.document.DocumentField; 17 | import org.elasticsearch.common.lucene.search.function.CombineFunction; 18 | import org.elasticsearch.index.query.QueryBuilders; 19 | import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; 20 | import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; 21 | import org.elasticsearch.search.SearchHit; 22 | import org.elasticsearch.search.builder.SearchSourceBuilder; 23 | 24 | public class StandardBuckets implements Buckets { 25 | 26 | private static final Logger logger = LogManager.getLogger(StandardBuckets.class); 27 | 28 | protected BucketFactory bucketFactory; 29 | 30 | protected Map params; 31 | 32 | public StandardBuckets(final BucketFactory bucketFactory, final Map params) { 33 | this.bucketFactory = bucketFactory; 34 | this.params = params; 35 | } 36 | 37 | @Override 38 | public SearchHit[] getHits(final 
SearchHit[] searchHits) { 39 | SearchHit[] hits = searchHits; 40 | final int length = hits.length; 41 | final String[] diversityFields = (String[]) params.get("diversity_fields"); 42 | if (diversityFields == null) { 43 | throw new ElasticsearchException("diversity_fields is null."); 44 | } 45 | final String[] thresholds = (String[]) params.get("diversity_thresholds"); 46 | if (thresholds == null) { 47 | throw new ElasticsearchException("diversity_thresholds is null."); 48 | } 49 | final float[] diversityThresholds = parseFloats(thresholds); 50 | final Object[][] ignoredObjGroups = new Object[diversityFields.length][]; 51 | for (int i = 0; i < diversityFields.length; i++) { 52 | ignoredObjGroups[i] = (String[]) params.get(diversityFields[i] + "_ignored_objects"); 53 | } 54 | 55 | if (logger.isDebugEnabled()) { 56 | logger.debug("diversity_fields: {}, : diversity_thresholds{}", diversityFields, thresholds); 57 | } 58 | int maxNumOfBuckets = 0; 59 | int minNumOfBuckets = Integer.MAX_VALUE; 60 | for (int i = diversityFields.length - 1; i >= 0; i--) { 61 | final String diversityField = diversityFields[i]; 62 | final float diversityThreshold = diversityThresholds[i]; 63 | final Object[] ignoredObjs = ignoredObjGroups[i]; 64 | final List bucketList = new ArrayList<>(); 65 | for (int j = 0; j < length; j++) { 66 | boolean insert = false; 67 | final SearchHit hit = hits[j]; 68 | final Object value = getFieldValue(hit, diversityField); 69 | if (value == null) { 70 | if (logger.isDebugEnabled()) { 71 | logger.debug("diversityField {} does not exist. 
Reranking is skipped.", diversityField); 72 | } 73 | return hits; 74 | } 75 | if (ignoredObjs != null) { 76 | for (final Object ignoredObj : ignoredObjs) { 77 | if (ignoredObj.equals(value)) { 78 | bucketList.add(bucketFactory.createBucket(hit, value, diversityThreshold)); 79 | insert = true; 80 | break; 81 | } 82 | } 83 | } 84 | if (!insert) { 85 | for (final Bucket bucket : bucketList) { 86 | if (bucket.contains(value)) { 87 | bucket.add(hit, value); 88 | insert = true; 89 | break; 90 | } 91 | } 92 | if (!insert) { 93 | bucketList.add(bucketFactory.createBucket(hit, value, diversityThreshold)); 94 | } 95 | } 96 | } 97 | if (bucketList.size() > maxNumOfBuckets) { 98 | maxNumOfBuckets = bucketList.size(); 99 | } 100 | if (bucketList.size() < minNumOfBuckets) { 101 | minNumOfBuckets = bucketList.size(); 102 | } 103 | hits = createHits(length, bucketList); 104 | } 105 | 106 | int minBucketThreshold = 0; 107 | int maxBucketThreshold = 0; 108 | 109 | final Object minBucketThresholdStr = params.get("min_bucket_threshold"); 110 | if (minBucketThresholdStr instanceof String) { 111 | try { 112 | minBucketThreshold = Integer.parseInt(minBucketThresholdStr.toString()); 113 | } catch (final NumberFormatException e) { 114 | throw new ElasticsearchException("Invalid value of min_bucket_threshold: " + minBucketThresholdStr.toString(), e); 115 | } 116 | } else if (minBucketThresholdStr instanceof Number) { 117 | minBucketThreshold = ((Number) minBucketThresholdStr).intValue(); 118 | } 119 | 120 | final Object maxBucketThresholdStr = params.get("max_bucket_threshold"); 121 | if (maxBucketThresholdStr instanceof String) { 122 | try { 123 | maxBucketThreshold = Integer.parseInt(maxBucketThresholdStr.toString()); 124 | } catch (final NumberFormatException e) { 125 | throw new ElasticsearchException("Invalid value of max_bucket_threshold: " + maxBucketThresholdStr.toString(), e); 126 | } 127 | } else if (maxBucketThresholdStr instanceof Number) { 128 | maxBucketThreshold = ((Number) 
maxBucketThresholdStr).intValue(); 129 | } 130 | 131 | if (logger.isDebugEnabled()) { 132 | logger.debug("searchHits: {}, minNumOfBuckets: {}, maxNumOfBuckets: {}, minBucketSize: {}, maxBucketThreshold: {}", 133 | hits.length, minNumOfBuckets, maxNumOfBuckets, minBucketThreshold, maxBucketThreshold); 134 | } 135 | 136 | if ((minBucketThreshold > 0 && minBucketThreshold >= minNumOfBuckets) 137 | || (maxBucketThreshold > 0 && maxBucketThreshold >= maxNumOfBuckets)) { 138 | final Object shuffleSeed = params.get("shuffle_seed"); 139 | if (shuffleSeed != null) { 140 | if (logger.isDebugEnabled()) { 141 | logger.debug("minBucketSize: {}", shuffleSeed); 142 | } 143 | throw new RetrySearchException(new RetrySearchException.QueryRewriter() { 144 | private static final long serialVersionUID = 1L; 145 | 146 | @Override 147 | public SearchSourceBuilder rewrite(final SearchSourceBuilder source) { 148 | float shuffleWeight = 1; 149 | if (params.get("shuffle_weight") instanceof Number) { 150 | shuffleWeight = ((Number) params.get("shuffle_weight")).floatValue(); 151 | } 152 | final Object shuffleBoostMode = params.get("shuffle_boost_mode"); 153 | 154 | final FunctionScoreQueryBuilder functionScoreQuery = QueryBuilders.functionScoreQuery(source.query(), 155 | new FunctionScoreQueryBuilder.FilterFunctionBuilder[] { new FunctionScoreQueryBuilder.FilterFunctionBuilder( 156 | ScoreFunctionBuilders.randomFunction().seed(shuffleSeed.toString()).setWeight(shuffleWeight)) }); 157 | if (shuffleBoostMode != null) { 158 | functionScoreQuery.boostMode(CombineFunction.fromString(shuffleBoostMode.toString())); 159 | } 160 | source.query(functionScoreQuery); 161 | return source; 162 | } 163 | }); 164 | } 165 | } 166 | 167 | return hits; 168 | } 169 | 170 | private Object getFieldValue(final SearchHit hit, final String fieldName) { 171 | final DocumentField field = hit.getFields().get(fieldName); 172 | if (field == null) { 173 | final Map source = hit.getSourceAsMap(); 174 | // TODO nested 175 | 
final Object object = source.get(fieldName); 176 | if (object instanceof String) { 177 | return object; 178 | } else if (object instanceof Number) { 179 | return object; 180 | } 181 | return null; 182 | } 183 | final Object object = field.getValue(); 184 | if (object instanceof BytesReference) { 185 | return BytesReference.toBytes((BytesReference) object); 186 | } else if (object instanceof String) { 187 | return object; 188 | } else if (object instanceof Number) { 189 | return object; 190 | } else if (object instanceof BytesArray) { 191 | return ((BytesArray) object).array(); 192 | } 193 | return null; 194 | } 195 | 196 | private float[] parseFloats(final String[] strings) { 197 | final float[] values = new float[strings.length]; 198 | for (int i = 0; i < strings.length; i++) { 199 | values[i] = Float.parseFloat(strings[i]); 200 | } 201 | return values; 202 | } 203 | 204 | protected SearchHit[] createHits(final int size, final List bucketList) { 205 | if (logger.isDebugEnabled()) { 206 | logger.debug("{} docs -> {} buckets", size, bucketList.size()); 207 | for (int i = 0; i < bucketList.size(); i++) { 208 | final Bucket bucket = bucketList.get(i); 209 | logger.debug(" bucket[{}] -> {} docs", i, bucket.size()); 210 | } 211 | } 212 | 213 | int pos = 0; 214 | final SearchHit[] newSearchHits = new SearchHit[size]; 215 | while (pos < size) { 216 | for (final Bucket bucket : bucketList) { 217 | final SearchHit hit = bucket.get(); 218 | if (hit != null) { 219 | newSearchHits[pos] = hit; 220 | pos++; 221 | bucket.consume(); 222 | } 223 | } 224 | } 225 | 226 | return newSearchHits; 227 | } 228 | 229 | } 230 | -------------------------------------------------------------------------------- /src/main/plugin-metadata/plugin-descriptor.properties: -------------------------------------------------------------------------------- 1 | description=This plugin re-orders top N documents in a search results. 
2 | version=${project.version} 3 | name=dynarank 4 | classname=${elasticsearch.plugin.classname} 5 | elasticsearch.version= 6 | java.version=${maven.compiler.target} 7 | extended.plugins=lang-painless 8 | 9 | -------------------------------------------------------------------------------- /src/main/plugin-metadata/plugin-security.policy: -------------------------------------------------------------------------------- 1 | grant { 2 | permission java.lang.reflect.ReflectPermission "suppressAccessChecks"; 3 | permission java.lang.RuntimePermission "getClassLoader"; 4 | }; 5 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/org.elasticsearch.painless.spi.PainlessExtension: -------------------------------------------------------------------------------- 1 | org.codelibs.elasticsearch.dynarank.painless.DynaRankWhitelistExtension -------------------------------------------------------------------------------- /src/main/resources/org/codelibs/elasticsearch/dynarank/painless/dynarank_whitelist.txt: -------------------------------------------------------------------------------- 1 | class org.elasticsearch.search.SearchHit { 2 | Map getSourceAsMap() 3 | } 4 | -------------------------------------------------------------------------------- /src/test/java/org/codelibs/elasticsearch/dynarank/DynamicRankingPluginTest.java: -------------------------------------------------------------------------------- 1 | package org.codelibs.elasticsearch.dynarank; 2 | 3 | import static org.codelibs.elasticsearch.runner.ElasticsearchClusterRunner.newConfigs; 4 | import static org.hamcrest.core.Is.is; 5 | import static org.junit.Assert.assertEquals; 6 | import static org.junit.Assert.assertFalse; 7 | import static org.junit.Assert.assertTrue; 8 | import static org.junit.Assert.fail; 9 | 10 | import org.codelibs.elasticsearch.dynarank.ranker.DynamicRanker; 11 | import 
org.codelibs.elasticsearch.dynarank.ranker.DynamicRanker.ScriptInfo; 12 | import org.codelibs.elasticsearch.runner.ElasticsearchClusterRunner; 13 | import org.elasticsearch.action.DocWriteResponse.Result; 14 | import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; 15 | import org.elasticsearch.action.index.IndexResponse; 16 | import org.elasticsearch.action.search.SearchResponse; 17 | import org.elasticsearch.action.support.master.AcknowledgedResponse; 18 | import org.elasticsearch.client.Client; 19 | import org.elasticsearch.common.settings.Settings; 20 | import org.elasticsearch.common.settings.Settings.Builder; 21 | import org.elasticsearch.index.query.BoolQueryBuilder; 22 | import org.elasticsearch.index.query.QueryBuilders; 23 | import org.elasticsearch.search.SearchHit; 24 | import org.elasticsearch.search.SearchHits; 25 | import org.elasticsearch.search.sort.SortBuilders; 26 | import org.elasticsearch.search.sort.SortOrder; 27 | import org.elasticsearch.xcontent.XContentBuilder; 28 | import org.elasticsearch.xcontent.XContentFactory; 29 | import org.elasticsearch.xcontent.XContentType; 30 | import org.junit.After; 31 | import org.junit.Before; 32 | import org.junit.Test; 33 | 34 | import java.util.Arrays; 35 | 36 | public class DynamicRankingPluginTest { 37 | ElasticsearchClusterRunner runner; 38 | 39 | private String clusterName; 40 | 41 | @Before 42 | public void setUp() throws Exception { 43 | clusterName = "es-dynarank-" + System.currentTimeMillis(); 44 | runner = new ElasticsearchClusterRunner(); 45 | runner.onBuild(new ElasticsearchClusterRunner.Builder() { 46 | @Override 47 | public void build(final int number, final Builder settingsBuilder) { 48 | settingsBuilder.put("dynarank.cache.clean_interval", "1s"); 49 | settingsBuilder.put("http.cors.enabled", true); 50 | settingsBuilder.put("http.cors.allow-origin", "*"); 51 | settingsBuilder.put("discovery.type", "single-node"); 52 | // settingsBuilder.putList("discovery.seed_hosts", 
"127.0.0.1:9301"); 53 | // settingsBuilder.putList("cluster.initial_master_nodes", "127.0.0.1:9301"); 54 | } 55 | }).build(newConfigs().numOfNode(1).clusterName(clusterName).pluginTypes( 56 | "org.codelibs.elasticsearch.dynarank.DynamicRankingPlugin" + ",org.codelibs.elasticsearch.minhash.MinHashPlugin")); 57 | runner.ensureGreen(); 58 | } 59 | 60 | @After 61 | public void tearDown() throws Exception { 62 | runner.close(); 63 | runner.clean(); 64 | } 65 | 66 | // @Test 67 | // public void scriptInfoCache() throws Exception { 68 | // 69 | // assertEquals(1, runner.getNodeSize()); 70 | // final Client client = runner.client(); 71 | // 72 | // final String index = "sample"; 73 | // final String alias = "test"; 74 | // final String type = "data"; 75 | // CreateIndexResponse createIndexResponse = runner.createIndex(index, 76 | // Settings.builder().put(DynamicRanker.SETTING_INDEX_DYNARANK_REORDER_SIZE.getKey(), 100) 77 | // .put(DynamicRanker.SETTING_INDEX_DYNARANK_LANG.getKey(), "painless") 78 | // .put(DynamicRanker.SETTING_INDEX_DYNARANK_SCRIPT.getKey(), 79 | // "Arrays.sort(hits, (s1,s2)-> s2.getSourceAsMap().get(\"counter\") - s1.getSourceAsMap().get(\"counter\"))") 80 | // .put(DynamicRanker.SETTING_INDEX_DYNARANK_PARAMS.getKey() + "foo", "bar").build()); 81 | // assertTrue(createIndexResponse.isAcknowledged()); 82 | // AcknowledgedResponse aliasesResponse = runner.updateAlias(alias, new String[] { index }, null); 83 | // assertTrue(aliasesResponse.isAcknowledged()); 84 | // 85 | // for (int i = 1; i <= 1000; i++) { 86 | // final IndexResponse indexResponse1 = runner.insert(index, type, String.valueOf(i), 87 | // "{\"id\":\"" + i + "\",\"msg\":\"test " + i + "\",\"counter\":" + i + "}"); 88 | // assertEquals(Result.CREATED, indexResponse1.getResult()); 89 | // } 90 | // 91 | // final DynamicRanker ranker = DynamicRanker.getInstance(); 92 | // { 93 | // final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 94 | // 
.addSort("counter", SortOrder.ASC).execute().actionGet(); 95 | // final SearchHits hits = searchResponse.getHits(); 96 | // assertEquals(1000, hits.getTotalHits().value); 97 | // assertEquals(10, hits.getHits().length); 98 | // assertEquals("100", hits.getHits()[0].getId()); 99 | // assertEquals("91", hits.getHits()[9].getId()); 100 | // } 101 | // 102 | // final ScriptInfo scriptInfo1 = ranker.getScriptInfo(index); 103 | // final ScriptInfo scriptInfo2 = ranker.getScriptInfo(index); 104 | // Thread.sleep(2000); 105 | // final ScriptInfo scriptInfo3 = ranker.getScriptInfo(index); 106 | // assertTrue(scriptInfo1 == scriptInfo2); 107 | // assertFalse(scriptInfo1 == scriptInfo3); 108 | // 109 | // { 110 | // final SearchResponse searchResponse = client.prepareSearch(alias).setQuery(QueryBuilders.matchAllQuery()) 111 | // .addSort("counter", SortOrder.ASC).execute().actionGet(); 112 | // final SearchHits hits = searchResponse.getHits(); 113 | // assertEquals(1000, hits.getTotalHits().value); 114 | // assertEquals(10, hits.getHits().length); 115 | // assertEquals("100", hits.getHits()[0].getId()); 116 | // assertEquals("91", hits.getHits()[9].getId()); 117 | // } 118 | // 119 | // final ScriptInfo scriptInfo4 = ranker.getScriptInfo(alias); 120 | // final ScriptInfo scriptInfo5 = ranker.getScriptInfo(alias); 121 | // Thread.sleep(2000); 122 | // final ScriptInfo scriptInfo6 = ranker.getScriptInfo(alias); 123 | // assertTrue(scriptInfo4 == scriptInfo5); 124 | // assertFalse(scriptInfo4 == scriptInfo6); 125 | // } 126 | 127 | // @Test 128 | // public void reorder() throws Exception { 129 | // 130 | // assertEquals(1, runner.getNodeSize()); 131 | // final Client client = runner.client(); 132 | // 133 | // final String index = "sample"; 134 | // final String alias = "test"; 135 | // final String type = "data"; 136 | // CreateIndexResponse createIndexResponse = runner.createIndex(index, 137 | // 
Settings.builder().put(DynamicRanker.SETTING_INDEX_DYNARANK_REORDER_SIZE.getKey(), 100) 138 | // .put(DynamicRanker.SETTING_INDEX_DYNARANK_LANG.getKey(), "painless") 139 | // .put(DynamicRanker.SETTING_INDEX_DYNARANK_SCRIPT.getKey(), 140 | // "Arrays.sort(hits, (s1,s2)-> s2.getSourceAsMap().get(\"counter\") - s1.getSourceAsMap().get(\"counter\"))") 141 | // .put(DynamicRanker.SETTING_INDEX_DYNARANK_PARAMS.getKey() + "foo", "bar").build()); 142 | // assertTrue(createIndexResponse.isAcknowledged()); 143 | // AcknowledgedResponse aliasesResponse = runner.updateAlias(alias, new String[] { index }, null); 144 | // assertTrue(aliasesResponse.isAcknowledged()); 145 | // 146 | // for (int i = 1; i <= 1000; i++) { 147 | // final IndexResponse indexResponse1 = runner.insert(index, type, String.valueOf(i), 148 | // "{\"id\":\"" + i + "\",\"msg\":\"test " + i + "\",\"counter\":" + i + "}"); 149 | // assertEquals(Result.CREATED, indexResponse1.getResult()); 150 | // } 151 | // 152 | // assertResultOrder(client, index); 153 | // assertResultOrder(client, alias); 154 | // 155 | // String index2 = index + "2"; 156 | // runner.createIndex(index2, (Settings) null); 157 | // runner.updateAlias(alias, new String[] { index2 }, null); 158 | // int tempId = 99999; 159 | // runner.insert(index2, type, String.valueOf(tempId), 160 | // "{\"id\":\"" + tempId + "\",\"msg\":\"test " + tempId + "\",\"counter\":" + tempId + "}"); 161 | // runner.delete(index2, type, String.valueOf(tempId)); 162 | // runner.refresh(); 163 | // assertResultOrder(client, alias); 164 | // } 165 | 166 | private void assertResultOrder(Client client, String index) { 167 | { 168 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 169 | .addSort("counter", SortOrder.ASC).execute().actionGet(); 170 | final SearchHits hits = searchResponse.getHits(); 171 | assertEquals(1000, hits.getTotalHits().value); 172 | assertEquals(10, hits.getHits().length); 173 | 
assertEquals("100", hits.getHits()[0].getId()); 174 | assertEquals("91", hits.getHits()[9].getId()); 175 | } 176 | 177 | { 178 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 179 | .addSort("counter", SortOrder.ASC).setFrom(50).execute().actionGet(); 180 | final SearchHits hits = searchResponse.getHits(); 181 | assertEquals(1000, hits.getTotalHits().value); 182 | assertEquals(10, hits.getHits().length); 183 | assertEquals("50", hits.getHits()[0].getId()); 184 | assertEquals("41", hits.getHits()[9].getId()); 185 | } 186 | 187 | { 188 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 189 | .addSort("counter", SortOrder.ASC).setFrom(90).execute().actionGet(); 190 | final SearchHits hits = searchResponse.getHits(); 191 | assertEquals(1000, hits.getTotalHits().value); 192 | assertEquals(10, hits.getHits().length); 193 | assertEquals("10", hits.getHits()[0].getId()); 194 | assertEquals("1", hits.getHits()[9].getId()); 195 | } 196 | 197 | { 198 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 199 | .addSort("counter", SortOrder.ASC).setFrom(91).execute().actionGet(); 200 | final SearchHits hits = searchResponse.getHits(); 201 | assertEquals(1000, hits.getTotalHits().value); 202 | assertEquals(10, hits.getHits().length); 203 | assertEquals("9", hits.getHits()[0].getId()); 204 | assertEquals("1", hits.getHits()[8].getId()); 205 | assertEquals("101", hits.getHits()[9].getId()); 206 | } 207 | 208 | { 209 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 210 | .addSort("counter", SortOrder.ASC).setFrom(95).execute().actionGet(); 211 | final SearchHits hits = searchResponse.getHits(); 212 | assertEquals(1000, hits.getTotalHits().value); 213 | assertEquals(10, hits.getHits().length); 214 | assertEquals("5", hits.getHits()[0].getId()); 215 | 
assertEquals("1", hits.getHits()[4].getId()); 216 | assertEquals("101", hits.getHits()[5].getId()); 217 | assertEquals("105", hits.getHits()[9].getId()); 218 | } 219 | 220 | { 221 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 222 | .addSort("counter", SortOrder.ASC).setFrom(99).execute().actionGet(); 223 | final SearchHits hits = searchResponse.getHits(); 224 | assertEquals(1000, hits.getTotalHits().value); 225 | assertEquals(10, hits.getHits().length); 226 | assertEquals("1", hits.getHits()[0].getId()); 227 | assertEquals("101", hits.getHits()[1].getId()); 228 | assertEquals("109", hits.getHits()[9].getId()); 229 | } 230 | 231 | { 232 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 233 | .addSort("counter", SortOrder.ASC).setFrom(100).execute().actionGet(); 234 | final SearchHits hits = searchResponse.getHits(); 235 | assertEquals(1000, hits.getTotalHits().value); 236 | assertEquals(10, hits.getHits().length); 237 | assertEquals("101", hits.getHits()[0].getId()); 238 | assertEquals("110", hits.getHits()[9].getId()); 239 | } 240 | 241 | { 242 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.rangeQuery("counter").from(0).to(20)) 243 | .addSort("counter", SortOrder.ASC).setFrom(0).execute().actionGet(); 244 | final SearchHits hits = searchResponse.getHits(); 245 | assertEquals(20, hits.getTotalHits().value); 246 | assertEquals(10, hits.getHits().length); 247 | assertEquals("20", hits.getHits()[0].getId()); 248 | assertEquals("11", hits.getHits()[9].getId()); 249 | } 250 | 251 | { 252 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.rangeQuery("counter").from(0).to(20)) 253 | .addSort("counter", SortOrder.ASC).setFrom(10).execute().actionGet(); 254 | final SearchHits hits = searchResponse.getHits(); 255 | assertEquals(20, hits.getTotalHits().value); 256 | 
assertEquals(10, hits.getHits().length); 257 | assertEquals("10", hits.getHits()[0].getId()); 258 | assertEquals("1", hits.getHits()[9].getId()); 259 | } 260 | 261 | { 262 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.rangeQuery("counter").from(0).to(20)) 263 | .addSort("counter", SortOrder.ASC).setFrom(11).execute().actionGet(); 264 | final SearchHits hits = searchResponse.getHits(); 265 | assertEquals(20, hits.getTotalHits().value); 266 | assertEquals(9, hits.getHits().length); 267 | assertEquals("9", hits.getHits()[0].getId()); 268 | assertEquals("1", hits.getHits()[8].getId()); 269 | } 270 | 271 | { 272 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.rangeQuery("counter")) 273 | .addSort("counter", SortOrder.ASC).setFrom(0).setSize(101).execute().actionGet(); 274 | final SearchHits hits = searchResponse.getHits(); 275 | assertEquals(1000, hits.getTotalHits().value); 276 | assertEquals(101, hits.getHits().length); 277 | assertEquals("100", hits.getHits()[0].getId()); 278 | assertEquals("1", hits.getHits()[99].getId()); 279 | assertEquals("101", hits.getHits()[100].getId()); 280 | } 281 | 282 | { 283 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.rangeQuery("counter").from(0).to(20)) 284 | .addSort("counter", SortOrder.ASC).setFrom(0).setSize(10).execute().actionGet(); 285 | final SearchHits hits = searchResponse.getHits(); 286 | assertEquals(20, hits.getTotalHits().value); 287 | assertEquals(10, hits.getHits().length); 288 | assertEquals("20", hits.getHits()[0].getId()); 289 | assertEquals("11", hits.getHits()[9].getId()); 290 | } 291 | 292 | { 293 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.rangeQuery("counter").from(0).to(20)) 294 | .addSort("counter", SortOrder.ASC).setFrom(15).setSize(10).execute().actionGet(); 295 | final SearchHits hits = searchResponse.getHits(); 296 | 
assertEquals(20, hits.getTotalHits().value); 297 | assertEquals(5, hits.getHits().length); 298 | assertEquals("5", hits.getHits()[0].getId()); 299 | assertEquals("1", hits.getHits()[4].getId()); 300 | } 301 | 302 | { 303 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.rangeQuery("counter").from(0).to(20)) 304 | .addSort("counter", SortOrder.ASC).setFrom(20).setSize(10).execute().actionGet(); 305 | final SearchHits hits = searchResponse.getHits(); 306 | assertEquals(20, hits.getTotalHits().value); 307 | assertEquals(0, hits.getHits().length); 308 | } 309 | 310 | { 311 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.rangeQuery("counter").from(0).to(20)) 312 | .addSort("counter", SortOrder.ASC).setFrom(21).setSize(10).execute().actionGet(); 313 | final SearchHits hits = searchResponse.getHits(); 314 | assertEquals(20, hits.getTotalHits().value); 315 | assertEquals(0, hits.getHits().length); 316 | } 317 | 318 | { 319 | final SearchResponse searchResponse = runner.search(index, QueryBuilders.queryStringQuery("msg:foo"), null, 0, 10); 320 | final SearchHits hits = searchResponse.getHits(); 321 | assertEquals(0, hits.getTotalHits().value); 322 | } 323 | 324 | for (int i = 0; i < 1000; i++) { 325 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 326 | .addSort("counter", SortOrder.ASC).execute().actionGet(); 327 | final SearchHits hits = searchResponse.getHits(); 328 | assertEquals(1000, hits.getTotalHits().value); 329 | assertEquals(10, hits.getHits().length); 330 | assertEquals("100", hits.getHits()[0].getId()); 331 | assertEquals("91", hits.getHits()[9].getId()); 332 | } 333 | } 334 | 335 | @Test 336 | public void standardBucketFactory() throws Exception { 337 | 338 | final String index = "test_index"; 339 | final String type = "_doc"; 340 | 341 | { 342 | // create an index 343 | final String indexSettings = "{\"index\":{" 344 | 
+ "\"dynarank\":{\"script_sort\":{\"lang\":\"dynarank_diversity_sort\",\"params\":{\"diversity_fields\":[\"category\"],\"diversity_thresholds\":[0.95,1],\"reorder_size\":20}}}}" 345 | + "}"; 346 | runner.createIndex(index, Settings.builder().loadFromSource(indexSettings, XContentType.JSON).build()); 347 | runner.ensureYellow(index); 348 | 349 | // create a mapping 350 | final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder()// 351 | .startObject()// 352 | .startObject(type)// 353 | .startObject("properties")// 354 | 355 | // id 356 | .startObject("id")// 357 | .field("type", "keyword")// 358 | .endObject()// 359 | 360 | // msg 361 | .startObject("msg")// 362 | .field("type", "text")// 363 | .endObject()// 364 | 365 | // category 366 | .startObject("category")// 367 | .field("type", "keyword")// 368 | .endObject()// 369 | 370 | // order 371 | .startObject("order")// 372 | .field("type", "long")// 373 | .endObject()// 374 | 375 | .endObject()// 376 | .endObject()// 377 | .endObject(); 378 | runner.createMapping(index, mappingBuilder); 379 | } 380 | 381 | if (!runner.indexExists(index)) { 382 | fail(); 383 | } 384 | 385 | insertTestData(index, 1, "aaa bbb ccc", "cat1"); 386 | insertTestData(index, 2, "aaa bbb ccc", "cat1"); 387 | insertTestData(index, 3, "aaa bbb ccc", "cat2"); 388 | insertTestData(index, 4, "aaa bbb ddd", "cat1"); 389 | insertTestData(index, 5, "aaa bbb ddd", "cat2"); 390 | insertTestData(index, 6, "aaa bbb ddd", "cat2"); 391 | insertTestData(index, 7, "aaa bbb eee", "cat1"); 392 | insertTestData(index, 8, "aaa bbb eee", "cat1"); 393 | insertTestData(index, 9, "aaa bbb eee", "cat2"); 394 | insertTestData(index, 10, "aaa bbb fff", "cat1"); 395 | 396 | { 397 | final SearchResponse response = runner.client().prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 398 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).storedFields("_source", "category") 399 | .setFrom(0).setSize(10).execute().actionGet(); 400 | final 
SearchHits searchHits = response.getHits(); 401 | assertEquals(10, searchHits.getTotalHits().value); 402 | final SearchHit[] hits = searchHits.getHits(); 403 | assertEquals("1", hits[0].getSourceAsMap().get("id")); 404 | assertEquals("3", hits[1].getSourceAsMap().get("id")); 405 | assertEquals("2", hits[2].getSourceAsMap().get("id")); 406 | assertEquals("5", hits[3].getSourceAsMap().get("id")); 407 | assertEquals("4", hits[4].getSourceAsMap().get("id")); 408 | assertEquals("6", hits[5].getSourceAsMap().get("id")); 409 | assertEquals("7", hits[6].getSourceAsMap().get("id")); 410 | assertEquals("9", hits[7].getSourceAsMap().get("id")); 411 | assertEquals("8", hits[8].getSourceAsMap().get("id")); 412 | assertEquals("10", hits[9].getSourceAsMap().get("id")); 413 | } 414 | 415 | // disable rerank 416 | { 417 | final SearchResponse response = runner.client().prepareSearch("_all").setQuery(QueryBuilders.matchAllQuery()) 418 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).setFrom(0).setSize(10).execute().actionGet(); 419 | final SearchHits searchHits = response.getHits(); 420 | assertEquals(10, searchHits.getTotalHits().value); 421 | final SearchHit[] hits = searchHits.getHits(); 422 | assertEquals("1", hits[0].getSourceAsMap().get("id")); 423 | assertEquals("2", hits[1].getSourceAsMap().get("id")); 424 | assertEquals("3", hits[2].getSourceAsMap().get("id")); 425 | assertEquals("4", hits[3].getSourceAsMap().get("id")); 426 | assertEquals("5", hits[4].getSourceAsMap().get("id")); 427 | assertEquals("6", hits[5].getSourceAsMap().get("id")); 428 | assertEquals("7", hits[6].getSourceAsMap().get("id")); 429 | assertEquals("8", hits[7].getSourceAsMap().get("id")); 430 | assertEquals("9", hits[8].getSourceAsMap().get("id")); 431 | assertEquals("10", hits[9].getSourceAsMap().get("id")); 432 | } 433 | } 434 | 435 | @Test 436 | public void diversityMultiSort() throws Exception { 437 | 438 | final String index = "test_index"; 439 | final String type = "_doc"; 440 | 
441 | { 442 | // create an index 443 | final String indexSettings = "{\"index\":{\"analysis\":{\"analyzer\":{" 444 | + "\"minhash_analyzer\":{\"type\":\"custom\",\"tokenizer\":\"standard\",\"filter\":[\"my_minhash\"]}" + "},\"filter\":{" 445 | + "\"my_minhash\":{\"type\":\"minhash\",\"seed\":1000}" + "}}}," 446 | + "\"dynarank\":{\"script_sort\":{\"lang\":\"dynarank_diversity_sort\",\"params\":{\"bucket_factory\":\"minhash\",\"diversity_fields\":[\"minhash_value\",\"category\"],\"diversity_thresholds\":[0.95,1]}},\"reorder_size\":20}" 447 | + "}"; 448 | runner.createIndex(index, Settings.builder().loadFromSource(indexSettings, XContentType.JSON).build()); 449 | runner.ensureYellow(index); 450 | 451 | // create a mapping 452 | final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder()// 453 | .startObject()// 454 | .startObject(type)// 455 | .startObject("properties")// 456 | 457 | // id 458 | .startObject("id")// 459 | .field("type", "keyword")// 460 | .endObject()// 461 | 462 | // msg 463 | .startObject("msg")// 464 | .field("type", "text")// 465 | .field("copy_to", "minhash_value")// 466 | .endObject()// 467 | 468 | // category 469 | .startObject("category")// 470 | .field("type", "keyword")// 471 | .endObject()// 472 | 473 | // order 474 | .startObject("order")// 475 | .field("type", "long")// 476 | .endObject()// 477 | 478 | // minhash 479 | .startObject("minhash_value")// 480 | .field("type", "minhash")// 481 | .field("store", true)// 482 | .field("minhash_analyzer", "minhash_analyzer")// 483 | .endObject()// 484 | 485 | .endObject()// 486 | .endObject()// 487 | .endObject(); 488 | runner.createMapping(index, mappingBuilder); 489 | } 490 | 491 | if (!runner.indexExists(index)) { 492 | fail(); 493 | } 494 | 495 | insertTestData(index, 1, "aaa bbb ccc", "cat1"); 496 | insertTestData(index, 2, "aaa bbb ccc", "cat1"); 497 | insertTestData(index, 3, "aaa bbb ccc", "cat2"); 498 | insertTestData(index, 4, "aaa bbb ddd", "cat1"); 499 | insertTestData(index, 
5, "aaa bbb ddd", "cat2"); 500 | insertTestData(index, 6, "aaa bbb ddd", "cat2"); 501 | insertTestData(index, 7, "aaa bbb eee", "cat1"); 502 | insertTestData(index, 8, "aaa bbb eee", "cat1"); 503 | insertTestData(index, 9, "aaa bbb eee", "cat2"); 504 | insertTestData(index, 10, "aaa bbb fff", "cat1"); 505 | 506 | { 507 | final SearchResponse response = runner.client().prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 508 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).storedFields("_source", "minhash_value", "category") 509 | .setFrom(0).setSize(10).execute().actionGet(); 510 | final SearchHits searchHits = response.getHits(); 511 | assertEquals(10, searchHits.getTotalHits().value); 512 | final SearchHit[] hits = searchHits.getHits(); 513 | assertEquals("1", hits[0].getSourceAsMap().get("id")); 514 | assertEquals("5", hits[1].getSourceAsMap().get("id")); 515 | assertEquals("7", hits[2].getSourceAsMap().get("id")); 516 | assertEquals("10", hits[3].getSourceAsMap().get("id")); 517 | assertEquals("3", hits[4].getSourceAsMap().get("id")); 518 | assertEquals("4", hits[5].getSourceAsMap().get("id")); 519 | assertEquals("9", hits[6].getSourceAsMap().get("id")); 520 | assertEquals("2", hits[7].getSourceAsMap().get("id")); 521 | assertEquals("6", hits[8].getSourceAsMap().get("id")); 522 | assertEquals("8", hits[9].getSourceAsMap().get("id")); 523 | } 524 | 525 | // disable rerank 526 | { 527 | final SearchResponse response = runner.client().prepareSearch("_all").setQuery(QueryBuilders.matchAllQuery()) 528 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).setFrom(0).setSize(10).execute().actionGet(); 529 | final SearchHits searchHits = response.getHits(); 530 | assertEquals(10, searchHits.getTotalHits().value); 531 | final SearchHit[] hits = searchHits.getHits(); 532 | assertEquals("1", hits[0].getSourceAsMap().get("id")); 533 | assertEquals("2", hits[1].getSourceAsMap().get("id")); 534 | assertEquals("3", 
hits[2].getSourceAsMap().get("id")); 535 | assertEquals("4", hits[3].getSourceAsMap().get("id")); 536 | assertEquals("5", hits[4].getSourceAsMap().get("id")); 537 | assertEquals("6", hits[5].getSourceAsMap().get("id")); 538 | assertEquals("7", hits[6].getSourceAsMap().get("id")); 539 | assertEquals("8", hits[7].getSourceAsMap().get("id")); 540 | assertEquals("9", hits[8].getSourceAsMap().get("id")); 541 | assertEquals("10", hits[9].getSourceAsMap().get("id")); 542 | } 543 | } 544 | 545 | private void insertTestData(final String index, final int id, final String msg, final String category) { 546 | assertEquals(Result.CREATED, 547 | runner.insert(index, String.valueOf(id), 548 | "{\"id\":\"" + id + "\",\"msg\":\"" + msg + "\",\"category\":\"" + category + "\",\"order\":" + id + "}") 549 | .getResult()); 550 | 551 | } 552 | 553 | @Test 554 | public void diversitySort() throws Exception { 555 | 556 | final String index = "test_index"; 557 | final String type = "_doc"; 558 | 559 | { 560 | // create an index 561 | final String indexSettings = "{\"index\":{\"analysis\":{\"analyzer\":{" 562 | + "\"minhash_analyzer\":{\"type\":\"custom\",\"tokenizer\":\"standard\",\"filter\":[\"my_minhash\"]}" + "},\"filter\":{" 563 | + "\"my_minhash\":{\"type\":\"minhash\",\"seed\":1000}" + "}}}," 564 | + "\"dynarank\":{\"script_sort\":{\"lang\":\"dynarank_diversity_sort\",\"params\":{\"bucket_factory\":\"minhash\",\"diversity_fields\":[\"minhash_value\"],\"diversity_thresholds\":[0.95]}},\"reorder_size\":20}" 565 | + "}"; 566 | runner.createIndex(index, Settings.builder().loadFromSource(indexSettings, XContentType.JSON).build()); 567 | runner.ensureYellow(index); 568 | 569 | // create a mapping 570 | final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder()// 571 | .startObject()// 572 | .startObject(type)// 573 | .startObject("properties")// 574 | 575 | // id 576 | .startObject("id")// 577 | .field("type", "keyword")// 578 | .endObject()// 579 | 580 | // msg 581 | 
.startObject("msg")// 582 | .field("type", "text")// 583 | .field("copy_to", "minhash_value")// 584 | .endObject()// 585 | 586 | // order 587 | .startObject("order")// 588 | .field("type", "long")// 589 | .endObject()// 590 | 591 | // minhash 592 | .startObject("minhash_value")// 593 | .field("type", "minhash")// 594 | .field("store", true)// 595 | .field("minhash_analyzer", "minhash_analyzer")// 596 | .endObject()// 597 | 598 | .endObject()// 599 | .endObject()// 600 | .endObject(); 601 | runner.createMapping(index, mappingBuilder); 602 | } 603 | 604 | if (!runner.indexExists(index)) { 605 | fail(); 606 | } 607 | 608 | // create 1000 documents 609 | final StringBuilder[] texts = createTexts(); 610 | for (int i = 1; i <= 100; i++) { 611 | // System.out.println(texts[i - 1]); 612 | final IndexResponse indexResponse1 = runner.insert(index, String.valueOf(i), 613 | "{\"id\":\"" + i + "\",\"msg\":\"" + texts[i - 1].toString() + "\",\"order\":" + i + "}"); 614 | assertEquals(Result.CREATED, indexResponse1.getResult()); 615 | } 616 | runner.refresh(); 617 | 618 | { 619 | final SearchResponse response = runner.client().prepareSearch(index).setQuery(QueryBuilders.idsQuery().addIds("0")) 620 | .storedFields("_source", "minhash_value").setFrom(20).setSize(10).execute().actionGet(); 621 | final SearchHits searchHits = response.getHits(); 622 | assertEquals(0, searchHits.getTotalHits().value); 623 | } 624 | 625 | { 626 | final SearchResponse response = runner.client().prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 627 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).storedFields("_source", "minhash_value").setFrom(0) 628 | .setSize(10).execute().actionGet(); 629 | final SearchHits searchHits = response.getHits(); 630 | assertEquals(100, searchHits.getTotalHits().value); 631 | final SearchHit[] hits = searchHits.getHits(); 632 | assertEquals("1", hits[0].getSourceAsMap().get("id")); 633 | assertEquals("7", hits[1].getSourceAsMap().get("id")); 634 | 
assertEquals("10", hits[2].getSourceAsMap().get("id")); 635 | assertEquals("13", hits[3].getSourceAsMap().get("id")); 636 | assertEquals("18", hits[4].getSourceAsMap().get("id")); 637 | assertEquals("2", hits[5].getSourceAsMap().get("id")); 638 | assertEquals("8", hits[6].getSourceAsMap().get("id")); 639 | assertEquals("11", hits[7].getSourceAsMap().get("id")); 640 | assertEquals("14", hits[8].getSourceAsMap().get("id")); 641 | assertEquals("19", hits[9].getSourceAsMap().get("id")); 642 | } 643 | 644 | { 645 | final SearchResponse response = runner.client().prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 646 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).storedFields("_source", "minhash_value").setFrom(10) 647 | .setSize(10).execute().actionGet(); 648 | final SearchHits searchHits = response.getHits(); 649 | assertEquals(100, searchHits.getTotalHits().value); 650 | final SearchHit[] hits = searchHits.getHits(); 651 | assertEquals("3", hits[0].getSourceAsMap().get("id")); 652 | assertEquals("9", hits[1].getSourceAsMap().get("id")); 653 | assertEquals("12", hits[2].getSourceAsMap().get("id")); 654 | assertEquals("15", hits[3].getSourceAsMap().get("id")); 655 | assertEquals("20", hits[4].getSourceAsMap().get("id")); 656 | assertEquals("4", hits[5].getSourceAsMap().get("id")); 657 | assertEquals("16", hits[6].getSourceAsMap().get("id")); 658 | assertEquals("5", hits[7].getSourceAsMap().get("id")); 659 | assertEquals("17", hits[8].getSourceAsMap().get("id")); 660 | assertEquals("6", hits[9].getSourceAsMap().get("id")); 661 | } 662 | 663 | { 664 | final SearchResponse response = runner.client().prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 665 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).storedFields("_source", "minhash_value").setFrom(20) 666 | .setSize(10).execute().actionGet(); 667 | final SearchHits searchHits = response.getHits(); 668 | assertEquals(100, searchHits.getTotalHits().value); 669 | final 
SearchHit[] hits = searchHits.getHits(); 670 | assertEquals("21", hits[0].getSourceAsMap().get("id")); 671 | assertEquals("22", hits[1].getSourceAsMap().get("id")); 672 | assertEquals("23", hits[2].getSourceAsMap().get("id")); 673 | assertEquals("24", hits[3].getSourceAsMap().get("id")); 674 | assertEquals("25", hits[4].getSourceAsMap().get("id")); 675 | assertEquals("26", hits[5].getSourceAsMap().get("id")); 676 | assertEquals("27", hits[6].getSourceAsMap().get("id")); 677 | assertEquals("28", hits[7].getSourceAsMap().get("id")); 678 | assertEquals("29", hits[8].getSourceAsMap().get("id")); 679 | assertEquals("30", hits[9].getSourceAsMap().get("id")); 680 | } 681 | 682 | final BoolQueryBuilder testQuery = QueryBuilders.boolQuery().should(QueryBuilders.rangeQuery("order").from(1).to(5)) 683 | .should(QueryBuilders.termQuery("order", 20)).should(QueryBuilders.termQuery("order", 30)) 684 | .should(QueryBuilders.termQuery("order", 40)).should(QueryBuilders.termQuery("order", 50)) 685 | .should(QueryBuilders.termQuery("order", 60)).should(QueryBuilders.termQuery("order", 70)) 686 | .should(QueryBuilders.rangeQuery("order").from(80).to(90)); 687 | { 688 | final SearchResponse response = runner.client().prepareSearch(index).setQuery(testQuery) 689 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).storedFields("_source", "minhash_value").setFrom(0) 690 | .setSize(10).execute().actionGet(); 691 | final SearchHits searchHits = response.getHits(); 692 | assertEquals(22, searchHits.getTotalHits().value); 693 | final SearchHit[] hits = searchHits.getHits(); 694 | assertEquals("1", hits[0].getSourceAsMap().get("id")); 695 | assertEquals("20", hits[1].getSourceAsMap().get("id")); 696 | assertEquals("30", hits[2].getSourceAsMap().get("id")); 697 | assertEquals("40", hits[3].getSourceAsMap().get("id")); 698 | assertEquals("50", hits[4].getSourceAsMap().get("id")); 699 | assertEquals("60", hits[5].getSourceAsMap().get("id")); 700 | assertEquals("70", 
hits[6].getSourceAsMap().get("id")); 701 | assertEquals("82", hits[7].getSourceAsMap().get("id")); 702 | assertEquals("87", hits[8].getSourceAsMap().get("id")); 703 | assertEquals("2", hits[9].getSourceAsMap().get("id")); 704 | } 705 | 706 | { 707 | final SearchResponse response = runner.client().prepareSearch(index).setQuery(testQuery) 708 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).storedFields("_source", "minhash_value").setFrom(10) 709 | .setSize(10).execute().actionGet(); 710 | final SearchHits searchHits = response.getHits(); 711 | assertEquals(22, searchHits.getTotalHits().value); 712 | final SearchHit[] hits = searchHits.getHits(); 713 | assertEquals("80", hits[0].getSourceAsMap().get("id")); 714 | assertEquals("83", hits[1].getSourceAsMap().get("id")); 715 | assertEquals("88", hits[2].getSourceAsMap().get("id")); 716 | assertEquals("3", hits[3].getSourceAsMap().get("id")); 717 | assertEquals("81", hits[4].getSourceAsMap().get("id")); 718 | assertEquals("84", hits[5].getSourceAsMap().get("id")); 719 | assertEquals("4", hits[6].getSourceAsMap().get("id")); 720 | assertEquals("85", hits[7].getSourceAsMap().get("id")); 721 | assertEquals("5", hits[8].getSourceAsMap().get("id")); 722 | assertEquals("86", hits[9].getSourceAsMap().get("id")); 723 | } 724 | 725 | { 726 | final SearchResponse response = runner.client().prepareSearch(index).setQuery(testQuery) 727 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).storedFields("_source", "minhash_value").setFrom(20) 728 | .setSize(10).execute().actionGet(); 729 | final SearchHits searchHits = response.getHits(); 730 | assertEquals(22, searchHits.getTotalHits().value); 731 | final SearchHit[] hits = searchHits.getHits(); 732 | assertEquals("89", hits[0].getSourceAsMap().get("id")); 733 | assertEquals("90", hits[1].getSourceAsMap().get("id")); 734 | } 735 | 736 | for (int i = 0; i < 1000; i++) { 737 | final SearchResponse response = 
runner.client().prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 738 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).storedFields("_source", "minhash_value").setFrom(0) 739 | .setSize(10).execute().actionGet(); 740 | final SearchHits searchHits = response.getHits(); 741 | assertEquals(100, searchHits.getTotalHits().value); 742 | final SearchHit[] hits = searchHits.getHits(); 743 | assertEquals("1", hits[0].getSourceAsMap().get("id")); 744 | assertEquals("7", hits[1].getSourceAsMap().get("id")); 745 | assertEquals("10", hits[2].getSourceAsMap().get("id")); 746 | assertEquals("13", hits[3].getSourceAsMap().get("id")); 747 | assertEquals("18", hits[4].getSourceAsMap().get("id")); 748 | assertEquals("2", hits[5].getSourceAsMap().get("id")); 749 | assertEquals("8", hits[6].getSourceAsMap().get("id")); 750 | assertEquals("11", hits[7].getSourceAsMap().get("id")); 751 | assertEquals("14", hits[8].getSourceAsMap().get("id")); 752 | assertEquals("19", hits[9].getSourceAsMap().get("id")); 753 | } 754 | 755 | } 756 | 757 | // @Test 758 | // public void diversitySortWithShuffleMin() throws Exception { 759 | // diversitySortWithShuffle("min_bucket_threshold"); 760 | // } 761 | // 762 | // @Test 763 | // public void diversitySortWithShuffleMax() throws Exception { 764 | // diversitySortWithShuffle("max_bucket_threshold"); 765 | // } 766 | 767 | private void diversitySortWithShuffle(String name) throws Exception { 768 | final String index = "test_index"; 769 | final String type = "_doc"; 770 | 771 | { 772 | // create an index 773 | final String indexSettings = "{\"index\":{\"analysis\":{\"analyzer\":{" 774 | + "\"minhash_analyzer\":{\"type\":\"custom\",\"tokenizer\":\"standard\",\"filter\":[\"my_minhash\"]}" + "},\"filter\":{" 775 | + "\"my_minhash\":{\"type\":\"minhash\",\"seed\":1000}" + "}}}," 776 | + 
"\"dynarank\":{\"script_sort\":{\"lang\":\"dynarank_diversity_sort\",\"params\":{\"bucket_factory\":\"minhash\",\"diversity_fields\":[\"minhash_value\"],\"diversity_thresholds\":[0.95],\"" 777 | + name + "\":\"1\",\"shuffle_seed\":\"1\"}},\"reorder_size\":10}" + "}"; 778 | runner.createIndex(index, Settings.builder().loadFromSource(indexSettings, XContentType.JSON).build()); 779 | runner.ensureYellow(index); 780 | 781 | // create a mapping 782 | final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder()// 783 | .startObject()// 784 | .startObject(type)// 785 | .startObject("properties")// 786 | 787 | // id 788 | .startObject("id")// 789 | .field("type", "keyword")// 790 | .endObject()// 791 | 792 | // msg 793 | .startObject("msg")// 794 | .field("type", "text")// 795 | .field("copy_to", "minhash_value")// 796 | .endObject()// 797 | 798 | // order 799 | .startObject("order")// 800 | .field("type", "long")// 801 | .endObject()// 802 | 803 | // minhash 804 | .startObject("minhash_value")// 805 | .field("type", "minhash")// 806 | .field("store", true)// 807 | .field("minhash_analyzer", "minhash_analyzer")// 808 | .endObject()// 809 | 810 | .endObject()// 811 | .endObject()// 812 | .endObject(); 813 | runner.createMapping(index, mappingBuilder); 814 | } 815 | 816 | if (!runner.indexExists(index)) { 817 | fail(); 818 | } 819 | 820 | // create 200 documents 821 | final StringBuilder[] texts = createTexts(); 822 | for (int i = 1; i <= 200; i++) { 823 | final IndexResponse indexResponse1 = runner.insert(index, String.valueOf(i), 824 | "{\"id\":\"" + i + "\",\"msg\":\"" + texts[(i - 1) % 10].toString() + "\",\"order\":" + i + "}"); 825 | assertEquals(Result.CREATED, indexResponse1.getResult()); 826 | } 827 | runner.refresh(); 828 | 829 | { 830 | final SearchResponse response = runner.client().prepareSearch(index) 831 | .setQuery(QueryBuilders.termQuery("msg", "aaa0")).storedFields("_source", "minhash_value") 832 | .addSort("_score", SortOrder.DESC).addSort("order", 
SortOrder.ASC).setFrom(0).setSize(5).execute().actionGet(); 833 | final SearchHits searchHits = response.getHits(); 834 | assertEquals(20, searchHits.getTotalHits().value); 835 | final SearchHit[] hits = searchHits.getHits(); 836 | assertEquals("21", hits[0].getSourceAsMap().get("id")); 837 | assertEquals("161", hits[1].getSourceAsMap().get("id")); 838 | assertEquals("81", hits[2].getSourceAsMap().get("id")); 839 | assertEquals("51", hits[3].getSourceAsMap().get("id")); 840 | assertEquals("191", hits[4].getSourceAsMap().get("id")); 841 | } 842 | 843 | { 844 | final SearchResponse response = runner.client().prepareSearch(index) 845 | .setQuery(QueryBuilders.termQuery("msg", "aaa0")).storedFields("_source", "minhash_value") 846 | .addSort("_score", SortOrder.DESC).addSort("order", SortOrder.ASC).setFrom(5).setSize(5).execute().actionGet(); 847 | final SearchHits searchHits = response.getHits(); 848 | assertEquals(20, searchHits.getTotalHits().value); 849 | final SearchHit[] hits = searchHits.getHits(); 850 | assertEquals("111", hits[0].getSourceAsMap().get("id")); 851 | assertEquals("61", hits[1].getSourceAsMap().get("id")); 852 | assertEquals("151", hits[2].getSourceAsMap().get("id")); 853 | assertEquals("131", hits[3].getSourceAsMap().get("id")); 854 | assertEquals("71", hits[4].getSourceAsMap().get("id")); 855 | } 856 | 857 | { 858 | final SearchResponse response = runner.client().prepareSearch(index) 859 | .setQuery(QueryBuilders.termQuery("msg", "aaa0")).storedFields("_source", "minhash_value") 860 | .addSort("_score", SortOrder.DESC).addSort("order", SortOrder.ASC).setFrom(10).setSize(10).execute().actionGet(); 861 | final SearchHits searchHits = response.getHits(); 862 | assertEquals(20, searchHits.getTotalHits().value); 863 | final SearchHit[] hits = searchHits.getHits(); 864 | assertEquals("91", hits[0].getSourceAsMap().get("id")); 865 | assertEquals("141", hits[1].getSourceAsMap().get("id")); 866 | assertEquals("151", hits[2].getSourceAsMap().get("id")); 
867 | assertEquals("171", hits[3].getSourceAsMap().get("id")); 868 | assertEquals("1", hits[4].getSourceAsMap().get("id")); 869 | assertEquals("71", hits[5].getSourceAsMap().get("id")); 870 | assertEquals("81", hits[6].getSourceAsMap().get("id")); 871 | assertEquals("111", hits[7].getSourceAsMap().get("id")); 872 | assertEquals("121", hits[8].getSourceAsMap().get("id")); 873 | assertEquals("181", hits[9].getSourceAsMap().get("id")); 874 | } 875 | } 876 | 877 | private StringBuilder[] createTexts() { 878 | final StringBuilder[] texts = new StringBuilder[100]; 879 | for (int i = 0; i < 100; i++) { 880 | texts[i] = new StringBuilder(); 881 | } 882 | for (int i = 0; i < 100; i++) { 883 | for (int j = 0; j < 100; j++) { 884 | if (i - j >= 0) { 885 | texts[j].append(" aaa" + i); 886 | } else { 887 | texts[j].append(" bbb" + i); 888 | } 889 | } 890 | } 891 | return texts; 892 | } 893 | 894 | @Test 895 | public void skipReorder() throws Exception { 896 | 897 | assertEquals(1, runner.getNodeSize()); 898 | final Client client = runner.client(); 899 | 900 | final String index = "sample"; 901 | runner.createIndex(index, 902 | Settings.builder().put(DynamicRanker.SETTING_INDEX_DYNARANK_REORDER_SIZE.getKey(), 100) 903 | .put(DynamicRanker.SETTING_INDEX_DYNARANK_SCRIPT.getKey(), 904 | "searchHits.sort {s1, s2 -> s2.getSourceAsMap().get('counter') - s1.getSourceAsMap().get('counter')} as org.elasticsearch.search.SearchHit[]") 905 | .put(DynamicRanker.SETTING_INDEX_DYNARANK_PARAMS.getKey() + "foo", "bar").build()); 906 | 907 | for (int i = 1; i <= 1000; i++) { 908 | final IndexResponse indexResponse1 = runner.insert(index, String.valueOf(i), 909 | "{\"id\":\"" + i + "\",\"msg\":\"test " + i + "\",\"counter\":" + i + "}"); 910 | assertEquals(Result.CREATED, indexResponse1.getResult()); 911 | } 912 | 913 | { 914 | final SearchResponse searchResponse = client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 915 | .addSort("counter", 
SortOrder.ASC).setScroll("1m").execute().actionGet(); 916 | final SearchHits hits = searchResponse.getHits(); 917 | assertEquals(1000, hits.getTotalHits().value); 918 | assertEquals(10, hits.getHits().length); 919 | assertEquals("1", hits.getHits()[0].getId()); 920 | assertEquals("10", hits.getHits()[9].getId()); 921 | } 922 | 923 | } 924 | 925 | @Test 926 | public void skipReorder_scrollSearch() throws Exception { 927 | 928 | assertEquals(1, runner.getNodeSize()); 929 | final Client client = runner.client(); 930 | 931 | final String index = "sample"; 932 | runner.createIndex(index, 933 | Settings.builder().put(DynamicRanker.SETTING_INDEX_DYNARANK_REORDER_SIZE.getKey(), 100) 934 | .put(DynamicRanker.SETTING_INDEX_DYNARANK_SCRIPT.getKey(), 935 | "Arrays.sort(hits, (s1,s2)-> s2.getSourceAsMap().get(\"counter\") - s1.getSourceAsMap().get(\"counter\"))") 936 | .put(DynamicRanker.SETTING_INDEX_DYNARANK_PARAMS.getKey() + "foo", "bar").build()); 937 | 938 | for (int i = 1; i <= 1000; i++) { 939 | final IndexResponse indexResponse1 = runner.insert(index, String.valueOf(i), 940 | "{\"id\":\"" + i + "\",\"msg\":\"test " + i + "\",\"counter\":" + i + "}"); 941 | assertEquals(Result.CREATED, indexResponse1.getResult()); 942 | } 943 | 944 | { 945 | final SearchResponse searchResponse = 946 | client.prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()).addSort("counter", SortOrder.ASC) 947 | //.putHeader("_rerank", false) 948 | .execute().actionGet(); 949 | final SearchHits hits = searchResponse.getHits(); 950 | assertEquals(1000, hits.getTotalHits().value); 951 | assertEquals(10, hits.getHits().length); 952 | assertEquals("1", hits.getHits()[0].getId()); 953 | assertEquals("10", hits.getHits()[9].getId()); 954 | } 955 | 956 | } 957 | 958 | @Test 959 | public void reorder_with_ignored() throws Exception { 960 | final String index = "test_index"; 961 | final String type = "_doc"; 962 | 963 | { 964 | // create an index 965 | final String indexSettings = 
"{\"index\":{\"analysis\":{\"analyzer\":{" 966 | + "\"minhash_analyzer\":{\"type\":\"custom\",\"tokenizer\":\"standard\",\"filter\":[\"my_minhash\"]}" + "},\"filter\":{" 967 | + "\"my_minhash\":{\"type\":\"minhash\",\"seed\":1000}" + "}}}," 968 | + "\"dynarank\":{\"script_sort\":{\"lang\":\"dynarank_diversity_sort\",\"params\":{\"bucket_factory\":\"minhash\",\"diversity_fields\":[\"minhash_value\",\"category\"],\"diversity_thresholds\":[0.95,1],\"category_ignored_objects\":[\"category1\"]}},\"reorder_size\":20}" 969 | + "}"; 970 | runner.createIndex(index, Settings.builder().loadFromSource(indexSettings, XContentType.JSON).build()); 971 | runner.ensureYellow(index); 972 | 973 | // create a mapping 974 | final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder()// 975 | .startObject()// 976 | .startObject(type)// 977 | .startObject("properties")// 978 | 979 | // id 980 | .startObject("id")// 981 | .field("type", "keyword")// 982 | .endObject()// 983 | 984 | // msg 985 | .startObject("msg")// 986 | .field("type", "text")// 987 | .field("copy_to", "minhash_value")// 988 | .endObject()// 989 | 990 | // category 991 | .startObject("category")// 992 | .field("type", "keyword")// 993 | .endObject()// 994 | 995 | // order 996 | .startObject("order")// 997 | .field("type", "long")// 998 | .endObject()// 999 | 1000 | // minhash 1001 | .startObject("minhash_value")// 1002 | .field("type", "minhash")// 1003 | .field("store", true)// 1004 | .field("minhash_analyzer", "minhash_analyzer")// 1005 | .endObject()// 1006 | 1007 | .endObject()// 1008 | .endObject()// 1009 | .endObject(); 1010 | runner.createMapping(index, mappingBuilder); 1011 | } 1012 | 1013 | if (!runner.indexExists(index)) { 1014 | fail(); 1015 | } 1016 | 1017 | // create 1000 documents 1018 | final StringBuilder[] texts = createTexts(); 1019 | for (int i = 1; i <= 100; i++) { 1020 | // System.out.println(texts[i - 1]); 1021 | final IndexResponse indexResponse1 = runner.insert(index, String.valueOf(i), 
"{\"id\":\"" + i + "\",\"msg\":\"" 1022 | + texts[i - 1].toString() + "\",\"category\":\"category" + (i % 2) + "\",\"order\":" + i + "}"); 1023 | assertEquals(Result.CREATED, indexResponse1.getResult()); 1024 | } 1025 | runner.refresh(); 1026 | 1027 | { 1028 | final SearchResponse response = runner.client().prepareSearch(index).setQuery(QueryBuilders.matchAllQuery()) 1029 | .addSort(SortBuilders.fieldSort("order").order(SortOrder.ASC)).storedFields("_source", "minhash_value", "category") 1030 | .setFrom(0).setSize(10).execute().actionGet(); 1031 | final SearchHits searchHits = response.getHits(); 1032 | assertEquals(100, searchHits.getTotalHits().value); 1033 | final SearchHit[] hits = searchHits.getHits(); 1034 | assertEquals("1", hits[0].getSourceAsMap().get("id")); 1035 | assertEquals("7", hits[1].getSourceAsMap().get("id")); 1036 | assertEquals("11", hits[2].getSourceAsMap().get("id")); 1037 | assertEquals("17", hits[3].getSourceAsMap().get("id")); 1038 | assertEquals("2", hits[4].getSourceAsMap().get("id")); 1039 | assertEquals("9", hits[5].getSourceAsMap().get("id")); 1040 | assertEquals("13", hits[6].getSourceAsMap().get("id")); 1041 | assertEquals("19", hits[7].getSourceAsMap().get("id")); 1042 | assertEquals("3", hits[8].getSourceAsMap().get("id")); 1043 | assertEquals("8", hits[9].getSourceAsMap().get("id")); 1044 | } 1045 | 1046 | } 1047 | 1048 | @Test 1049 | public void keepTop5() throws Exception { 1050 | final String index = "test_index"; 1051 | final String type = "_doc"; 1052 | 1053 | { 1054 | // create an index 1055 | final String indexSettings = "{\"index\":{\"analysis\":{\"analyzer\":{" 1056 | + "\"minhash_analyzer\":{\"type\":\"custom\",\"tokenizer\":\"standard\",\"filter\":[\"my_minhash\"]}" 1057 | + "},\"filter\":{" 1058 | + "\"my_minhash\":{\"type\":\"minhash\",\"seed\":1000}" 1059 | + "}}}," 1060 | + 
"\"dynarank\":{\"script_sort\":{\"lang\":\"dynarank_diversity_sort\",\"params\":{\"bucket_factory\":\"minhash\",\"diversity_fields\":[\"minhash_value\",\"category\"],\"diversity_thresholds\":[0.95,1],\"category_ignored_objects\":[\"category1\"]}},\"reorder_size\":20,\"keep_topn\":5}" 1061 | + "}"; 1062 | runner.createIndex(index, Settings.builder() 1063 | .loadFromSource(indexSettings, XContentType.JSON).build()); 1064 | runner.ensureYellow(index); 1065 | 1066 | // create a mapping 1067 | final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder()// 1068 | .startObject()// 1069 | .startObject(type)// 1070 | .startObject("properties")// 1071 | 1072 | // id 1073 | .startObject("id")// 1074 | .field("type", "keyword")// 1075 | .endObject()// 1076 | 1077 | // msg 1078 | .startObject("msg")// 1079 | .field("type", "text")// 1080 | .field("copy_to", "minhash_value")// 1081 | .endObject()// 1082 | 1083 | // category 1084 | .startObject("category")// 1085 | .field("type", "keyword")// 1086 | .endObject()// 1087 | 1088 | // order 1089 | .startObject("order")// 1090 | .field("type", "long")// 1091 | .endObject()// 1092 | 1093 | // minhash 1094 | .startObject("minhash_value")// 1095 | .field("type", "minhash")// 1096 | .field("store", true)// 1097 | .field("minhash_analyzer", "minhash_analyzer")// 1098 | .endObject()// 1099 | 1100 | .endObject()// 1101 | .endObject()// 1102 | .endObject(); 1103 | runner.createMapping(index, mappingBuilder); 1104 | } 1105 | 1106 | if (!runner.indexExists(index)) { 1107 | fail(); 1108 | } 1109 | 1110 | // create 1000 documents 1111 | final StringBuilder[] texts = createTexts(); 1112 | for (int i = 1; i <= 100; i++) { 1113 | // System.out.println(texts[i - 1]); 1114 | final IndexResponse indexResponse1 = runner.insert(index, 1115 | String.valueOf(i), 1116 | "{\"id\":\"" + i + "\",\"msg\":\"" + texts[i - 1].toString() 1117 | + "\",\"category\":\"category" + (i % 2) 1118 | + "\",\"order\":" + i + "}"); 1119 | assertEquals(Result.CREATED, 
indexResponse1.getResult()); 1120 | } 1121 | runner.refresh(); 1122 | 1123 | { 1124 | final SearchResponse response = runner.client().prepareSearch(index) 1125 | .setQuery(QueryBuilders.matchAllQuery()) 1126 | .addSort(SortBuilders.fieldSort("order") 1127 | .order(SortOrder.ASC)) 1128 | .storedFields("_source", "minhash_value", "category") 1129 | .setFrom(0).setSize(10).execute().actionGet(); 1130 | final SearchHits searchHits = response.getHits(); 1131 | assertEquals(100, searchHits.getTotalHits().value); 1132 | final SearchHit[] hits = searchHits.getHits(); 1133 | assertEquals("1", hits[0].getSourceAsMap().get("id")); 1134 | assertEquals("2", hits[1].getSourceAsMap().get("id")); 1135 | assertEquals("3", hits[2].getSourceAsMap().get("id")); 1136 | assertEquals("4", hits[3].getSourceAsMap().get("id")); 1137 | assertEquals("5", hits[4].getSourceAsMap().get("id")); 1138 | assertEquals("6", hits[5].getSourceAsMap().get("id")); 1139 | assertEquals("9", hits[6].getSourceAsMap().get("id")); 1140 | assertEquals("13", hits[7].getSourceAsMap().get("id")); 1141 | assertEquals("19", hits[8].getSourceAsMap().get("id")); 1142 | assertEquals("7", hits[9].getSourceAsMap().get("id")); 1143 | } 1144 | 1145 | { 1146 | final SearchResponse response = runner.client().prepareSearch(index) 1147 | .setQuery(QueryBuilders.termQuery("id", "1")) 1148 | .addSort(SortBuilders.fieldSort("order") 1149 | .order(SortOrder.ASC)) 1150 | .storedFields("_source", "minhash_value", "category") 1151 | .setFrom(0).setSize(10).execute().actionGet(); 1152 | final SearchHits searchHits = response.getHits(); 1153 | assertEquals(1, searchHits.getTotalHits().value); 1154 | final SearchHit[] hits = searchHits.getHits(); 1155 | assertEquals("1", hits[0].getSourceAsMap().get("id")); 1156 | } 1157 | } 1158 | } 1159 | --------------------------------------------------------------------------------