├── .env ├── .github └── workflows │ └── build.yml ├── .gitignore ├── LICENSE ├── README.md ├── build.gradle ├── build.sh ├── docker-compose.yml ├── docker └── Dockerfile ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── prepare-version.sh └── src ├── main └── java │ └── org │ └── opendatasoft │ └── elasticsearch │ ├── plugin │ └── PathHierarchyAggregation.java │ └── search │ └── aggregations │ └── bucket │ ├── DateHierarchyAggregationBuilder.java │ ├── DateHierarchyAggregationSupplier.java │ ├── DateHierarchyAggregator.java │ ├── DateHierarchyAggregatorFactory.java │ ├── InternalDateHierarchy.java │ ├── InternalPathHierarchy.java │ ├── PathHierarchyAggregationBuilder.java │ ├── PathHierarchyAggregationSupplier.java │ ├── PathHierarchyAggregator.java │ ├── PathHierarchyAggregatorFactory.java │ └── PathSortedTree.java ├── test └── java │ └── org │ └── opendatasoft │ └── elasticsearch │ └── PathHierarchyTests.java └── yamlRestTest ├── java └── org │ └── opendatasoft │ └── elasticsearch │ └── RestApiYamlIT.java └── resources └── rest-api-spec └── test └── PathHierarchy ├── 10_basic.yml ├── 20_path_hierarchy.yml ├── 30_date_hierarchy.yml ├── 40_max_buckets_path_hierarchy.yml ├── 50_max_buckets_date_hierarchy.yml └── 60_path_hierarchy_multi_buckets.yml /.env: -------------------------------------------------------------------------------- 1 | ES_VERSION=7.17.28 2 | PLUGIN_VERSION=7.17.28.0 3 | JAVA_COMPILER_VERSION=17 4 | GRADLE_VERSION=8.10.2 5 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout sources 11 | uses: actions/checkout@v4 12 | - name: Setup Java 13 | uses: actions/setup-java@v4 14 | with: 15 | distribution: 'temurin' 16 | java-version: 17 17 | - name: Setup Gradle 18 | uses: gradle/actions/setup-gradle@v4 19 | - name: Build with Gradle 20 | run: ./gradlew build 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .gradle/ 3 | .idea/ 4 | build/ 5 | *.iml 6 | *.log 7 | .vscode/ 8 | 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Elasticsearch Aggregation Path Hierarchy Plugin 2 | ========================================= 3 | 4 | This plugin adds the possibility to create hierarchical aggregations. 5 | Each term is split on a provided separator (default "/") then aggregated by level. 6 | For a complete example see https://github.com/elastic/elasticsearch/issues/8896 7 | 8 | Two different aggregations are available: 9 | - `path_hierarchy` for hierarchical aggregations on `keywords` field or `scripts` 10 | - `date_hierarchy` for hierachical aggregations on `date` fields. It is more optimized to use this aggregation for date instead of a script. 11 | 12 | This is a multi bucket aggregation. 13 | 14 | 15 | Installation 16 | ------------ 17 | 18 | To install it, launch this command in Elasticsearch directory replacing the url with a release suiting your case (please check available releases [here](https://github.com/opendatasoft/elasticsearch-aggregation-pathhierarchy/releases)): 19 | `./bin/elasticsearch-plugin install https://github.com/opendatasoft/elasticsearch-aggregation-pathhierarchy/releases/download/v7.17.28.0/pathhierarchy-aggregation-7.17.28.0.zip` 20 | 21 | Build 22 | ----- 23 | 24 | Built with Java 17 and Gradle 8.10.2. 25 | 26 | The first 3 digits of plugin version is Elasticsearch versioning. The last digit is used for plugin versioning under an elasticsearch version. 27 | 28 | 29 | Upgrade the plugin 30 | ------------ 31 | 32 | In order to upgrade the plugin, one can simply run for example `./prepare-version.sh 7.17.28`. It will: 33 | - fetch dependencies versions form Elasticsearch internals 34 | - upgrade the configuration files 35 | 36 | You can then run `./build.sh` that will build the plugin in a docker container using gradle. 37 | If successful, the plugin will be available in `./build/distributions/` (under the name `pathhierarchy-aggregation-7.17.28.0.zip` in this example). 38 | 39 | 40 | 41 | Development Environment Setup 42 | ------------ 43 | 44 | Build the plugin using gradle: 45 | 46 | `./gradlew build` 47 | 48 | OR 49 | 50 | `./gradlew assemble` (to avoid tests) 51 | 52 | In case you have to upgrade Gradle, you can do it with `./gradlew wrapper --gradler-version x.y.z`. 53 | 54 | Then the following command will start a dockerized ES and will install the previously built plugin: 55 | 56 | ``` 57 | docker compose up 58 | ``` 59 | 60 | Check the Elasticsearch instance at `localhost:9200` and the plugin version with `localhost:9200/_cat/plugins`. 61 | 62 | 63 | > NOTE: In `docker-compose.yml` you can uncomment the debug env and attach a REMOTE JVM on `*:5005` to debug the plugin. 64 | 65 | Path hierarchy aggregation 66 | -------------------------- 67 | 68 | ### Parameters 69 | 70 | - `field` or `script` : field to aggregate on 71 | - `separator` : separator for path hierarchy (default to "/") 72 | - `order` : order parameter to define how to sort result. Allowed parameters are `_key`, `_count` or sub aggregation name. Default to {"_count": "desc}. 73 | - `size`: size parameter to define how many buckets should be returned. Default to 10. 
74 | - `shard_size`: how many buckets returned by each shards. Set to size if smaller, default to size if the search request needs to go to a single shard, and (size * 1.5 + 10) otherwise (more information here: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_shard_size_3). 75 | - `min_depth`: Set minimum depth level. Default to 0. 76 | - `max_depth`: Set maximum depth level. `-1` means no limit. Default to 3. 77 | - `depth`: Retrieve values for specified depth. Shortcut, instead of setting `min_depth` and `max_depth` parameters to the same value. 78 | - `keep_blank_path`: Keep blank path as bucket. if this option is set to false, chained separator will be ignored. Default to false. 79 | - `min_doc_count`: Return buckets containing at least `min_doc_count` document. Default to 0 80 | 81 | 82 | Examples 83 | ------- 84 | 85 | #### String field 86 | 87 | ``` 88 | # Add data: 89 | PUT filesystem 90 | { 91 | "mappings": { 92 | "properties": { 93 | "path": { 94 | "type": "keyword" 95 | } 96 | } 97 | } 98 | } 99 | POST filesystem/_bulk?refresh 100 | {"index":{}} 101 | {"path":"/My documents/Spreadsheets/Budget_2013.xls","views":10} 102 | {"index":{}} 103 | {"path":"/My documents/Spreadsheets/Budget_2014.xls","views":7} 104 | {"index":{}} 105 | {"path":"/My documents/Test.txt","views":1} 106 | 107 | 108 | # Path hierarchy request : 109 | GET /filesystem/_search?size=0 110 | { 111 | "aggs": { 112 | "tree": { 113 | "path_hierarchy": { 114 | "field": "path", 115 | "separator": "/" 116 | }, 117 | "aggs": { 118 | "total_views": { 119 | "sum": { 120 | "field": "views" 121 | } 122 | } 123 | } 124 | } 125 | } 126 | } 127 | 128 | 129 | Result : 130 | {"aggregations": { 131 | "tree": { 132 | "sum_other_doc_count": 0, 133 | "buckets": [ 134 | { 135 | "key": "My documents", 136 | "doc_count": 3, 137 | "total_views": { 138 | "value": 18 139 | }, 140 | "tree": { 141 | "buckets": [ 142 | { 143 | "key": "Spreadsheets", 144 | "doc_count": 2, 145 | "total_views": { 146 | "value": 17 147 | }, 148 | "tree": { 149 | "buckets": [ 150 | { 151 | "key": "Budget_2013.xls", 152 | "doc_count": 1, 153 | "total_views": { 154 | "value": 10 155 | } 156 | }, 157 | { 158 | "key": "Budget_2014.xls", 159 | "doc_count": 1, 160 | "total_views": { 161 | "value": 7 162 | } 163 | } 164 | ] 165 | } 166 | }, 167 | { 168 | "key": "Test.txt", 169 | "doc_count": 1, 170 | "total_views": { 171 | "value": 1 172 | } 173 | } 174 | ] 175 | } 176 | } 177 | ] 178 | } 179 | } 180 | ``` 181 | 182 | #### Script 183 | 184 | ``` 185 | PUT calendar 186 | { 187 | "mappings": { 188 | "properties": { 189 | "date": { 190 | "type": "date" 191 | } 192 | } 193 | } 194 | } 195 | 196 | POST calendar/_bulk?refresh 197 | {"index":{}} 198 | {"date": "2012-01-10T02:47:28"} 199 | {"index":{}} 200 | {"date": "2012-01-05T01:43:35"} 201 | {"index":{}} 202 | {"date": "2012-05-01T12:24:19"} 203 | 204 | GET /calendar/_search?size=0 205 | { 206 | "aggs": { 207 | "tree": { 208 | "path_hierarchy": { 209 | "script": "doc['date'].value.toOffsetDateTime().format(DateTimeFormatter.ofPattern('yyyy/MM/dd'))", 210 | "order": { 211 | "_key": "asc" 212 | } 213 | } 214 | } 215 | } 216 | } 217 | 218 | 219 | Result : 220 | 221 | {"aggregations": { 222 | "tree": { 223 | "buckets": [ 224 | { 225 | "key": "2012", 226 | "doc_count": 3, 227 | "tree": { 228 | "buckets": [ 229 | { 230 | "key": "01", 231 | "doc_count": 2, 232 | "tree": { 233 | "buckets": [ 234 | { 235 | "key": "05", 236 | "doc_count": 1 237 | }, 238 | { 239 | 
"key": "10", 240 | "doc_count": 1 241 | } 242 | ] 243 | } 244 | }, 245 | { 246 | "key": "05", 247 | "doc_count": 1, 248 | "tree": { 249 | "buckets": [ 250 | { 251 | "key": "01", 252 | "doc_count": 1 253 | } 254 | ] 255 | } 256 | } 257 | ] 258 | } 259 | } 260 | ] 261 | } 262 | } 263 | } 264 | 265 | ``` 266 | 267 | Date hierarchy 268 | -------------- 269 | 270 | ### Parameters 271 | 272 | - `field` : field to aggregate on. This parameter is mandatory 273 | - `interval`: date interval used to create the hierarchy. Allowed values are: `years`, `months`, `days`, `hours`, `minutes`, `seconds` Default to `years`. 274 | - `order` : order parameter to define how to sort result. Allowed parameters are `_key`, `_count` or sub aggregation name. Default to {"_count": "desc}. 275 | - `size`: size parameter to define how many buckets should be returned. Default to 10. 276 | - `shard_size`: how many buckets returned by each shards. Set to size if smaller, default to size if the search request needs to go to a single shard, and (size * 1.5 + 10) otherwise (more information here: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_shard_size_3). 277 | - `min_doc_count`: Return buckets containing at least `min_doc_count` document. Default to 0 278 | 279 | 280 | Example 281 | ------- 282 | 283 | ``` 284 | 285 | PUT calendar 286 | { 287 | "mappings": { 288 | "properties": { 289 | "date": { 290 | "type": "date" 291 | } 292 | } 293 | } 294 | } 295 | 296 | PUT /calendar/_doc/1 297 | { 298 | "date": "2012-01-10T02:47:28" 299 | } 300 | PUT /calendar/_doc/2 301 | { 302 | "date": "2012-01-05T01:43:35" 303 | } 304 | PUT /calendar/_doc/3 305 | { 306 | "date": "2012-05-01T12:24:19" 307 | } 308 | 309 | GET /calendar/_search?size=0 310 | { 311 | "aggs": { 312 | "tree": { 313 | "date_hierarchy": { 314 | "field": "date", 315 | "interval": "days", 316 | "order": { 317 | "_key": "asc" 318 | } 319 | } 320 | } 321 | } 322 | } 323 | 324 | ``` 325 | 326 | 327 | License 328 | ------- 329 | 330 | This software is under The MIT License (MIT). 
331 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | mavenLocal() 4 | mavenCentral() 5 | } 6 | 7 | dependencies { 8 | classpath "org.elasticsearch.gradle:build-tools:${es_version}" 9 | } 10 | } 11 | 12 | repositories { 13 | mavenLocal() 14 | mavenCentral() 15 | } 16 | 17 | group = 'org.elasticsearch.plugin' 18 | version = "${plugin_version}" 19 | 20 | def versions = org.elasticsearch.gradle.VersionProperties.versions 21 | 22 | apply plugin: 'java' 23 | apply plugin: 'idea' 24 | apply plugin: 'elasticsearch.esplugin' 25 | apply plugin: 'elasticsearch.yaml-rest-test' 26 | 27 | 28 | esplugin { 29 | name 'pathhierarchy-aggregation' 30 | description 'Return a path hierarchy aggregation' 31 | classname 'org.opendatasoft.elasticsearch.plugin.PathHierarchyAggregation' 32 | licenseFile = rootProject.file('LICENSE') 33 | noticeFile = rootProject.file('README.md') 34 | } 35 | 36 | 37 | dependencies { 38 | implementation "org.elasticsearch:elasticsearch:${es_version}" 39 | yamlRestTestImplementation "org.elasticsearch.test:framework:${es_version}" 40 | yamlRestTestImplementation "org.apache.logging.log4j:log4j-core:${versions.log4j}" 41 | } 42 | 43 | tasks.named("yamlRestTest").configure { 44 | systemProperty 'tests.security.manager', 'false' 45 | } 46 | 47 | tasks.named("test").configure { 48 | systemProperty 'tests.security.manager', 'false' 49 | } 50 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # helper script that runs the build script inside a gradle based docker container 4 | 5 | source .env 6 | 7 | echo "GRADLE_VERSION ${GRADLE_VERSION}" 8 | 9 | docker run --rm \ 10 | -v .:/opt/gen \ 11 | -w /opt/gen \ 12 | -u gradle \ 13 | gradle:${GRADLE_VERSION} ./gradlew build 14 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | networks: 2 | es-network: 3 | 4 | services: 5 | elasticsearch-plugin-debug: 6 | build: 7 | context: . 
8 | dockerfile: docker/Dockerfile 9 | target: elasticsearch-plugin-debug 10 | args: 11 | ES_VERSION: ${ES_VERSION} 12 | PLUGIN_VERSION: ${PLUGIN_VERSION} 13 | PLUGIN_FILENAME: pathhierarchy-aggregation-${PLUGIN_VERSION}.zip 14 | environment: 15 | - discovery.type=single-node 16 | # NO DEBUG 17 | - ES_JAVA_OPTS=-Xms512m -Xmx512m 18 | # DEBUG 19 | # - ES_JAVA_OPTS=-Xms512m -Xmx512m -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005 20 | ports: 21 | - "9200:9200" 22 | # DEBUG 23 | # - "5005:5005" 24 | networks: 25 | - es-network 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:7.17.24 29 | environment: 30 | ELASTICSEARCH_HOSTS: http://elasticsearch-plugin-debug:9200/ 31 | ports: 32 | - "5602:5601" 33 | networks: 34 | - es-network 35 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG ES_VERSION 2 | ARG PLUGIN_FILENAME 3 | 4 | FROM docker.elastic.co/elasticsearch/elasticsearch:${ES_VERSION} AS elasticsearch-plugin-debug 5 | 6 | COPY build/distributions/${PLUGIN_FILENAME} /tmp/${PLUGIN_FILENAME} 7 | 8 | # mandatory because docker ARG is scoped to stage (forgotten after the FROM scope above...!) 9 | ARG PLUGIN_FILENAME 10 | # madantory because ARGS cannot be used inside RUN shell 11 | ENV PLUGIN_FILENAME=${PLUGIN_FILENAME} 12 | RUN ./bin/elasticsearch-plugin install --batch file:/tmp/${PLUGIN_FILENAME} 13 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | es_version = 7.17.28 2 | plugin_version = 7.17.28.0 3 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatasoft/elasticsearch-aggregation-pathhierarchy/06fc00f47cb146b319754f62f9010012e3073f56/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | # SPDX-License-Identifier: Apache-2.0 19 | # 20 | 21 | ############################################################################## 22 | # 23 | # Gradle start up script for POSIX generated by Gradle. 24 | # 25 | # Important for running: 26 | # 27 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 28 | # noncompliant, but you have some other compliant shell such as ksh or 29 | # bash, then to run this script, type that shell name before the whole 30 | # command line, like: 31 | # 32 | # ksh Gradle 33 | # 34 | # Busybox and similar reduced shells will NOT work, because this script 35 | # requires all of these POSIX shell features: 36 | # * functions; 37 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 38 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 39 | # * compound commands having a testable exit status, especially «case»; 40 | # * various built-in commands including «command», «set», and «ulimit». 41 | # 42 | # Important for patching: 43 | # 44 | # (2) This script targets any POSIX shell, so it avoids extensions provided 45 | # by Bash, Ksh, etc; in particular arrays are avoided. 46 | # 47 | # The "traditional" practice of packing multiple parameters into a 48 | # space-separated string is a well documented source of bugs and security 49 | # problems, so this is (mostly) avoided, by progressively accumulating 50 | # options in "$@", and eventually passing that to Java. 51 | # 52 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 53 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 54 | # see the in-line comments for details. 55 | # 56 | # There are tweaks for specific operating systems such as AIX, CygWin, 57 | # Darwin, MinGW, and NonStop. 58 | # 59 | # (3) This script is generated from the Groovy template 60 | # https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 61 | # within the Gradle project. 62 | # 63 | # You can find Gradle at https://github.com/gradle/gradle/. 64 | # 65 | ############################################################################## 66 | 67 | # Attempt to set APP_HOME 68 | 69 | # Resolve links: $0 may be a link 70 | app_path=$0 71 | 72 | # Need this for daisy-chained symlinks. 73 | while 74 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 75 | [ -h "$app_path" ] 76 | do 77 | ls=$( ls -ld "$app_path" ) 78 | link=${ls#*' -> '} 79 | case $link in #( 80 | /*) app_path=$link ;; #( 81 | *) app_path=$APP_HOME$link ;; 82 | esac 83 | done 84 | 85 | # This is normally unused 86 | # shellcheck disable=SC2034 87 | APP_BASE_NAME=${0##*/} 88 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) 89 | APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s 90 | ' "$PWD" ) || exit 91 | 92 | # Use the maximum available, or set MAX_FD != -1 to use that value. 93 | MAX_FD=maximum 94 | 95 | warn () { 96 | echo "$*" 97 | } >&2 98 | 99 | die () { 100 | echo 101 | echo "$*" 102 | echo 103 | exit 1 104 | } >&2 105 | 106 | # OS specific support (must be 'true' or 'false'). 
107 | cygwin=false 108 | msys=false 109 | darwin=false 110 | nonstop=false 111 | case "$( uname )" in #( 112 | CYGWIN* ) cygwin=true ;; #( 113 | Darwin* ) darwin=true ;; #( 114 | MSYS* | MINGW* ) msys=true ;; #( 115 | NONSTOP* ) nonstop=true ;; 116 | esac 117 | 118 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 119 | 120 | 121 | # Determine the Java command to use to start the JVM. 122 | if [ -n "$JAVA_HOME" ] ; then 123 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 124 | # IBM's JDK on AIX uses strange locations for the executables 125 | JAVACMD=$JAVA_HOME/jre/sh/java 126 | else 127 | JAVACMD=$JAVA_HOME/bin/java 128 | fi 129 | if [ ! -x "$JAVACMD" ] ; then 130 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 131 | 132 | Please set the JAVA_HOME variable in your environment to match the 133 | location of your Java installation." 134 | fi 135 | else 136 | JAVACMD=java 137 | if ! command -v java >/dev/null 2>&1 138 | then 139 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 140 | 141 | Please set the JAVA_HOME variable in your environment to match the 142 | location of your Java installation." 143 | fi 144 | fi 145 | 146 | # Increase the maximum file descriptors if we can. 147 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 148 | case $MAX_FD in #( 149 | max*) 150 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 151 | # shellcheck disable=SC2039,SC3045 152 | MAX_FD=$( ulimit -H -n ) || 153 | warn "Could not query maximum file descriptor limit" 154 | esac 155 | case $MAX_FD in #( 156 | '' | soft) :;; #( 157 | *) 158 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 159 | # shellcheck disable=SC2039,SC3045 160 | ulimit -n "$MAX_FD" || 161 | warn "Could not set maximum file descriptor limit to $MAX_FD" 162 | esac 163 | fi 164 | 165 | # Collect all arguments for the java command, stacking in reverse order: 166 | # * args from the command line 167 | # * the main class name 168 | # * -classpath 169 | # * -D...appname settings 170 | # * --module-path (only if needed) 171 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 172 | 173 | # For Cygwin or MSYS, switch paths to Windows format before running java 174 | if "$cygwin" || "$msys" ; then 175 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 176 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 177 | 178 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 179 | 180 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 181 | for arg do 182 | if 183 | case $arg in #( 184 | -*) false ;; # don't mess with options #( 185 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 186 | [ -e "$t" ] ;; #( 187 | *) false ;; 188 | esac 189 | then 190 | arg=$( cygpath --path --ignore --mixed "$arg" ) 191 | fi 192 | # Roll the args list around exactly as many times as the number of 193 | # args, so each arg winds up back in the position where it started, but 194 | # possibly modified. 195 | # 196 | # NB: a `for` loop captures its iteration list before it begins, so 197 | # changing the positional parameters here affects neither the number of 198 | # iterations, nor the values presented in `arg`. 199 | shift # remove old arg 200 | set -- "$@" "$arg" # push replacement arg 201 | done 202 | fi 203 | 204 | 205 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
206 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 207 | 208 | # Collect all arguments for the java command: 209 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, 210 | # and any embedded shellness will be escaped. 211 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be 212 | # treated as '${Hostname}' itself on the command line. 213 | 214 | set -- \ 215 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 216 | -classpath "$CLASSPATH" \ 217 | org.gradle.wrapper.GradleWrapperMain \ 218 | "$@" 219 | 220 | # Stop when "xargs" is not available. 221 | if ! command -v xargs >/dev/null 2>&1 222 | then 223 | die "xargs is not available" 224 | fi 225 | 226 | # Use "xargs" to parse quoted args. 227 | # 228 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 229 | # 230 | # In Bash we could simply go: 231 | # 232 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 233 | # set -- "${ARGS[@]}" "$@" 234 | # 235 | # but POSIX shell has neither arrays nor command substitution, so instead we 236 | # post-process each arg (as a line of input to sed) to backslash-escape any 237 | # character that might be a shell metacharacter, then use eval to reverse 238 | # that process (while maintaining the separation between arguments), and wrap 239 | # the whole thing up as a single "set" statement. 240 | # 241 | # This will of course break if any of these variables contains a newline or 242 | # an unmatched quote. 243 | # 244 | 245 | eval "set -- $( 246 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 247 | xargs -n1 | 248 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 249 | tr '\n' ' ' 250 | )" '"$@"' 251 | 252 | exec "$JAVACMD" "$@" 253 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | @rem SPDX-License-Identifier: Apache-2.0 17 | @rem 18 | 19 | @if "%DEBUG%"=="" @echo off 20 | @rem ########################################################################## 21 | @rem 22 | @rem Gradle startup script for Windows 23 | @rem 24 | @rem ########################################################################## 25 | 26 | @rem Set local scope for the variables with windows NT shell 27 | if "%OS%"=="Windows_NT" setlocal 28 | 29 | set DIRNAME=%~dp0 30 | if "%DIRNAME%"=="" set DIRNAME=. 31 | @rem This is normally unused 32 | set APP_BASE_NAME=%~n0 33 | set APP_HOME=%DIRNAME% 34 | 35 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 36 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 37 | 38 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
39 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 40 | 41 | @rem Find java.exe 42 | if defined JAVA_HOME goto findJavaFromJavaHome 43 | 44 | set JAVA_EXE=java.exe 45 | %JAVA_EXE% -version >NUL 2>&1 46 | if %ERRORLEVEL% equ 0 goto execute 47 | 48 | echo. 1>&2 49 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 50 | echo. 1>&2 51 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 52 | echo location of your Java installation. 1>&2 53 | 54 | goto fail 55 | 56 | :findJavaFromJavaHome 57 | set JAVA_HOME=%JAVA_HOME:"=% 58 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 59 | 60 | if exist "%JAVA_EXE%" goto execute 61 | 62 | echo. 1>&2 63 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 64 | echo. 1>&2 65 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 66 | echo location of your Java installation. 1>&2 67 | 68 | goto fail 69 | 70 | :execute 71 | @rem Setup the command line 72 | 73 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 74 | 75 | 76 | @rem Execute Gradle 77 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 78 | 79 | :end 80 | @rem End local scope for the variables with windows NT shell 81 | if %ERRORLEVEL% equ 0 goto mainEnd 82 | 83 | :fail 84 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 85 | rem the _cmd.exe /c_ return code! 86 | set EXIT_CODE=%ERRORLEVEL% 87 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 88 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 89 | exit /b %EXIT_CODE% 90 | 91 | :mainEnd 92 | if "%OS%"=="Windows_NT" endlocal 93 | 94 | :omega 95 | -------------------------------------------------------------------------------- /prepare-version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Helper script to prepare a version of the plugin based on a specific elasticsearch version 4 | # 5 | # given the elasticsearch version as argument, it generates the needed configuration file to build the plugin 6 | 7 | [ -z "$1" ] && { 8 | echo "1st argument should be the targeted elasticsearch version" 9 | exit 1 10 | } 11 | 12 | ES_VERSION="$1" 13 | 14 | # retrieve version information from ES repository 15 | GRADLE_VERSION=$(curl -s https://raw.githubusercontent.com/elastic/elasticsearch/refs/tags/v${ES_VERSION}/build-tools-internal/src/main/resources/minimumGradleVersion) 16 | JAVA_COMPILER_VERSION=$(curl -s https://raw.githubusercontent.com/elastic/elasticsearch/refs/tags/v${ES_VERSION}/build-tools-internal/src/main/resources/minimumCompilerVersion) 17 | JAVA_RUNTIME_VERSION=$(curl -s https://raw.githubusercontent.com/elastic/elasticsearch/refs/tags/v${ES_VERSION}/build-tools-internal/src/main/resources/minimumRuntimeVersion) 18 | 19 | PLUGIN_VERSION="${ES_VERSION}.0" 20 | 21 | echo "GRADLE_VERSION ${GRADLE_VERSION}" 22 | echo "JAVA_COMPILER_VERSION ${JAVA_COMPILER_VERSION}" 23 | echo "ES_VERSION ${ES_VERSION}" 24 | echo "PLUGIN_VERSION ${PLUGIN_VERSION}" 25 | 26 | echo "es_version = ${ES_VERSION} 27 | plugin_version = ${PLUGIN_VERSION}" > gradle.properties 28 | 29 | echo "ES_VERSION=${ES_VERSION} 30 | PLUGIN_VERSION=${PLUGIN_VERSION} 31 | JAVA_COMPILER_VERSION=${JAVA_COMPILER_VERSION} 32 | GRADLE_VERSION=${GRADLE_VERSION}" > .env 33 | 34 | docker run --rm \ 35 | -v .:/opt/gen \ 36 | -w /opt/gen \ 37 | -u gradle \ 38 | gradle:"${GRADLE_VERSION}" 
/usr/bin/gradle wrapper --gradle-version "${GRADLE_VERSION}" --distribution-type bin 39 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/plugin/PathHierarchyAggregation.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.plugin; 2 | 3 | import org.elasticsearch.plugins.Plugin; 4 | import org.elasticsearch.plugins.SearchPlugin; 5 | import org.opendatasoft.elasticsearch.search.aggregations.bucket.DateHierarchyAggregationBuilder; 6 | import org.opendatasoft.elasticsearch.search.aggregations.bucket.InternalDateHierarchy; 7 | import org.opendatasoft.elasticsearch.search.aggregations.bucket.InternalPathHierarchy; 8 | import org.opendatasoft.elasticsearch.search.aggregations.bucket.PathHierarchyAggregationBuilder; 9 | 10 | import java.util.ArrayList; 11 | 12 | public class PathHierarchyAggregation extends Plugin implements SearchPlugin { 13 | @Override 14 | public ArrayList getAggregations() { 15 | ArrayList r = new ArrayList<>(); 16 | r.add( 17 | new AggregationSpec( 18 | PathHierarchyAggregationBuilder.NAME, 19 | PathHierarchyAggregationBuilder::new, 20 | PathHierarchyAggregationBuilder.PARSER) 21 | .addResultReader(InternalPathHierarchy::new) 22 | .setAggregatorRegistrar(PathHierarchyAggregationBuilder::registerAggregators) 23 | ); 24 | r.add( 25 | new AggregationSpec( 26 | DateHierarchyAggregationBuilder.NAME, 27 | DateHierarchyAggregationBuilder::new, 28 | DateHierarchyAggregationBuilder.PARSER) 29 | .addResultReader(InternalDateHierarchy::new) 30 | .setAggregatorRegistrar(DateHierarchyAggregationBuilder::registerAggregators) 31 | ); 32 | return r; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/DateHierarchyAggregationBuilder.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.elasticsearch.Version; 4 | import org.elasticsearch.xcontent.ParseField; 5 | import org.elasticsearch.common.Rounding; 6 | import org.elasticsearch.common.io.stream.StreamInput; 7 | import org.elasticsearch.common.io.stream.StreamOutput; 8 | import org.elasticsearch.common.io.stream.Writeable; 9 | import org.elasticsearch.common.time.DateFormatter; 10 | import org.elasticsearch.xcontent.ObjectParser; 11 | import org.elasticsearch.xcontent.XContentBuilder; 12 | import org.elasticsearch.xcontent.XContentParser; 13 | import org.elasticsearch.index.mapper.DateFieldMapper; 14 | import org.elasticsearch.search.aggregations.support.AggregationContext; 15 | import org.elasticsearch.search.DocValueFormat; 16 | import org.elasticsearch.search.aggregations.AggregationBuilder; 17 | import org.elasticsearch.search.aggregations.AggregatorFactories.Builder; 18 | import org.elasticsearch.search.aggregations.AggregatorFactory; 19 | import org.elasticsearch.search.aggregations.BucketOrder; 20 | import org.elasticsearch.search.aggregations.InternalOrder; 21 | import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; 22 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder; 23 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; 24 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 25 | import 
org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; 26 | import org.elasticsearch.search.aggregations.support.ValuesSourceType; 27 | 28 | import java.io.IOException; 29 | import java.time.ZoneId; 30 | import java.time.ZoneOffset; 31 | import java.util.ArrayList; 32 | import java.util.LinkedHashMap; 33 | import java.util.List; 34 | import java.util.Map; 35 | import java.util.Objects; 36 | 37 | import static java.util.Collections.unmodifiableMap; 38 | 39 | 40 | /** 41 | * The builder of the aggregatorFactory. Also implements the parsing of the request. 42 | */ 43 | public class DateHierarchyAggregationBuilder extends ValuesSourceAggregationBuilder { 44 | public static final String NAME = "date_hierarchy"; 45 | public static final ValuesSourceRegistry.RegistryKey REGISTRY_KEY = 46 | new ValuesSourceRegistry.RegistryKey<>(NAME, DateHierarchyAggregationSupplier.class); 47 | 48 | 49 | public static final ParseField INTERVAL_FIELD = new ParseField("interval"); 50 | public static final ParseField ORDER_FIELD = new ParseField("order"); 51 | public static final ParseField SIZE_FIELD = new ParseField("size"); 52 | public static final ParseField SHARD_SIZE_FIELD = new ParseField("shard_size"); 53 | public static final ParseField MIN_DOC_COUNT_FIELD = new ParseField("min_doc_count"); 54 | 55 | 56 | public static final Map INTERVAL_CONFIG; 57 | static { 58 | Map dateFieldUnits = new LinkedHashMap<>(); 59 | dateFieldUnits.put("years", new IntervalConfig(Rounding.DateTimeUnit.YEAR_OF_CENTURY, "yyyy")); 60 | dateFieldUnits.put("months", new IntervalConfig(Rounding.DateTimeUnit.MONTH_OF_YEAR, "MM")); 61 | dateFieldUnits.put("days", new IntervalConfig(Rounding.DateTimeUnit.DAY_OF_MONTH, "dd")); 62 | dateFieldUnits.put("hours", new IntervalConfig(Rounding.DateTimeUnit.HOUR_OF_DAY, "hh")); 63 | dateFieldUnits.put("minutes", new IntervalConfig(Rounding.DateTimeUnit.MINUTES_OF_HOUR, "mm")); 64 | dateFieldUnits.put("seconds", new IntervalConfig(Rounding.DateTimeUnit.SECOND_OF_MINUTE, "ss")); 65 | INTERVAL_CONFIG = unmodifiableMap(dateFieldUnits); 66 | } 67 | 68 | public static class IntervalConfig { 69 | final Rounding.DateTimeUnit dateTimeUnit; 70 | final String format; 71 | 72 | public IntervalConfig(Rounding.DateTimeUnit dateTimeUnit, String format) { 73 | this.dateTimeUnit = dateTimeUnit; 74 | this.format = format; 75 | } 76 | } 77 | 78 | public static class PreparedRounding { 79 | final RoundingInfo roundingInfo; 80 | final Rounding.Prepared prepared; 81 | 82 | public PreparedRounding(RoundingInfo roundingInfo, Rounding.Prepared prepared) { 83 | this.roundingInfo = roundingInfo; 84 | this.prepared = prepared; 85 | } 86 | } 87 | 88 | public List buildRoundings() { 89 | List roundings = new ArrayList<>(); 90 | 91 | ZoneId timeZone = timeZone() == null ? 
ZoneOffset.UTC: timeZone(); 92 | 93 | long now = System.currentTimeMillis(); 94 | for (String interval : INTERVAL_CONFIG.keySet()) { 95 | RoundingInfo ri = new RoundingInfo(interval, createRounding(INTERVAL_CONFIG.get(interval).dateTimeUnit), 96 | new DocValueFormat.DateTime(DateFormatter.forPattern(INTERVAL_CONFIG.get(interval).format), timeZone, 97 | DateFieldMapper.Resolution.MILLISECONDS)); 98 | roundings.add(new PreparedRounding(ri, ri.rounding.prepareForUnknown())); 99 | 100 | if (interval.equals(interval())) { 101 | break; 102 | } 103 | } 104 | 105 | return roundings; 106 | } 107 | 108 | public static class RoundingInfo implements Writeable { 109 | final DocValueFormat format; 110 | final Rounding rounding; 111 | final String interval; 112 | 113 | public RoundingInfo(String interval, Rounding rounding, DocValueFormat docValueFormat) { 114 | this.interval = interval; 115 | this.rounding = rounding; 116 | this.format = docValueFormat; 117 | } 118 | 119 | public RoundingInfo(StreamInput in) throws IOException { 120 | rounding = Rounding.read(in); 121 | interval = in.readString(); 122 | format = in.readNamedWriteable(DocValueFormat.class); 123 | } 124 | 125 | @Override 126 | public void writeTo(StreamOutput out) throws IOException { 127 | rounding.writeTo(out); 128 | out.writeString(interval); 129 | out.writeNamedWriteable(format); 130 | } 131 | } 132 | 133 | public static final DateHierarchyAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new 134 | DateHierarchyAggregator.BucketCountThresholds(10, -1); 135 | public static final ObjectParser PARSER = 136 | ObjectParser.fromBuilder(NAME, DateHierarchyAggregationBuilder::new); 137 | static { 138 | 139 | ValuesSourceAggregationBuilder.declareFields(PARSER, true, true, true); 140 | 141 | PARSER.declareString(DateHierarchyAggregationBuilder::interval, INTERVAL_FIELD); 142 | 143 | PARSER.declareField(DateHierarchyAggregationBuilder::timeZone, p -> { 144 | if (p.currentToken() == XContentParser.Token.VALUE_STRING) { 145 | return ZoneId.of(p.text()); 146 | } else { 147 | return ZoneOffset.ofHours(p.intValue()); 148 | } 149 | }, new ParseField("time_zone"), ObjectParser.ValueType.LONG); 150 | 151 | PARSER.declareInt(DateHierarchyAggregationBuilder::size, SIZE_FIELD); 152 | PARSER.declareLong(DateHierarchyAggregationBuilder::minDocCount, MIN_DOC_COUNT_FIELD); 153 | PARSER.declareInt(DateHierarchyAggregationBuilder::shardSize, SHARD_SIZE_FIELD); 154 | PARSER.declareObjectArray(DateHierarchyAggregationBuilder::order, (p, c) -> InternalOrder.Parser.parseOrderParam(p), 155 | ORDER_FIELD); 156 | } 157 | 158 | public static AggregationBuilder parse(String aggregationName, XContentParser parser) throws IOException { 159 | return PARSER.parse(parser, new DateHierarchyAggregationBuilder(aggregationName), null); 160 | } 161 | 162 | public static void registerAggregators(ValuesSourceRegistry.Builder builder) { 163 | DateHierarchyAggregatorFactory.registerAggregators(builder); 164 | } 165 | 166 | private long minDocCount = 0; 167 | private ZoneId timeZone = null; 168 | private String interval = "years"; 169 | private BucketOrder order = BucketOrder.compound(BucketOrder.count(false)); // automatically adds tie-breaker key asc order 170 | private DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds = new DateHierarchyAggregator.BucketCountThresholds( 171 | DEFAULT_BUCKET_COUNT_THRESHOLDS); 172 | 173 | 174 | private DateHierarchyAggregationBuilder(String name) { 175 | super(name); 176 | } 177 | 178 | @Override 179 | protected 
boolean serializeTargetValueType(Version version) { 180 | return true; 181 | } 182 | 183 | /** 184 | * Read from a stream 185 | * 186 | */ 187 | public DateHierarchyAggregationBuilder(StreamInput in) throws IOException { 188 | super(in); 189 | bucketCountThresholds = new DateHierarchyAggregator.BucketCountThresholds(in); 190 | minDocCount = in.readVLong(); 191 | interval = in.readString(); 192 | order = InternalOrder.Streams.readOrder(in); 193 | timeZone = in.readOptionalZoneId(); 194 | } 195 | 196 | private DateHierarchyAggregationBuilder(DateHierarchyAggregationBuilder clone, Builder factoriesBuilder, 197 | Map metaData) { 198 | super(clone, factoriesBuilder, metaData); 199 | order = clone.order; 200 | minDocCount = clone.minDocCount; 201 | this.bucketCountThresholds = new DateHierarchyAggregator.BucketCountThresholds(clone.bucketCountThresholds); 202 | } 203 | 204 | @Override 205 | protected AggregationBuilder shallowCopy(Builder factoriesBuilder, Map metaData) { 206 | return new DateHierarchyAggregationBuilder(this, factoriesBuilder, metaData); 207 | } 208 | 209 | @Override 210 | protected ValuesSourceType defaultValueSourceType() { 211 | return CoreValuesSourceType.DATE; 212 | } 213 | 214 | /** 215 | * Write to a stream 216 | */ 217 | @Override 218 | protected void innerWriteTo(StreamOutput out) throws IOException { 219 | bucketCountThresholds.writeTo(out); 220 | out.writeVLong(minDocCount); 221 | out.writeString(interval); 222 | order.writeTo(out); 223 | out.writeOptionalZoneId(timeZone); 224 | } 225 | 226 | /** 227 | * Returns the date interval that is set on this source 228 | **/ 229 | public String interval() { 230 | return interval; 231 | } 232 | 233 | public DateHierarchyAggregationBuilder interval(String interval) { 234 | 235 | if (INTERVAL_CONFIG.get(interval) == null) { 236 | throw new IllegalArgumentException("[interval] is invalid"); 237 | } 238 | 239 | this.interval = interval; 240 | return this; 241 | } 242 | 243 | /** 244 | * Sets the time zone to use for this aggregation 245 | */ 246 | public DateHierarchyAggregationBuilder timeZone(ZoneId timeZone) { 247 | if (timeZone == null) { 248 | throw new IllegalArgumentException("[timeZone] must not be null: [" + name + "]"); 249 | } 250 | this.timeZone = timeZone; 251 | return this; 252 | } 253 | 254 | /** 255 | * Gets the time zone to use for this aggregation 256 | */ 257 | public ZoneId timeZone() { 258 | return timeZone; 259 | } 260 | 261 | private Rounding createRounding(Rounding.DateTimeUnit dateTimeUnit) { 262 | Rounding.Builder tzRoundingBuilder; 263 | tzRoundingBuilder = Rounding.builder(dateTimeUnit); 264 | 265 | if (timeZone() != null) { 266 | tzRoundingBuilder.timeZone(timeZone()); 267 | } 268 | Rounding rounding = tzRoundingBuilder.build(); 269 | return rounding; 270 | } 271 | 272 | /** Set the order in which the buckets will be returned. It returns the builder so that calls 273 | * can be chained. A tie-breaker may be added to avoid non-deterministic ordering. 
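 * Concretely, key orders and compound orders are kept as-is; any other order (for example a plain count
 * order) is wrapped in a compound order, which appends a key-ascending tie-breaker.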
*/ 274 | private DateHierarchyAggregationBuilder order(BucketOrder order) { 275 | if (order == null) { 276 | throw new IllegalArgumentException("[order] must not be null: [" + name + "]"); 277 | } 278 | if(order instanceof InternalOrder.CompoundOrder || InternalOrder.isKeyOrder(order)) { 279 | this.order = order; // if order already contains a tie-breaker we are good to go 280 | } else { // otherwise add a tie-breaker by using a compound order 281 | this.order = BucketOrder.compound(order); 282 | } 283 | return this; 284 | } 285 | 286 | private DateHierarchyAggregationBuilder order(List orders) { 287 | if (orders == null) { 288 | throw new IllegalArgumentException("[orders] must not be null: [" + name + "]"); 289 | } 290 | // if the list only contains one order use that to avoid inconsistent xcontent 291 | order(orders.size() > 1 ? BucketOrder.compound(orders) : orders.get(0)); 292 | return this; 293 | } 294 | 295 | 296 | /** 297 | * Sets the size - indicating how many term buckets should be returned 298 | * (defaults to 10) 299 | */ 300 | public DateHierarchyAggregationBuilder size(int size) { 301 | if (size <= 0) { 302 | throw new IllegalArgumentException("[size] must be greater than 0. Found [" + size + "] in [" + name + "]"); 303 | } 304 | bucketCountThresholds.setRequiredSize(size); 305 | return this; 306 | } 307 | 308 | /** Set the minimum count of matching documents that buckets need to have 309 | * and return this builder so that calls can be chained. */ 310 | public DateHierarchyAggregationBuilder minDocCount(long minDocCount) { 311 | if (minDocCount < 0) { 312 | throw new IllegalArgumentException( 313 | "[minDocCount] must be greater than or equal to 0. Found [" + minDocCount + "] in [" + name + "]"); 314 | } 315 | this.minDocCount = minDocCount; 316 | return this; 317 | } 318 | 319 | /** 320 | * Returns the number of term buckets currently configured 321 | */ 322 | public int size() { 323 | return bucketCountThresholds.getRequiredSize(); 324 | } 325 | 326 | @Override 327 | public BucketCardinality bucketCardinality() { 328 | return BucketCardinality.MANY; 329 | } 330 | 331 | /** 332 | * Sets the shard_size - indicating the number of term buckets each shard 333 | * will return to the coordinating node (the node that coordinates the 334 | * search execution). The higher the shard size is, the more accurate the 335 | * results are. 336 | */ 337 | public DateHierarchyAggregationBuilder shardSize(int shardSize) { 338 | if (shardSize <= 0) { 339 | throw new IllegalArgumentException( 340 | "[shardSize] must be greater than 0. 
Found [" + shardSize + "] in [" + name + "]"); 341 | } 342 | bucketCountThresholds.setShardSize(shardSize); 343 | return this; 344 | } 345 | 346 | /** 347 | * Returns the number of term buckets per shard that are currently configured 348 | */ 349 | public int shardSize() { 350 | return bucketCountThresholds.getShardSize(); 351 | } 352 | 353 | @Override 354 | protected ValuesSourceAggregatorFactory innerBuild(AggregationContext context, 355 | ValuesSourceConfig config, 356 | AggregatorFactory parent, 357 | Builder subFactoriesBuilder) throws IOException { 358 | 359 | 360 | final List preparedRoundings = buildRoundings(); 361 | 362 | return new DateHierarchyAggregatorFactory( 363 | name, 364 | config, 365 | order, 366 | preparedRoundings, 367 | minDocCount, 368 | bucketCountThresholds, 369 | context, 370 | parent, 371 | subFactoriesBuilder, 372 | metadata); 373 | } 374 | 375 | @Override 376 | protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { 377 | builder.startObject(); 378 | 379 | if (order != null) { 380 | builder.field(ORDER_FIELD.getPreferredName()); 381 | order.toXContent(builder, params); 382 | } 383 | 384 | builder.field(MIN_DOC_COUNT_FIELD.getPreferredName(), minDocCount); 385 | 386 | return builder.endObject(); 387 | } 388 | 389 | @Override 390 | public int hashCode() { 391 | return Objects.hash(super.hashCode(), interval, order, minDocCount, bucketCountThresholds, timeZone); 392 | } 393 | 394 | @Override 395 | public boolean equals(Object obj) { 396 | if (this == obj) return true; 397 | if (obj == null || getClass() != obj.getClass()) return false; 398 | if (!super.equals(obj)) return false; 399 | DateHierarchyAggregationBuilder other = (DateHierarchyAggregationBuilder) obj; 400 | return Objects.equals(interval, other.interval) 401 | && Objects.equals(order, other.order) 402 | && Objects.equals(minDocCount, other.minDocCount) 403 | && Objects.equals(bucketCountThresholds, other.bucketCountThresholds) 404 | && Objects.equals(timeZone, other.timeZone); 405 | } 406 | 407 | @Override 408 | public String getType() { 409 | return NAME; 410 | } 411 | 412 | @Override 413 | protected ValuesSourceRegistry.RegistryKey getRegistryKey() { return REGISTRY_KEY; } 414 | } 415 | 416 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/DateHierarchyAggregationSupplier.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.elasticsearch.search.aggregations.Aggregator; 4 | import org.elasticsearch.search.aggregations.AggregatorFactories; 5 | import org.elasticsearch.search.aggregations.BucketOrder; 6 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 7 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 8 | import org.elasticsearch.search.internal.SearchContext; 9 | 10 | import java.io.IOException; 11 | import java.util.List; 12 | import java.util.Map; 13 | 14 | @FunctionalInterface 15 | public interface DateHierarchyAggregationSupplier { 16 | Aggregator build(String name, 17 | AggregatorFactories factories, 18 | BucketOrder order, 19 | List roundingsInfo, 20 | long minDocCount, 21 | DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds, 22 | ValuesSourceConfig valuesSourceConfig, 23 | SearchContext aggregationContext, 24 | Aggregator parent, 25 | CardinalityUpperBound 
cardinality, 26 | Map metadata) throws IOException; 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/DateHierarchyAggregator.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.index.SortedNumericDocValues; 5 | import org.apache.lucene.util.BytesRef; 6 | import org.elasticsearch.ElasticsearchException; 7 | import org.elasticsearch.common.Rounding; 8 | import org.elasticsearch.common.io.stream.StreamInput; 9 | import org.elasticsearch.common.io.stream.StreamOutput; 10 | import org.elasticsearch.common.io.stream.Writeable; 11 | import org.elasticsearch.core.Releasables; 12 | import org.elasticsearch.common.util.BytesRefHash; 13 | import org.elasticsearch.xcontent.ToXContentFragment; 14 | import org.elasticsearch.xcontent.XContentBuilder; 15 | import org.elasticsearch.search.aggregations.Aggregator; 16 | import org.elasticsearch.search.aggregations.AggregatorFactories; 17 | import org.elasticsearch.search.aggregations.BucketOrder; 18 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 19 | import org.elasticsearch.search.aggregations.InternalAggregation; 20 | import org.elasticsearch.search.aggregations.LeafBucketCollector; 21 | import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; 22 | import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; 23 | import org.elasticsearch.search.aggregations.support.ValuesSource; 24 | import org.elasticsearch.search.aggregations.support.AggregationContext; 25 | 26 | import java.io.IOException; 27 | import java.util.Arrays; 28 | import java.util.Comparator; 29 | import java.util.Iterator; 30 | import java.util.List; 31 | import java.util.Map; 32 | import java.util.Objects; 33 | 34 | 35 | public class DateHierarchyAggregator extends BucketsAggregator { 36 | 37 | public DateHierarchyAggregator(String name, 38 | AggregatorFactories factories, 39 | AggregationContext context, 40 | ValuesSource.Numeric valuesSource, 41 | BucketOrder order, 42 | long minDocCount, 43 | BucketCountThresholds bucketCountThresholds, 44 | List preparedRoundings, 45 | Aggregator parent, 46 | CardinalityUpperBound cardinalityUpperBound, 47 | Map metadata 48 | ) throws IOException { 49 | super(name, factories, context, parent, cardinalityUpperBound, metadata); 50 | this.valuesSource = valuesSource; 51 | this.preparedRoundings = preparedRoundings; 52 | this.minDocCount = minDocCount; 53 | bucketOrds = new BytesRefHash(1, context.bigArrays()); 54 | this.bucketCountThresholds = bucketCountThresholds; 55 | order.validate(this); 56 | this.order = order; 57 | this.partiallyBuiltBucketComparator = order == null ? null : order.partiallyBuiltBucketComparator(b -> b.bucketOrd, this); 58 | } 59 | 60 | public static class BucketCountThresholds implements Writeable, ToXContentFragment { 61 | private int requiredSize; 62 | private int shardSize; 63 | 64 | public BucketCountThresholds(int requiredSize, int shardSize) { 65 | this.requiredSize = requiredSize; 66 | this.shardSize = shardSize; 67 | } 68 | 69 | /** 70 | * Read from a stream. 
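 * Fields are read in the order they are written by writeTo: requiredSize first, then shardSize.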
71 | */ 72 | public BucketCountThresholds(StreamInput in) throws IOException { 73 | requiredSize = in.readInt(); 74 | shardSize = in.readInt(); 75 | } 76 | 77 | @Override 78 | public void writeTo(StreamOutput out) throws IOException { 79 | out.writeInt(requiredSize); 80 | out.writeInt(shardSize); 81 | } 82 | 83 | public BucketCountThresholds(DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds) { 84 | this(bucketCountThresholds.requiredSize, bucketCountThresholds.shardSize); 85 | } 86 | 87 | public void ensureValidity() { 88 | // shard_size cannot be smaller than size as we need to at least fetch size entries from every shards in order to return size 89 | if (shardSize < requiredSize) { 90 | setShardSize(requiredSize); 91 | } 92 | 93 | if (requiredSize <= 0 || shardSize <= 0) { 94 | throw new ElasticsearchException("parameters [required_size] and [shard_size] must be >0 in path-hierarchy aggregation."); 95 | } 96 | } 97 | 98 | public int getRequiredSize() { 99 | return requiredSize; 100 | } 101 | 102 | public void setRequiredSize(int requiredSize) { 103 | this.requiredSize = requiredSize; 104 | } 105 | 106 | public int getShardSize() { 107 | return shardSize; 108 | } 109 | 110 | public void setShardSize(int shardSize) { 111 | this.shardSize = shardSize; 112 | } 113 | 114 | @Override 115 | public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { 116 | builder.field(DateHierarchyAggregationBuilder.SIZE_FIELD.getPreferredName(), requiredSize); 117 | if (shardSize != -1) { 118 | builder.field(DateHierarchyAggregationBuilder.SHARD_SIZE_FIELD.getPreferredName(), shardSize); 119 | } 120 | return builder; 121 | } 122 | 123 | @Override 124 | public int hashCode() { 125 | return Objects.hash(requiredSize, shardSize); 126 | } 127 | 128 | @Override 129 | public boolean equals(Object obj) { 130 | if (obj == null) { 131 | return false; 132 | } 133 | if (getClass() != obj.getClass()) { 134 | return false; 135 | } 136 | DateHierarchyAggregator.BucketCountThresholds other = (DateHierarchyAggregator.BucketCountThresholds) obj; 137 | return Objects.equals(requiredSize, other.requiredSize) 138 | && Objects.equals(shardSize, other.shardSize); 139 | } 140 | } 141 | 142 | private final ValuesSource.Numeric valuesSource; 143 | private final BytesRefHash bucketOrds; 144 | private final BucketOrder order; 145 | private final long minDocCount; 146 | private final BucketCountThresholds bucketCountThresholds; 147 | private final List preparedRoundings; 148 | protected final Comparator partiallyBuiltBucketComparator; 149 | 150 | /** 151 | * The collector collects the docs, including or not some score (depending of the including of a Scorer) in the 152 | * collect() process. 153 | * 154 | * The LeafBucketCollector is a "Per-leaf bucket collector". It collects docs for the account of buckets. 
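 * For each date value of a document, the timestamp is rounded once per configured interval, from the
 * coarsest (e.g. years) down to the requested one, and the formatted results are accumulated into a
 * "/"-separated path such as "2012/01/05"; a bucket ordinal is created (or reused) for every prefix of
 * that path, so the document is counted at each level of the hierarchy.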
155 | */ 156 | @Override 157 | public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { 158 | if (valuesSource == null) { 159 | return LeafBucketCollector.NO_OP_COLLECTOR; 160 | } 161 | final SortedNumericDocValues values = valuesSource.longValues(ctx); 162 | 163 | return new LeafBucketCollectorBase(sub, values) { 164 | 165 | @Override 166 | public void collect(int doc, long bucket) throws IOException { 167 | assert bucket == 0; 168 | if (values.advanceExact(doc)) { 169 | final int valuesCount = values.docValueCount(); 170 | 171 | for (int i = 0; i < valuesCount; ++i) { 172 | long value = values.nextValue(); 173 | String path = ""; 174 | for (DateHierarchyAggregationBuilder.PreparedRounding preparedRounding: preparedRoundings) { 175 | long roundedValue = preparedRounding.prepared.round(value); 176 | path += preparedRounding.roundingInfo.format.format(roundedValue).toString(); 177 | long bucketOrd = bucketOrds.add(new BytesRef(path)); 178 | if (bucketOrd < 0) { // already seen 179 | bucketOrd = -1 - bucketOrd; 180 | collectExistingBucket(sub, doc, bucketOrd); 181 | } else { 182 | collectBucket(sub, doc, bucketOrd); 183 | } 184 | path += "/"; 185 | } 186 | } 187 | } 188 | } 189 | }; 190 | } 191 | 192 | @Override 193 | public InternalAggregation[] buildAggregations(long[] owningBucketOrdinals) throws IOException { 194 | 195 | InternalDateHierarchy.InternalBucket[][] topBucketsPerOrd = new InternalDateHierarchy.InternalBucket[owningBucketOrdinals.length][]; 196 | InternalDateHierarchy[] results = new InternalDateHierarchy[owningBucketOrdinals.length]; 197 | 198 | for (int ordIdx = 0; ordIdx < owningBucketOrdinals.length; ordIdx++) { 199 | assert owningBucketOrdinals[ordIdx] == 0; 200 | 201 | // build buckets and store them sorted 202 | final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); 203 | 204 | PathSortedTree pathSortedTree = new PathSortedTree<>(order.comparator(), size); 205 | 206 | InternalDateHierarchy.InternalBucket spare; 207 | for (int i = 0; i < bucketOrds.size(); i++) { 208 | spare = new InternalDateHierarchy.InternalBucket(0, null, null, null, 0, null); 209 | 210 | BytesRef term = new BytesRef(); 211 | bucketOrds.get(i, term); 212 | String[] paths = term.utf8ToString().split("/", -1); 213 | 214 | spare.paths = paths; 215 | spare.key = term; 216 | spare.level = paths.length - 1; 217 | spare.name = paths[spare.level]; 218 | spare.docCount = bucketDocCount(i); 219 | spare.bucketOrd = i; 220 | 221 | pathSortedTree.add(spare.paths, spare); 222 | } 223 | 224 | // Get the top buckets 225 | topBucketsPerOrd[ordIdx] = new InternalDateHierarchy.InternalBucket[size]; 226 | long otherHierarchyNodes = pathSortedTree.getFullSize(); 227 | Iterator iterator = pathSortedTree.consumer(); 228 | for (int i = 0; i < size; i++) { 229 | final InternalDateHierarchy.InternalBucket bucket = iterator.next(); 230 | topBucketsPerOrd[ordIdx][i] = bucket; 231 | otherHierarchyNodes -= 1; 232 | } 233 | 234 | results[ordIdx] = new InternalDateHierarchy(name, Arrays.asList(topBucketsPerOrd[ordIdx]), order, 235 | minDocCount, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), 236 | otherHierarchyNodes, metadata()); 237 | } 238 | 239 | // Build sub-aggregations for pruned buckets 240 | buildSubAggsForAllBuckets( 241 | topBucketsPerOrd, 242 | b -> b.bucketOrd, 243 | (b, aggregations) -> b.aggregations = aggregations 244 | ); 245 | 246 | return results; 247 | } 248 | 249 | @Override 250 | public 
InternalAggregation buildEmptyAggregation() { 251 | return new InternalDateHierarchy(name, null, order, minDocCount, bucketCountThresholds.getRequiredSize(), 252 | bucketCountThresholds.getShardSize(), 0, metadata()); 253 | } 254 | 255 | @Override 256 | protected void doClose() { 257 | Releasables.close(bucketOrds); 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/DateHierarchyAggregatorFactory.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.elasticsearch.search.aggregations.Aggregator; 4 | import org.elasticsearch.search.aggregations.AggregatorFactories; 5 | import org.elasticsearch.search.aggregations.AggregatorFactory; 6 | import org.elasticsearch.search.aggregations.BucketOrder; 7 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 8 | import org.elasticsearch.search.aggregations.InternalAggregation; 9 | import org.elasticsearch.search.aggregations.InternalOrder; 10 | import org.elasticsearch.search.aggregations.NonCollectingAggregator; 11 | import org.elasticsearch.search.aggregations.bucket.BucketUtils; 12 | import org.elasticsearch.search.aggregations.support.AggregationContext; 13 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; 14 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 15 | import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; 16 | import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; 17 | import org.elasticsearch.search.aggregations.support.ValuesSource; 18 | import org.elasticsearch.search.internal.SearchContext; 19 | 20 | import java.io.IOException; 21 | import java.util.ArrayList; 22 | import java.util.List; 23 | import java.util.Map; 24 | 25 | /** 26 | * The factory of aggregators. 
27 | * ValuesSourceAggregatorFactory extends {@link AggregatorFactory} 28 | */ 29 | class DateHierarchyAggregatorFactory extends ValuesSourceAggregatorFactory { 30 | 31 | private long minDocCount; 32 | private BucketOrder order; 33 | private List preparedRoundings; 34 | private final DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds; 35 | 36 | DateHierarchyAggregatorFactory(String name, 37 | ValuesSourceConfig config, 38 | BucketOrder order, 39 | List preparedRoundings, 40 | long minDocCount, 41 | DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds, 42 | AggregationContext context, 43 | AggregatorFactory parent, 44 | AggregatorFactories.Builder subFactoriesBuilder, 45 | Map metadata 46 | ) throws IOException { 47 | super(name, config, context, parent, subFactoriesBuilder, metadata); 48 | this.order = order; 49 | this.preparedRoundings = preparedRoundings; 50 | this.minDocCount = minDocCount; 51 | this.bucketCountThresholds = bucketCountThresholds; 52 | } 53 | 54 | public static void registerAggregators(ValuesSourceRegistry.Builder builder) { 55 | builder.register(DateHierarchyAggregationBuilder.REGISTRY_KEY, CoreValuesSourceType.DATE, (name, 56 | factories, 57 | order, 58 | roundingsInfo, 59 | minDocCount, 60 | bucketCountThresholds, 61 | valuesSourceConfig, 62 | aggregationContext, 63 | parent, 64 | cardinality, 65 | metadata) -> null, 66 | true); 67 | } 68 | 69 | @Override 70 | protected Aggregator createUnmapped(Aggregator parent, 71 | Map metadata) throws IOException { 72 | final InternalAggregation aggregation = new InternalDateHierarchy(name, new ArrayList<>(), order, minDocCount, 73 | bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), 0, metadata); 74 | return new NonCollectingAggregator(name, context, parent, factories, metadata) { 75 | { 76 | // even in the case of an unmapped aggregator, validate the 77 | // order 78 | order.validate(this); 79 | } 80 | 81 | @Override 82 | public InternalAggregation buildEmptyAggregation() { return aggregation; } 83 | }; 84 | } 85 | 86 | @Override 87 | protected Aggregator doCreateInternal(Aggregator parent, CardinalityUpperBound cardinality, Map metadata 88 | ) throws IOException { 89 | 90 | DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds = new 91 | DateHierarchyAggregator.BucketCountThresholds(this.bucketCountThresholds); 92 | if (!InternalOrder.isKeyOrder(order) 93 | && bucketCountThresholds.getShardSize() == DateHierarchyAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) { 94 | // The user has not made a shardSize selection. 
Use default 95 | // heuristic to avoid any wrong-ranking caused by distributed 96 | // counting 97 | bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize())); 98 | } 99 | bucketCountThresholds.ensureValidity(); 100 | return new DateHierarchyAggregator( 101 | name, factories, context, (ValuesSource.Numeric) config.getValuesSource(), 102 | order, minDocCount, bucketCountThresholds, preparedRoundings, parent, cardinality, metadata); 103 | } 104 | } 105 | 106 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/InternalDateHierarchy.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.util.BytesRef; 4 | import org.elasticsearch.common.io.stream.StreamInput; 5 | import org.elasticsearch.common.io.stream.StreamOutput; 6 | import org.elasticsearch.xcontent.XContentBuilder; 7 | import org.elasticsearch.search.aggregations.Aggregations; 8 | import org.elasticsearch.search.aggregations.BucketOrder; 9 | import org.elasticsearch.search.aggregations.InternalAggregation; 10 | import org.elasticsearch.search.aggregations.InternalAggregations; 11 | import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation; 12 | import org.elasticsearch.search.aggregations.InternalOrder; 13 | import org.elasticsearch.search.aggregations.KeyComparable; 14 | import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation; 15 | 16 | import java.io.IOException; 17 | import java.util.ArrayList; 18 | import java.util.Iterator; 19 | import java.util.LinkedHashMap; 20 | import java.util.List; 21 | import java.util.Map; 22 | import java.util.Objects; 23 | 24 | /** 25 | * An internal implementation of {@link InternalMultiBucketAggregation} 26 | * which extends {@link org.elasticsearch.search.aggregations.Aggregation}. 27 | * Mainly, returns the builder and makes the reduce of buckets. 28 | */ 29 | public class InternalDateHierarchy extends InternalMultiBucketAggregation { 31 | 32 | /** 33 | * The bucket class of InternalDateHierarchy. 34 | * @see MultiBucketsAggregation.Bucket 35 | */ 36 | public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucket implements 37 | KeyComparable { 38 | 39 | BytesRef key; 40 | String name; 41 | long bucketOrd; 42 | protected String[] paths; 43 | protected long docCount; 44 | protected InternalAggregations aggregations; 45 | protected int level; 46 | 47 | public InternalBucket(long docCount, InternalAggregations aggregations, BytesRef key, String name, int level, String[] paths) { 48 | this.key = key; 49 | this.name = name; 50 | this.docCount = docCount; 51 | this.aggregations = aggregations; 52 | this.level = level; 53 | this.paths = paths; 54 | } 55 | 56 | /** 57 | * Read from a stream. 58 | */ 59 | public InternalBucket(StreamInput in) throws IOException { 60 | key = in.readBytesRef(); 61 | name = in.readString(); 62 | docCount = in.readLong(); 63 | aggregations = InternalAggregations.readFrom(in); 64 | level = in.readInt(); 65 | int pathsSize = in.readInt(); 66 | paths = new String[pathsSize]; 67 | for (int i=0; i < pathsSize; i++) { 68 | paths[i] = in.readString(); 69 | } 70 | } 71 | 72 | /** 73 | * Write to a stream. 
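
The wire format of a bucket is purely positional: the StreamInput constructor above and writeTo below must agree on the exact fields and order, with the paths array length-prefixed. The same contract, reduced to plain java.io streams for a string array (a sketch, not the Elasticsearch StreamInput/StreamOutput API):

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

// Sketch of the length-prefixed, order-sensitive encoding used for `paths`.
public class PathsWireSketch {
    static void write(DataOutputStream out, String[] paths) throws IOException {
        out.writeInt(paths.length);      // length prefix first...
        for (String path : paths) {
            out.writeUTF(path);          // ...then each element in order
        }
    }

    static String[] read(DataInputStream in) throws IOException {
        String[] paths = new String[in.readInt()];   // must mirror write() exactly
        for (int i = 0; i < paths.length; i++) {
            paths[i] = in.readUTF();
        }
        return paths;
    }

    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        write(new DataOutputStream(bytes), new String[] {"2021", "2021-03", "2021-03-14"});
        String[] roundTripped = read(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(Arrays.toString(roundTripped));   // [2021, 2021-03, 2021-03-14]
    }
}
```
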
74 | */ 75 | @Override 76 | public void writeTo(StreamOutput out) throws IOException { 77 | out.writeBytesRef(key); 78 | out.writeString(name); 79 | out.writeLong(docCount); 80 | aggregations.writeTo(out); 81 | out.writeInt(level); 82 | out.writeInt(paths.length); 83 | for (String path: paths) { 84 | out.writeString(path); 85 | } 86 | } 87 | 88 | @Override 89 | public Object getKey() { 90 | return key; 91 | } 92 | 93 | @Override 94 | public String getKeyAsString() { 95 | return key.utf8ToString(); 96 | } 97 | 98 | @Override 99 | public int compareKey(InternalBucket other) { 100 | return key.compareTo(other.key); 101 | } 102 | 103 | @Override 104 | public long getDocCount() { 105 | return docCount; 106 | } 107 | 108 | @Override 109 | public Aggregations getAggregations() { 110 | return aggregations; 111 | } 112 | 113 | @Override 114 | public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { 115 | builder.startObject(); 116 | builder.field(CommonFields.DOC_COUNT.getPreferredName(), docCount); 117 | aggregations.toXContentInternal(builder, params); 118 | builder.endObject(); 119 | return builder; 120 | } 121 | } 122 | 123 | 124 | private List buckets; 125 | private BucketOrder order; 126 | private final int requiredSize; 127 | private final int shardSize; 128 | private final long otherHierarchyNodes; 129 | private final long minDocCount; 130 | 131 | public InternalDateHierarchy( 132 | String name, 133 | List buckets, 134 | BucketOrder order, 135 | long minDocCount, 136 | int requiredSize, 137 | int shardSize, 138 | long otherHierarchyNodes, 139 | Map metadata 140 | ) { 141 | super(name, metadata); 142 | this.buckets = buckets; 143 | this.order = order; 144 | this.minDocCount = minDocCount; 145 | this.requiredSize = requiredSize; 146 | this.shardSize = shardSize; 147 | this.otherHierarchyNodes = otherHierarchyNodes; 148 | } 149 | 150 | /** 151 | * Read from a stream. 152 | */ 153 | public InternalDateHierarchy(StreamInput in) throws IOException { 154 | super(in); 155 | order = InternalOrder.Streams.readOrder(in); 156 | minDocCount = in.readVLong(); 157 | requiredSize = readSize(in); 158 | shardSize = readSize(in); 159 | otherHierarchyNodes = in.readVLong(); 160 | int bucketsSize = in.readInt(); 161 | this.buckets = new ArrayList<>(bucketsSize); 162 | for (int i=0; i buckets) { 198 | return new InternalDateHierarchy( 199 | this.name, buckets, order, minDocCount, requiredSize, shardSize, otherHierarchyNodes, 200 | this.metadata); 201 | } 202 | 203 | @Override 204 | public InternalBucket createBucket(InternalAggregations aggregations, InternalBucket prototype) { 205 | return new InternalBucket(prototype.docCount, aggregations, prototype.key, prototype.name, prototype.level, prototype.paths); 206 | } 207 | 208 | @Override 209 | public List getBuckets() { 210 | return buckets; 211 | } 212 | 213 | /** 214 | * Reduces the given aggregations to a single one and returns it. 
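
Conceptually the reduce phase groups the shard-level buckets by key and sums their doc counts before re-sorting and truncating to the requested size. Stripped of the Elasticsearch plumbing (sub-aggregation reduction, min_doc_count filtering and the PathSortedTree re-sort are left out), the grouping step amounts to this sketch, with plain maps standing in for the InternalBucket lists:

```java
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Sketch of the per-key merge performed during reduce(): same-key buckets
// coming from different shards are collapsed into one, summing doc counts.
public class ReduceSketch {
    public static void main(String[] args) {
        List<Map<String, Long>> shardBuckets = List.of(
                Map.of("2021", 7L, "2021/2021-03", 4L),
                Map.of("2021", 5L, "2021/2021-04", 2L));

        Map<String, Long> reduced = new LinkedHashMap<>();
        for (Map<String, Long> shard : shardBuckets) {
            shard.forEach((key, docCount) -> reduced.merge(key, docCount, Long::sum));
        }
        // doc counts after the merge: 2021 -> 12, 2021/2021-03 -> 4, 2021/2021-04 -> 2
        System.out.println(reduced);
    }
}
```
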
215 | */ 216 | @Override 217 | public InternalDateHierarchy reduce(List aggregations, ReduceContext reduceContext) { 218 | Map> buckets = null; 219 | long otherHierarchyNodes = 0; 220 | 221 | // extract buckets from aggregations 222 | for (InternalAggregation aggregation : aggregations) { 223 | InternalDateHierarchy dateHierarchy = (InternalDateHierarchy) aggregation; 224 | if (buckets == null) { 225 | buckets = new LinkedHashMap<>(); 226 | } 227 | 228 | otherHierarchyNodes += dateHierarchy.getSumOtherHierarchyNodes(); 229 | 230 | for (InternalBucket bucket : dateHierarchy.buckets) { 231 | List existingBuckets = buckets.get(bucket.key); 232 | if (existingBuckets == null) { 233 | existingBuckets = new ArrayList<>(aggregations.size()); 234 | buckets.put(bucket.key, existingBuckets); 235 | } 236 | existingBuckets.add(bucket); 237 | } 238 | } 239 | 240 | // reduce and sort buckets depending of ordering rules 241 | final int size = !reduceContext.isFinalReduce() ? buckets.size() : Math.min(requiredSize, buckets.size()); 242 | PathSortedTree ordered = new PathSortedTree<>(order.comparator(), size); 243 | for (List sameTermBuckets : buckets.values()) { 244 | 245 | final InternalBucket b = reduceBucket(sameTermBuckets, reduceContext); 246 | if (b.getDocCount() >= minDocCount || !reduceContext.isFinalReduce()) { 247 | reduceContext.consumeBucketsAndMaybeBreak(1); 248 | ordered.add(b.paths, b); 249 | } else { 250 | reduceContext.consumeBucketsAndMaybeBreak(-countInnerBucket(b)); 251 | } 252 | } 253 | 254 | long sum_other_hierarchy_nodes = ordered.getFullSize() - size + otherHierarchyNodes; 255 | return new InternalDateHierarchy(getName(), ordered.getAsList(), order, minDocCount, requiredSize, shardSize, 256 | sum_other_hierarchy_nodes, getMetadata()); 257 | } 258 | 259 | @Override 260 | protected InternalBucket reduceBucket(List buckets, ReduceContext context) { 261 | List aggregationsList = new ArrayList<>(buckets.size()); 262 | InternalBucket reduced = null; 263 | for (InternalBucket bucket : buckets) { 264 | if (reduced == null) { 265 | reduced = bucket; 266 | } else { 267 | reduced.docCount += bucket.docCount; 268 | } 269 | aggregationsList.add(bucket.aggregations); 270 | } 271 | reduced.aggregations = InternalAggregations.reduce(aggregationsList, context); 272 | return reduced; 273 | } 274 | 275 | @Override 276 | public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { 277 | Iterator bucketIterator = buckets.iterator(); 278 | builder.startArray(CommonFields.BUCKETS.getPreferredName()); 279 | InternalBucket prevBucket = null; 280 | InternalBucket currentBucket = null; 281 | while (bucketIterator.hasNext()) { 282 | currentBucket = bucketIterator.next(); 283 | 284 | if (prevBucket != null) { 285 | if (prevBucket.level == currentBucket.level) { 286 | builder.endObject(); 287 | } else if (prevBucket.level < currentBucket.level) { 288 | builder.startObject(name); 289 | builder.startArray(CommonFields.BUCKETS.getPreferredName()); 290 | } else { 291 | for (int i = currentBucket.level; i < prevBucket.level; i++) { 292 | builder.endObject(); 293 | builder.endArray(); 294 | builder.endObject(); 295 | } 296 | builder.endObject(); 297 | } 298 | } 299 | 300 | builder.startObject(); 301 | builder.field(CommonFields.KEY.getPreferredName(), currentBucket.name); 302 | builder.field(CommonFields.DOC_COUNT.getPreferredName(), currentBucket.docCount); 303 | currentBucket.getAggregations().toXContentInternal(builder, params); 304 | 305 | prevBucket = currentBucket; 306 | } 
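
The level bookkeeping in the loop above is what turns the flat, depth-first bucket list back into nested output: staying on the same level closes the previous sibling, going deeper opens a nested array, and coming back up closes one wrapper per level. A compact standalone rendering of the same walk (string concatenation instead of XContentBuilder; the sample buckets are invented):

```java
import java.util.List;

// Sketch of the prev/current level comparison used in doXContentBody().
public class NestedRenderSketch {
    record Bucket(int level, String key, long docCount) {}

    public static void main(String[] args) {
        List<Bucket> depthFirst = List.of(
                new Bucket(0, "fruits", 6),
                new Bucket(1, "apples", 4),
                new Bucket(1, "pears", 2),
                new Bucket(0, "vegetables", 3));

        StringBuilder out = new StringBuilder("[");
        Bucket prev = null;
        for (Bucket current : depthFirst) {
            if (prev != null) {
                if (prev.level() == current.level()) {
                    out.append("}, ");                                 // same depth: close the previous sibling
                } else if (prev.level() < current.level()) {
                    out.append(", \"children\": [");                   // going deeper: open a nested array
                } else {
                    out.append("}]".repeat(prev.level() - current.level()))
                       .append("}, ");                                 // coming back up: close one wrapper per level
                }
            }
            out.append("{\"").append(current.key()).append("\": ").append(current.docCount());
            prev = current;
        }
        if (prev != null) {
            out.append("}").append("]}".repeat(prev.level()));         // close whatever is still open
        }
        System.out.println(out.append("]"));
        // [{"fruits": 6, "children": [{"apples": 4}, {"pears": 2}]}, {"vegetables": 3}]
    }
}
```
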
307 | 308 | if (currentBucket != null) { 309 | for (int i=0; i < currentBucket.level; i++) { 310 | builder.endObject(); 311 | builder.endArray(); 312 | builder.endObject(); 313 | } 314 | builder.endObject(); 315 | } 316 | 317 | builder.endArray(); 318 | return builder; 319 | } 320 | 321 | @Override 322 | public int hashCode() { 323 | return Objects.hash(buckets, order, requiredSize, shardSize, otherHierarchyNodes, minDocCount); 324 | } 325 | 326 | @Override 327 | public boolean equals(Object obj) { 328 | InternalDateHierarchy that = (InternalDateHierarchy) obj; 329 | return Objects.equals(buckets, that.buckets) 330 | && Objects.equals(order, that.order) 331 | && Objects.equals(minDocCount, that.minDocCount) 332 | && Objects.equals(requiredSize, that.requiredSize) 333 | && Objects.equals(shardSize, that.shardSize) 334 | && Objects.equals(otherHierarchyNodes, that.otherHierarchyNodes); 335 | } 336 | } 337 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/InternalPathHierarchy.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.util.BytesRef; 4 | import org.elasticsearch.xcontent.ParseField; 5 | import org.elasticsearch.common.io.stream.StreamInput; 6 | import org.elasticsearch.common.io.stream.StreamOutput; 7 | import org.elasticsearch.xcontent.XContentBuilder; 8 | import org.elasticsearch.search.aggregations.Aggregation; 9 | import org.elasticsearch.search.aggregations.Aggregations; 10 | import org.elasticsearch.search.aggregations.BucketOrder; 11 | import org.elasticsearch.search.aggregations.InternalAggregation; 12 | import org.elasticsearch.search.aggregations.InternalAggregations; 13 | import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation; 14 | import org.elasticsearch.search.aggregations.InternalOrder; 15 | import org.elasticsearch.search.aggregations.KeyComparable; 16 | import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation; 17 | 18 | import java.io.IOException; 19 | import java.util.Arrays; 20 | import java.util.ArrayList; 21 | import java.util.Iterator; 22 | import java.util.List; 23 | import java.util.Map; 24 | import java.util.Objects; 25 | import java.util.TreeMap; 26 | 27 | /** 28 | * An internal implementation of {@link InternalMultiBucketAggregation} which extends {@link Aggregation}. 29 | * Mainly, returns the builder and makes the reduce of buckets. 30 | */ 31 | public class InternalPathHierarchy extends InternalMultiBucketAggregation { 33 | protected static final ParseField SUM_OF_OTHER_HIERARCHY_NODES = new ParseField("sum_other_hierarchy_nodes"); 34 | protected static final ParseField PATHS = new ParseField("path"); 35 | 36 | /** 37 | * The bucket class of InternalPathHierarchy. 
38 | * @see MultiBucketsAggregation.Bucket 39 | */ 40 | public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucket implements 41 | KeyComparable { 42 | 43 | BytesRef termBytes; 44 | long bucketOrd; 45 | protected String[] paths; 46 | protected long docCount; 47 | protected InternalAggregations aggregations; 48 | protected int level; 49 | protected int minDepth; 50 | protected String basename; 51 | 52 | public InternalBucket(long docCount, InternalAggregations aggregations, String basename, 53 | BytesRef term, int level, int minDepth, String[] paths) { 54 | termBytes = term; 55 | this.docCount = docCount; 56 | this.aggregations = aggregations; 57 | this.level = level; 58 | this.minDepth = minDepth; 59 | this.basename = basename; 60 | this.paths = paths; 61 | } 62 | 63 | /** 64 | * Read from a stream. 65 | */ 66 | public InternalBucket(StreamInput in) throws IOException { 67 | termBytes = in.readBytesRef(); 68 | docCount = in.readLong(); 69 | aggregations = InternalAggregations.readFrom(in); 70 | level = in.readInt(); 71 | minDepth = in.readInt(); 72 | basename = in.readString(); 73 | int pathsSize = in.readInt(); 74 | paths = new String[pathsSize]; 75 | for (int i=0; i < pathsSize; i++) { 76 | paths[i] = in.readString(); 77 | } 78 | } 79 | 80 | /** 81 | * Write to a stream. 82 | */ 83 | @Override 84 | public void writeTo(StreamOutput out) throws IOException { 85 | out.writeBytesRef(termBytes); 86 | out.writeLong(docCount); 87 | aggregations.writeTo(out); 88 | out.writeInt(level); 89 | out.writeInt(minDepth); 90 | out.writeString(basename); 91 | out.writeInt(paths.length); 92 | for (String path: paths) { 93 | out.writeString(path); 94 | } 95 | } 96 | 97 | @Override 98 | public String getKey() { 99 | return termBytes.utf8ToString(); 100 | } 101 | 102 | @Override 103 | public String getKeyAsString() { 104 | return termBytes.utf8ToString(); 105 | } 106 | 107 | @Override 108 | public int compareKey(InternalBucket other) { 109 | return termBytes.compareTo(other.termBytes); 110 | } 111 | 112 | @Override 113 | public long getDocCount() { 114 | return docCount; 115 | } 116 | 117 | @Override 118 | public Aggregations getAggregations() { 119 | return aggregations; 120 | } 121 | 122 | @Override 123 | public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { 124 | builder.startObject(); 125 | builder.field(CommonFields.DOC_COUNT.getPreferredName(), docCount); 126 | aggregations.toXContentInternal(builder, params); 127 | builder.endObject(); 128 | return builder; 129 | } 130 | } 131 | 132 | 133 | private List buckets; 134 | private BytesRef separator; 135 | private BucketOrder order; 136 | private final int requiredSize; 137 | private final int shardSize; 138 | private final long otherHierarchyNodes; 139 | private final long minDocCount; 140 | 141 | public InternalPathHierarchy( 142 | String name, 143 | List buckets, 144 | BucketOrder order, 145 | long minDocCount, 146 | int requiredSize, 147 | int shardSize, 148 | long otherHierarchyNodes, 149 | BytesRef separator, 150 | Map metadata 151 | ) { 152 | super(name, metadata); 153 | this.buckets = buckets; 154 | this.order = order; 155 | this.minDocCount = minDocCount; 156 | this.requiredSize = requiredSize; 157 | this.shardSize = shardSize; 158 | this.otherHierarchyNodes = otherHierarchyNodes; 159 | this.separator = separator; 160 | } 161 | 162 | /** 163 | * Read from a stream. 
164 | */ 165 | public InternalPathHierarchy(StreamInput in) throws IOException { 166 | super(in); 167 | order = InternalOrder.Streams.readOrder(in); 168 | minDocCount = in.readVLong(); 169 | requiredSize = readSize(in); 170 | shardSize = readSize(in); 171 | otherHierarchyNodes = in.readVLong(); 172 | separator = in.readBytesRef(); 173 | int bucketsSize = in.readInt(); 174 | this.buckets = new ArrayList<>(bucketsSize); 175 | for (int i=0; i buckets) { 212 | return new InternalPathHierarchy(this.name, buckets, order, minDocCount, requiredSize, shardSize, otherHierarchyNodes, 213 | this.separator, this.metadata); 214 | } 215 | 216 | @Override 217 | public InternalBucket createBucket(InternalAggregations aggregations, InternalBucket prototype) { 218 | return new InternalBucket(prototype.docCount, aggregations, prototype.basename, prototype.termBytes, 219 | prototype.level, prototype.minDepth, prototype.paths); 220 | } 221 | 222 | @Override 223 | public List getBuckets() { 224 | return buckets; 225 | } 226 | 227 | /** 228 | * Reduces the given aggregations to a single one and returns it. 229 | */ 230 | @Override 231 | public InternalPathHierarchy reduce(List aggregations, ReduceContext reduceContext) { 232 | Map> buckets = null; 233 | long otherHierarchyNodes = 0; 234 | 235 | // extract buckets from aggregations 236 | for (InternalAggregation aggregation : aggregations) { 237 | InternalPathHierarchy pathHierarchy = (InternalPathHierarchy) aggregation; 238 | if (buckets == null) { 239 | buckets = new TreeMap<>(); 240 | } 241 | 242 | otherHierarchyNodes += pathHierarchy.getSumOtherHierarchyNodes(); 243 | 244 | for (InternalBucket bucket : pathHierarchy.buckets) { 245 | List existingBuckets = buckets.get(bucket.termBytes); 246 | if (existingBuckets == null) { 247 | existingBuckets = new ArrayList<>(aggregations.size()); 248 | buckets.put(bucket.termBytes, existingBuckets); 249 | } 250 | existingBuckets.add(bucket); 251 | } 252 | } 253 | 254 | // reduce and sort buckets depending of ordering rules 255 | final int size = !reduceContext.isFinalReduce() ? 
buckets.size() : Math.min(requiredSize, buckets.size()); 256 | PathSortedTree ordered = new PathSortedTree<>(order.comparator(), size); 257 | for (List sameTermBuckets : buckets.values()) { 258 | final InternalBucket b = reduceBucket(sameTermBuckets, reduceContext); 259 | if (b.getDocCount() >= minDocCount || !reduceContext.isFinalReduce()) { 260 | reduceContext.consumeBucketsAndMaybeBreak(1); 261 | String [] pathsForTree; 262 | if (b.minDepth > 0) { 263 | pathsForTree = Arrays.copyOfRange(b.paths, b.minDepth, b.paths.length); 264 | } else { 265 | pathsForTree = b.paths; 266 | } 267 | ordered.add(pathsForTree, b); 268 | } else { 269 | reduceContext.consumeBucketsAndMaybeBreak(-countInnerBucket(b)); 270 | } 271 | } 272 | 273 | long sum_other_hierarchy_nodes = ordered.getFullSize() - size + otherHierarchyNodes; 274 | return new InternalPathHierarchy(getName(), ordered.getAsList(), order, minDocCount, requiredSize, shardSize, 275 | sum_other_hierarchy_nodes, separator, getMetadata()); 276 | } 277 | 278 | /** 279 | * Utility method of InternalPathHierarchy.doReduce() 280 | */ 281 | @Override 282 | protected InternalBucket reduceBucket(List buckets, ReduceContext context) { 283 | List aggregationsList = new ArrayList<>(buckets.size()); 284 | InternalBucket reduced = null; 285 | for (InternalBucket bucket : buckets) { 286 | if (reduced == null) { 287 | reduced = bucket; 288 | } else { 289 | reduced.docCount += bucket.docCount; 290 | } 291 | aggregationsList.add(bucket.aggregations); 292 | } 293 | reduced.aggregations = InternalAggregations.reduce(aggregationsList, context); 294 | return reduced; 295 | } 296 | 297 | @Override 298 | public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { 299 | // builder.field(SUM_OF_OTHER_HIERARCHY_NODES.getPreferredName(), otherHierarchyNodes); 300 | Iterator bucketIterator = buckets.iterator(); 301 | builder.startArray(CommonFields.BUCKETS.getPreferredName()); 302 | InternalBucket prevBucket = null; 303 | InternalBucket currentBucket = null; 304 | while (bucketIterator.hasNext()) { 305 | currentBucket = bucketIterator.next(); 306 | 307 | if (prevBucket != null) { 308 | if (prevBucket.level == currentBucket.level) { 309 | builder.endObject(); 310 | } else if (prevBucket.level < currentBucket.level) { 311 | builder.startObject(name); 312 | builder.startArray(CommonFields.BUCKETS.getPreferredName()); 313 | } else { 314 | for (int i = currentBucket.level; i < prevBucket.level; i++) { 315 | builder.endObject(); 316 | builder.endArray(); 317 | builder.endObject(); 318 | } 319 | builder.endObject(); 320 | } 321 | } 322 | 323 | builder.startObject(); 324 | builder.field(CommonFields.KEY.getPreferredName(), currentBucket.basename); 325 | builder.field(CommonFields.DOC_COUNT.getPreferredName(), currentBucket.docCount); 326 | builder.field(PATHS.getPreferredName(), Arrays.copyOf(currentBucket.paths, currentBucket.paths.length -1)); 327 | currentBucket.getAggregations().toXContentInternal(builder, params); 328 | 329 | prevBucket = currentBucket; 330 | } 331 | 332 | if (currentBucket != null) { 333 | for (int i=0; i < currentBucket.level; i++) { 334 | builder.endObject(); 335 | builder.endArray(); 336 | builder.endObject(); 337 | } 338 | builder.endObject(); 339 | } 340 | 341 | builder.endArray(); 342 | return builder; 343 | } 344 | 345 | @Override 346 | public int hashCode() { 347 | return Objects.hash(buckets, separator, order, requiredSize, shardSize, otherHierarchyNodes, minDocCount); 348 | } 349 | 350 | @Override 351 | public 
boolean equals(Object obj) { 352 | InternalPathHierarchy that = (InternalPathHierarchy) obj; 353 | return Objects.equals(buckets, that.buckets) 354 | && Objects.equals(separator, that.separator) 355 | && Objects.equals(order, that.order) 356 | && Objects.equals(minDocCount, that.minDocCount) 357 | && Objects.equals(requiredSize, that.requiredSize) 358 | && Objects.equals(shardSize, that.shardSize) 359 | && Objects.equals(otherHierarchyNodes, that.otherHierarchyNodes); 360 | } 361 | } 362 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/PathHierarchyAggregationBuilder.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.elasticsearch.Version; 4 | import org.elasticsearch.xcontent.ParseField; 5 | import org.elasticsearch.common.io.stream.StreamInput; 6 | import org.elasticsearch.common.io.stream.StreamOutput; 7 | import org.elasticsearch.xcontent.ObjectParser; 8 | import org.elasticsearch.xcontent.XContentBuilder; 9 | import org.elasticsearch.xcontent.XContentParser; 10 | import org.elasticsearch.search.aggregations.support.AggregationContext; 11 | import org.elasticsearch.search.aggregations.AggregationBuilder; 12 | import org.elasticsearch.search.aggregations.AggregatorFactories; 13 | import org.elasticsearch.search.aggregations.AggregatorFactory; 14 | import org.elasticsearch.search.aggregations.AggregatorFactories.Builder; 15 | import org.elasticsearch.search.aggregations.BucketOrder; 16 | import org.elasticsearch.search.aggregations.InternalOrder; 17 | import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; 18 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder; 19 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; 20 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 21 | import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; 22 | import org.elasticsearch.search.aggregations.support.ValuesSourceType; 23 | 24 | import java.io.IOException; 25 | import java.util.List; 26 | import java.util.Map; 27 | import java.util.Objects; 28 | 29 | 30 | /** 31 | * The builder of the aggregatorFactory. Also implements the parsing of the request. 
32 | */ 33 | public class PathHierarchyAggregationBuilder extends ValuesSourceAggregationBuilder { 34 | public static final String NAME = "path_hierarchy"; 35 | public static final ValuesSourceRegistry.RegistryKey REGISTRY_KEY = 36 | new ValuesSourceRegistry.RegistryKey<>(NAME, PathHierarchyAggregationSupplier.class); 37 | 38 | public static final ParseField SEPARATOR_FIELD = new ParseField("separator"); 39 | public static final ParseField MIN_DEPTH_FIELD = new ParseField("min_depth"); 40 | public static final ParseField MAX_DEPTH_FIELD = new ParseField("max_depth"); 41 | public static final ParseField KEEP_BLANK_PATH = new ParseField("keep_blank_path"); 42 | public static final ParseField DEPTH_FIELD = new ParseField("depth"); 43 | public static final ParseField ORDER_FIELD = new ParseField("order"); 44 | public static final ParseField SIZE_FIELD = new ParseField("size"); 45 | public static final ParseField SHARD_SIZE_FIELD = new ParseField("shard_size"); 46 | public static final ParseField MIN_DOC_COUNT_FIELD = new ParseField("min_doc_count"); 47 | 48 | public static final PathHierarchyAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new 49 | PathHierarchyAggregator.BucketCountThresholds(10, -1); 50 | public static final ObjectParser PARSER = 51 | ObjectParser.fromBuilder(NAME, PathHierarchyAggregationBuilder::new); 52 | static { 53 | ValuesSourceAggregationBuilder.declareFields(PARSER, true, true, false); 54 | 55 | PARSER.declareString(PathHierarchyAggregationBuilder::separator, SEPARATOR_FIELD); 56 | PARSER.declareInt(PathHierarchyAggregationBuilder::minDepth, MIN_DEPTH_FIELD); 57 | PARSER.declareInt(PathHierarchyAggregationBuilder::maxDepth, MAX_DEPTH_FIELD); 58 | PARSER.declareBoolean(PathHierarchyAggregationBuilder::keepBlankPath, KEEP_BLANK_PATH); 59 | PARSER.declareInt(PathHierarchyAggregationBuilder::depth, DEPTH_FIELD); 60 | PARSER.declareInt(PathHierarchyAggregationBuilder::size, SIZE_FIELD); 61 | PARSER.declareLong(PathHierarchyAggregationBuilder::minDocCount, MIN_DOC_COUNT_FIELD); 62 | PARSER.declareInt(PathHierarchyAggregationBuilder::shardSize, SHARD_SIZE_FIELD); 63 | PARSER.declareObjectArray(PathHierarchyAggregationBuilder::order, (p, c) -> InternalOrder.Parser.parseOrderParam(p), 64 | ORDER_FIELD); 65 | } 66 | 67 | public static AggregationBuilder parse(String aggregationName, XContentParser parser) throws IOException { 68 | return PARSER.parse(parser, new PathHierarchyAggregationBuilder(aggregationName), null); 69 | } 70 | 71 | public static void registerAggregators(ValuesSourceRegistry.Builder builder) { 72 | PathHierarchyAggregatorFactory.registerAggregators(builder); 73 | } 74 | 75 | private static final String DEFAULT_SEPARATOR = "/"; 76 | private static final int DEFAULT_MIN_DEPTH = 0; 77 | private static final int DEFAULT_MAX_DEPTH = 3; 78 | private static final boolean DEFAULT_KEEP_BLANK_PATH = false; 79 | private String separator = DEFAULT_SEPARATOR; 80 | private int minDepth = DEFAULT_MIN_DEPTH; 81 | private int maxDepth = DEFAULT_MAX_DEPTH; 82 | private boolean keepBlankPath = DEFAULT_KEEP_BLANK_PATH; 83 | private long minDocCount = 0; 84 | private int depth = -1; 85 | private BucketOrder order = BucketOrder.compound(BucketOrder.count(false)); // automatically adds tie-breaker key asc order 86 | private PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds = new PathHierarchyAggregator.BucketCountThresholds( 87 | DEFAULT_BUCKET_COUNT_THRESHOLDS); 88 | 89 | 90 | private PathHierarchyAggregationBuilder(String name) { 91 | 
super(name); 92 | } 93 | 94 | @Override 95 | protected boolean serializeTargetValueType(Version version) { 96 | return true; 97 | } 98 | 99 | /** 100 | * Read from a stream 101 | */ 102 | public PathHierarchyAggregationBuilder(StreamInput in) throws IOException { 103 | super(in); 104 | bucketCountThresholds = new PathHierarchyAggregator.BucketCountThresholds(in); 105 | separator = in.readString(); 106 | minDocCount = in.readVLong(); 107 | minDepth = in.readOptionalVInt(); 108 | maxDepth = in.readOptionalVInt(); 109 | keepBlankPath = in.readOptionalBoolean(); 110 | depth = in.readOptionalVInt(); 111 | order = InternalOrder.Streams.readOrder(in); 112 | } 113 | 114 | private PathHierarchyAggregationBuilder(PathHierarchyAggregationBuilder clone, Builder factoriesBuilder, 115 | Map metadata) { 116 | super(clone, factoriesBuilder, metadata); 117 | separator = clone.separator; 118 | minDepth = clone.minDepth; 119 | maxDepth = clone.maxDepth; 120 | keepBlankPath = clone.keepBlankPath; 121 | depth = clone.depth; 122 | order = clone.order; 123 | minDocCount = clone.minDocCount; 124 | this.bucketCountThresholds = new PathHierarchyAggregator.BucketCountThresholds(clone.bucketCountThresholds); 125 | } 126 | 127 | @Override 128 | protected AggregationBuilder shallowCopy(AggregatorFactories.Builder factoriesBuilder, Map metadata) { 129 | return new PathHierarchyAggregationBuilder(this, factoriesBuilder, metadata); 130 | } 131 | 132 | @Override 133 | protected ValuesSourceType defaultValueSourceType() { 134 | return CoreValuesSourceType.KEYWORD; 135 | } 136 | 137 | /** 138 | * Write to a stream 139 | */ 140 | @Override 141 | protected void innerWriteTo(StreamOutput out) throws IOException { 142 | bucketCountThresholds.writeTo(out); 143 | out.writeString(separator); 144 | out.writeVLong(minDocCount); 145 | out.writeOptionalVInt(minDepth); 146 | out.writeOptionalVInt(maxDepth); 147 | out.writeOptionalBoolean(keepBlankPath); 148 | out.writeOptionalVInt(depth); 149 | order.writeTo(out); 150 | } 151 | 152 | private PathHierarchyAggregationBuilder separator(String separator) { 153 | this.separator = separator; 154 | return this; 155 | } 156 | 157 | private PathHierarchyAggregationBuilder minDepth(int minDepth) { 158 | this.minDepth = minDepth; 159 | return this; 160 | } 161 | 162 | private PathHierarchyAggregationBuilder maxDepth(int maxDepth) { 163 | this.maxDepth = maxDepth; 164 | return this; 165 | } 166 | 167 | private PathHierarchyAggregationBuilder keepBlankPath(boolean keepBlankPath) { 168 | this.keepBlankPath = keepBlankPath; 169 | return this; 170 | } 171 | 172 | private PathHierarchyAggregationBuilder depth(int depth) { 173 | this.depth = depth; 174 | return this; 175 | } 176 | 177 | /** Set the order in which the buckets will be returned. It returns the builder so that calls 178 | * can be chained. A tie-breaker may be added to avoid non-deterministic ordering. 
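
Why the tie-breaker matters: without a secondary key, buckets with equal doc counts can legitimately come back in any order, which makes results unstable across shards and runs. A plain-Java illustration of composing the primary order with a key tie-breaker (simple records instead of BucketOrder; the sample data is made up):

```java
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

// Sketch: a compound comparator = primary order + key tie-breaker, which is the
// effect of wrapping a non-total order in a compound order with key ascending.
public class TieBreakerSketch {
    record Bucket(String key, long docCount) {}

    public static void main(String[] args) {
        List<Bucket> buckets = new ArrayList<>(List.of(
                new Bucket("pears", 2), new Bucket("apples", 2), new Bucket("bananas", 5)));

        Comparator<Bucket> byCountDesc = Comparator.comparingLong(Bucket::docCount).reversed();
        Comparator<Bucket> stable = byCountDesc.thenComparing(Bucket::key);   // tie-breaker on the key

        buckets.sort(stable);
        // bananas 5, apples 2, pears 2 -- the two count-2 buckets now have a deterministic order
        buckets.forEach(b -> System.out.println(b.key() + " " + b.docCount()));
    }
}
```
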
*/ 179 | private PathHierarchyAggregationBuilder order(BucketOrder order) { 180 | if (order == null) { 181 | throw new IllegalArgumentException("[order] must not be null: [" + name + "]"); 182 | } 183 | if(order instanceof InternalOrder.CompoundOrder || InternalOrder.isKeyOrder(order)) { 184 | this.order = order; // if order already contains a tie-breaker we are good to go 185 | } else { // otherwise add a tie-breaker by using a compound order 186 | this.order = BucketOrder.compound(order); 187 | } 188 | return this; 189 | } 190 | 191 | private PathHierarchyAggregationBuilder order(List orders) { 192 | if (orders == null) { 193 | throw new IllegalArgumentException("[orders] must not be null: [" + name + "]"); 194 | } 195 | // if the list only contains one order use that to avoid inconsistent xcontent 196 | order(orders.size() > 1 ? BucketOrder.compound(orders) : orders.get(0)); 197 | return this; 198 | } 199 | 200 | 201 | /** 202 | * Sets the size - indicating how many term buckets should be returned 203 | * (defaults to 10) 204 | */ 205 | public PathHierarchyAggregationBuilder size(int size) { 206 | if (size <= 0) { 207 | throw new IllegalArgumentException("[size] must be greater than 0. Found [" + size + "] in [" + name + "]"); 208 | } 209 | bucketCountThresholds.setRequiredSize(size); 210 | return this; 211 | } 212 | 213 | /** Set the minimum count of matching documents that buckets need to have 214 | * and return this builder so that calls can be chained. */ 215 | public PathHierarchyAggregationBuilder minDocCount(long minDocCount) { 216 | if (minDocCount < 0) { 217 | throw new IllegalArgumentException( 218 | "[minDocCount] must be greater than or equal to 0. Found [" + minDocCount + "] in [" + name + "]"); 219 | } 220 | this.minDocCount = minDocCount; 221 | return this; 222 | } 223 | 224 | /** 225 | * Returns the number of term buckets currently configured 226 | */ 227 | public int size() { 228 | return bucketCountThresholds.getRequiredSize(); 229 | } 230 | 231 | @Override 232 | public BucketCardinality bucketCardinality() { 233 | return BucketCardinality.MANY; 234 | } 235 | 236 | /** 237 | * Sets the shard_size - indicating the number of term buckets each shard 238 | * will return to the coordinating node (the node that coordinates the 239 | * search execution). The higher the shard size is, the more accurate the 240 | * results are. 241 | */ 242 | public PathHierarchyAggregationBuilder shardSize(int shardSize) { 243 | if (shardSize <= 0) { 244 | throw new IllegalArgumentException( 245 | "[shardSize] must be greater than 0. 
Found [" + shardSize + "] in [" + name + "]"); 246 | } 247 | bucketCountThresholds.setShardSize(shardSize); 248 | return this; 249 | } 250 | 251 | /** 252 | * Returns the number of term buckets per shard that are currently configured 253 | */ 254 | public int shardSize() { 255 | return bucketCountThresholds.getShardSize(); 256 | } 257 | 258 | @Override 259 | protected ValuesSourceAggregatorFactory innerBuild(AggregationContext context, 260 | ValuesSourceConfig config, 261 | AggregatorFactory parent, 262 | AggregatorFactories.Builder subFactoriesBuilder) throws IOException { 263 | 264 | 265 | if (minDepth > maxDepth) 266 | throw new IllegalArgumentException("[minDepth] (" + minDepth + ") must not be greater than [maxDepth] (" + 267 | maxDepth + ")"); 268 | 269 | if (depth >= 0) { 270 | if (minDepth > depth) 271 | throw new IllegalArgumentException("[minDepth] (" + minDepth + ") must not be greater than [depth] (" + 272 | depth + ")"); 273 | minDepth = depth; 274 | maxDepth = depth; 275 | } 276 | 277 | return new PathHierarchyAggregatorFactory( 278 | name, 279 | config, 280 | separator, 281 | minDepth, 282 | maxDepth, 283 | keepBlankPath, 284 | order, 285 | minDocCount, 286 | bucketCountThresholds, 287 | context, 288 | parent, 289 | subFactoriesBuilder, 290 | metadata); 291 | } 292 | 293 | @Override 294 | protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { 295 | builder.startObject(); 296 | 297 | if (order != null) { 298 | builder.field(ORDER_FIELD.getPreferredName()); 299 | order.toXContent(builder, params); 300 | } 301 | 302 | builder.field(MIN_DOC_COUNT_FIELD.getPreferredName(), minDocCount); 303 | 304 | if (!separator.equals(DEFAULT_SEPARATOR)) { 305 | builder.field(SEPARATOR_FIELD.getPreferredName(), separator); 306 | } 307 | 308 | if (minDepth != DEFAULT_MIN_DEPTH) { 309 | builder.field(MIN_DEPTH_FIELD.getPreferredName(), minDepth); 310 | } 311 | 312 | if (maxDepth != DEFAULT_MAX_DEPTH) { 313 | builder.field(MAX_DEPTH_FIELD.getPreferredName(), maxDepth); 314 | } 315 | 316 | if (depth != 0) { 317 | builder.field(DEPTH_FIELD.getPreferredName(), depth); 318 | } 319 | 320 | return builder.endObject(); 321 | } 322 | 323 | @Override 324 | public int hashCode() { 325 | return Objects.hash(super.hashCode(), separator, minDepth, maxDepth, depth, order, minDocCount, bucketCountThresholds); 326 | } 327 | 328 | @Override 329 | public boolean equals(Object obj) { 330 | if (this == obj) return true; 331 | if (obj == null || getClass() != obj.getClass()) return false; 332 | if (!super.equals(obj)) return false; 333 | PathHierarchyAggregationBuilder other = (PathHierarchyAggregationBuilder) obj; 334 | return Objects.equals(separator, other.separator) 335 | && Objects.equals(minDepth, other.minDepth) 336 | && Objects.equals(maxDepth, other.maxDepth) 337 | && Objects.equals(depth, other.depth) 338 | && Objects.equals(order, other.order) 339 | && Objects.equals(minDocCount, other.minDocCount) 340 | && Objects.equals(bucketCountThresholds, other.bucketCountThresholds); 341 | } 342 | 343 | @Override 344 | public String getType() { 345 | return NAME; 346 | } 347 | 348 | @Override 349 | protected ValuesSourceRegistry.RegistryKey getRegistryKey() { return REGISTRY_KEY; } 350 | } 351 | 352 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/PathHierarchyAggregationSupplier.java: -------------------------------------------------------------------------------- 
1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.util.BytesRef; 4 | import org.elasticsearch.search.aggregations.Aggregator; 5 | import org.elasticsearch.search.aggregations.AggregatorFactories; 6 | import org.elasticsearch.search.aggregations.BucketOrder; 7 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 8 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 9 | import org.elasticsearch.search.internal.SearchContext; 10 | 11 | import java.io.IOException; 12 | import java.util.Map; 13 | 14 | @FunctionalInterface 15 | public interface PathHierarchyAggregationSupplier { 16 | Aggregator build(String name, 17 | AggregatorFactories factories, 18 | BytesRef separator, 19 | int minDepth, 20 | int maxDepth, 21 | boolean keepBlankPath, 22 | BucketOrder order, 23 | long minDocCount, 24 | PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds, 25 | ValuesSourceConfig valuesSourceConfig, 26 | SearchContext aggregationContext, 27 | Aggregator parent, 28 | CardinalityUpperBound cardinality, 29 | Map metadata) throws IOException; 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/PathHierarchyAggregator.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.util.BytesRef; 5 | import org.apache.lucene.util.BytesRefBuilder; 6 | import org.elasticsearch.ElasticsearchException; 7 | import org.elasticsearch.common.io.stream.StreamInput; 8 | import org.elasticsearch.common.io.stream.StreamOutput; 9 | import org.elasticsearch.common.io.stream.Writeable; 10 | import org.elasticsearch.core.Releasables; 11 | import org.elasticsearch.common.util.BytesRefHash; 12 | import org.elasticsearch.xcontent.ToXContentFragment; 13 | import org.elasticsearch.xcontent.XContentBuilder; 14 | import org.elasticsearch.index.fielddata.SortedBinaryDocValues; 15 | import org.elasticsearch.search.aggregations.Aggregator; 16 | import org.elasticsearch.search.aggregations.AggregatorFactories; 17 | import org.elasticsearch.search.aggregations.BucketOrder; 18 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 19 | import org.elasticsearch.search.aggregations.InternalAggregation; 20 | import org.elasticsearch.search.aggregations.LeafBucketCollector; 21 | import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; 22 | import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; 23 | import org.elasticsearch.search.aggregations.support.ValuesSource; 24 | import org.elasticsearch.search.aggregations.support.AggregationContext; 25 | 26 | import java.io.IOException; 27 | import java.util.Arrays; 28 | import java.util.Comparator; 29 | import java.util.Iterator; 30 | import java.util.Map; 31 | import java.util.Objects; 32 | import java.util.regex.Pattern; 33 | 34 | public class PathHierarchyAggregator extends BucketsAggregator { 35 | 36 | public PathHierarchyAggregator(String name, 37 | AggregatorFactories factories, 38 | AggregationContext context, 39 | ValuesSource valuesSource, 40 | BucketOrder order, 41 | long minDocCount, 42 | BucketCountThresholds bucketCountThresholds, 43 | BytesRef separator, 44 | int minDepth, 45 | Aggregator parent, 46 | CardinalityUpperBound cardinality, 47 | Map metadata 
48 | ) throws IOException { 49 | super(name, factories, context, parent, cardinality, metadata); 50 | this.valuesSource = valuesSource; 51 | this.separator = separator; 52 | this.minDocCount = minDocCount; 53 | bucketOrds = new BytesRefHash(1, context.bigArrays()); 54 | order.validate(this); 55 | this.order = order; 56 | this.partiallyBuiltBucketComparator = order == null ? null : order.partiallyBuiltBucketComparator(b -> b.bucketOrd, this); 57 | this.bucketCountThresholds = bucketCountThresholds; 58 | this.minDepth = minDepth; 59 | } 60 | 61 | public static class BucketCountThresholds implements Writeable, ToXContentFragment { 62 | private int requiredSize; 63 | private int shardSize; 64 | 65 | public BucketCountThresholds(int requiredSize, int shardSize) { 66 | this.requiredSize = requiredSize; 67 | this.shardSize = shardSize; 68 | } 69 | 70 | /** 71 | * Read from a stream. 72 | */ 73 | public BucketCountThresholds(StreamInput in) throws IOException { 74 | requiredSize = in.readInt(); 75 | shardSize = in.readInt(); 76 | } 77 | 78 | @Override 79 | public void writeTo(StreamOutput out) throws IOException { 80 | out.writeInt(requiredSize); 81 | out.writeInt(shardSize); 82 | } 83 | 84 | public BucketCountThresholds(PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds) { 85 | this(bucketCountThresholds.requiredSize, bucketCountThresholds.shardSize); 86 | } 87 | 88 | public void ensureValidity() { 89 | // shard_size cannot be smaller than size as we need to at least fetch size entries from every shards in order to return size 90 | if (shardSize < requiredSize) { 91 | setShardSize(requiredSize); 92 | } 93 | 94 | if (requiredSize <= 0 || shardSize <= 0) { 95 | throw new ElasticsearchException("parameters [required_size] and [shard_size] must be >0 in path-hierarchy aggregation."); 96 | } 97 | } 98 | 99 | public int getRequiredSize() { 100 | return requiredSize; 101 | } 102 | 103 | public void setRequiredSize(int requiredSize) { 104 | this.requiredSize = requiredSize; 105 | } 106 | 107 | public int getShardSize() { 108 | return shardSize; 109 | } 110 | 111 | public void setShardSize(int shardSize) { 112 | this.shardSize = shardSize; 113 | } 114 | 115 | @Override 116 | public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { 117 | builder.field(PathHierarchyAggregationBuilder.SIZE_FIELD.getPreferredName(), requiredSize); 118 | if (shardSize != -1) { 119 | builder.field(PathHierarchyAggregationBuilder.SHARD_SIZE_FIELD.getPreferredName(), shardSize); 120 | } 121 | return builder; 122 | } 123 | 124 | @Override 125 | public int hashCode() { 126 | return Objects.hash(requiredSize, shardSize); 127 | } 128 | 129 | @Override 130 | public boolean equals(Object obj) { 131 | if (obj == null) { 132 | return false; 133 | } 134 | if (getClass() != obj.getClass()) { 135 | return false; 136 | } 137 | PathHierarchyAggregator.BucketCountThresholds other = (PathHierarchyAggregator.BucketCountThresholds) obj; 138 | return Objects.equals(requiredSize, other.requiredSize) 139 | && Objects.equals(shardSize, other.shardSize); 140 | } 141 | } 142 | 143 | 144 | private final ValuesSource valuesSource; 145 | private final BytesRefHash bucketOrds; 146 | private final BucketOrder order; 147 | private final long minDocCount; 148 | private final int minDepth; 149 | protected final Comparator partiallyBuiltBucketComparator; 150 | private final BucketCountThresholds bucketCountThresholds; 151 | private final BytesRef separator; 152 | 153 | /** 154 | * The collector collects the 
docs, including or not some score (depending of the including of a Scorer) in the 155 | * collect() process. 156 | * 157 | * The LeafBucketCollector is a "Per-leaf bucket collector". It collects docs for the account of buckets. 158 | */ 159 | @Override 160 | public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { 161 | if (valuesSource == null) { 162 | return LeafBucketCollector.NO_OP_COLLECTOR; 163 | } 164 | final SortedBinaryDocValues values = valuesSource.bytesValues(ctx); 165 | return new LeafBucketCollectorBase(sub, values) { 166 | final BytesRefBuilder previous = new BytesRefBuilder(); 167 | /** 168 | * Collect the given doc in the given bucket. 169 | * Called once for every document matching a query, with the unbased document number. 170 | */ 171 | @Override 172 | public void collect(int doc, long owningBucketOrdinal) throws IOException { 173 | assert owningBucketOrdinal == 0; 174 | if (values.advanceExact(doc)) { 175 | final int valuesCount = values.docValueCount(); 176 | previous.clear(); 177 | 178 | // SortedBinaryDocValues don't guarantee uniqueness so we need to take care of dups 179 | for (int i = 0; i < valuesCount; ++i) { 180 | final BytesRef bytesValue = values.nextValue(); 181 | if (i > 0 && previous.get().equals(bytesValue)) { 182 | continue; 183 | } 184 | long bucketOrdinal = bucketOrds.add(bytesValue); 185 | if (bucketOrdinal < 0) { // already seen 186 | bucketOrdinal = - 1 - bucketOrdinal; 187 | collectExistingBucket(sub, doc, bucketOrdinal); 188 | } else { 189 | collectBucket(sub, doc, bucketOrdinal); 190 | } 191 | previous.copyBytes(bytesValue); 192 | } 193 | } 194 | } 195 | }; 196 | } 197 | 198 | @Override 199 | public InternalAggregation[] buildAggregations(long[] owningBucketOrdinals) throws IOException { 200 | 201 | InternalPathHierarchy.InternalBucket[][] topBucketsPerOrd = new InternalPathHierarchy.InternalBucket[owningBucketOrdinals.length][]; 202 | InternalPathHierarchy[] results = new InternalPathHierarchy[owningBucketOrdinals.length]; 203 | 204 | for (int ordIdx = 0; ordIdx < owningBucketOrdinals.length; ordIdx++) { 205 | assert owningBucketOrdinals[ordIdx] == 0; 206 | 207 | final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); 208 | PathSortedTree pathSortedTree = 209 | new PathSortedTree<>(partiallyBuiltBucketComparator, size); 210 | 211 | InternalPathHierarchy.InternalBucket spare; 212 | for (int i = 0; i < bucketOrds.size(); i++) { 213 | spare = new InternalPathHierarchy.InternalBucket(0, null, null, new BytesRef(), 0, 0, null); 214 | BytesRef term = new BytesRef(); 215 | bucketOrds.get(i, term); 216 | 217 | String quotedPattern = Pattern.quote(separator.utf8ToString()); 218 | 219 | String[] paths = term.utf8ToString().split(quotedPattern, -1); 220 | 221 | String[] pathsForTree; 222 | 223 | if (minDepth > 0) { 224 | pathsForTree = Arrays.copyOfRange(paths, minDepth, paths.length); 225 | } else { 226 | pathsForTree = paths; 227 | } 228 | 229 | spare.termBytes = BytesRef.deepCopyOf(term); 230 | spare.level = pathsForTree.length - 1; 231 | spare.docCount = bucketDocCount(i); 232 | spare.basename = paths[paths.length - 1]; 233 | spare.minDepth = minDepth; 234 | spare.bucketOrd = i; 235 | spare.paths = paths; 236 | 237 | pathSortedTree.add(pathsForTree, spare); 238 | 239 | } 240 | // Get the top buckets 241 | topBucketsPerOrd[ordIdx] = new InternalPathHierarchy.InternalBucket[size]; 242 | long otherHierarchyNodes = pathSortedTree.getFullSize(); 243 | Iterator 
iterator = pathSortedTree.consumer(); 244 | for (int i = 0; i < size; i++) { 245 | final InternalPathHierarchy.InternalBucket bucket = iterator.next(); 246 | topBucketsPerOrd[ordIdx][i] = bucket; 247 | otherHierarchyNodes -= 1; 248 | } 249 | 250 | results[ordIdx] = new InternalPathHierarchy(name, Arrays.asList(topBucketsPerOrd[ordIdx]), order, 251 | minDocCount, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), 252 | otherHierarchyNodes, separator, metadata()); 253 | } 254 | 255 | // Build sub-aggregations for pruned buckets 256 | buildSubAggsForAllBuckets( 257 | topBucketsPerOrd, 258 | b -> b.bucketOrd, 259 | (b, aggregations) -> b.aggregations = aggregations 260 | ); 261 | 262 | return results; 263 | } 264 | 265 | @Override 266 | public InternalAggregation buildEmptyAggregation() { 267 | return new InternalPathHierarchy(name, null, order, minDocCount, bucketCountThresholds.getRequiredSize(), 268 | bucketCountThresholds.getShardSize(), 0, separator, metadata()); 269 | } 270 | 271 | @Override 272 | protected void doClose() { 273 | Releasables.close(bucketOrds); 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/PathHierarchyAggregatorFactory.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.util.ArrayUtil; 5 | import org.apache.lucene.util.BytesRef; 6 | import org.apache.lucene.util.BytesRefBuilder; 7 | import org.apache.lucene.util.FutureArrays; 8 | import org.elasticsearch.index.fielddata.SortedBinaryDocValues; 9 | import org.elasticsearch.index.fielddata.SortingBinaryDocValues; 10 | import org.elasticsearch.search.aggregations.Aggregator; 11 | import org.elasticsearch.search.aggregations.AggregatorFactories; 12 | import org.elasticsearch.search.aggregations.AggregatorFactory; 13 | import org.elasticsearch.search.aggregations.BucketOrder; 14 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 15 | import org.elasticsearch.search.aggregations.InternalAggregation; 16 | import org.elasticsearch.search.aggregations.InternalOrder; 17 | import org.elasticsearch.search.aggregations.NonCollectingAggregator; 18 | import org.elasticsearch.search.aggregations.bucket.BucketUtils; 19 | import org.elasticsearch.search.aggregations.support.AggregationContext; 20 | import org.elasticsearch.search.aggregations.support.ValuesSource; 21 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; 22 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 23 | import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; 24 | import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; 25 | 26 | import java.io.IOException; 27 | import java.util.ArrayList; 28 | import java.util.Map; 29 | 30 | /** 31 | * The factory of aggregators. 
32 | * ValuesSourceAggregatorFactory extends {@link AggregatorFactory} 33 | */ 34 | class PathHierarchyAggregatorFactory extends ValuesSourceAggregatorFactory { 35 | 36 | private BytesRef separator; 37 | private int minDepth; 38 | private int maxDepth; 39 | private BucketOrder order; 40 | private long minDocCount; 41 | private boolean keepBlankPath; 42 | private final PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds; 43 | 44 | PathHierarchyAggregatorFactory(String name, 45 | ValuesSourceConfig config, 46 | String separator, 47 | int minDepth, 48 | int maxDepth, 49 | boolean keepBlankPath, 50 | BucketOrder order, 51 | long minDocCount, 52 | PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds, 53 | AggregationContext context, 54 | AggregatorFactory parent, 55 | AggregatorFactories.Builder subFactoriesBuilder, 56 | Map metaData 57 | ) throws IOException { 58 | super(name, config, context, parent, subFactoriesBuilder, metaData); 59 | this.separator = new BytesRef(separator); 60 | this.minDepth = minDepth; 61 | this.maxDepth = maxDepth; 62 | this.keepBlankPath = keepBlankPath; 63 | this.order = order; 64 | this.minDocCount = minDocCount; 65 | this.bucketCountThresholds = bucketCountThresholds; 66 | } 67 | 68 | public static void registerAggregators(ValuesSourceRegistry.Builder builder) { 69 | builder.register(PathHierarchyAggregationBuilder.REGISTRY_KEY, CoreValuesSourceType.KEYWORD, (name, 70 | factories, 71 | separator, 72 | minDepth, 73 | maxDepth, 74 | keepBlankPath, 75 | order, 76 | minDocCount, 77 | bucketCountThresholds, 78 | valuesSourceConfig, 79 | aggregationContext, 80 | parent, 81 | cardinality, 82 | metadata) -> null, 83 | true); 84 | } 85 | 86 | @Override 87 | protected Aggregator createUnmapped(Aggregator parent, Map metadata) throws IOException { 88 | final InternalAggregation aggregation = new InternalPathHierarchy(name, new ArrayList<>(), order, minDocCount, 89 | bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), 0, separator, metadata); 90 | return new NonCollectingAggregator(name, context, parent, factories, metadata) { 91 | { 92 | // even in the case of an unmapped aggregator, validate the 93 | // order 94 | order.validate(this); 95 | } 96 | 97 | @Override 98 | public InternalAggregation buildEmptyAggregation() { 99 | return aggregation; 100 | } 101 | }; 102 | } 103 | 104 | @Override 105 | protected Aggregator doCreateInternal(Aggregator parent, CardinalityUpperBound cardinality, 106 | Map metadata) throws IOException { 107 | ValuesSource valuesSourceBytes = new HierarchyValuesSource(config.getValuesSource(), separator, minDepth, maxDepth, keepBlankPath); 108 | PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds = new 109 | PathHierarchyAggregator.BucketCountThresholds(this.bucketCountThresholds); 110 | if (!InternalOrder.isKeyOrder(order) 111 | && bucketCountThresholds.getShardSize() == PathHierarchyAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) { 112 | // The user has not made a shardSize selection. 
Use default 113 | // heuristic to avoid any wrong-ranking caused by distributed 114 | // counting 115 | bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize())); 116 | } 117 | bucketCountThresholds.ensureValidity(); 118 | return new PathHierarchyAggregator( 119 | name, factories, context, 120 | valuesSourceBytes, order, minDocCount, bucketCountThresholds, separator, minDepth, 121 | parent, cardinality, metadata); 122 | } 123 | 124 | /** 125 | * A list of per-document binary values, sorted according to {@link BytesRef}. 126 | * There might be dups however. 127 | * @see ValuesSource 128 | */ 129 | private static class HierarchyValues extends SortingBinaryDocValues { 130 | 131 | /** valuesSource is a list of per-document binary values, sorted according to {@link BytesRef#compareTo(BytesRef)} 132 | * (warning, there might be dups however). 133 | */ 134 | private SortedBinaryDocValues valuesSource; 135 | private BytesRef separator; 136 | private int minDepth; 137 | private int maxDepth; 138 | private boolean keepBlankPath; 139 | 140 | private HierarchyValues(SortedBinaryDocValues valuesSource, BytesRef separator, int minDepth, int maxDepth, 141 | boolean keepBlankPath) { 142 | this.valuesSource = valuesSource; 143 | this.separator = separator; 144 | this.minDepth = minDepth; 145 | this.maxDepth = maxDepth; 146 | this.keepBlankPath = keepBlankPath; 147 | } 148 | 149 | /** 150 | * Handles iterations on doc values: 151 | * Advance the iterator to exactly target and return whether target has a value. 152 | * target must be greater than or equal to the current doc ID and must be a valid doc ID, ie. ≥ 0 and < maxDoc. 153 | * After this method returns, docID() returns target. 154 | */ 155 | @Override 156 | public boolean advanceExact(int docId) throws IOException { 157 | if (valuesSource.advanceExact(docId)) { 158 | count = 0; 159 | int t = 0; 160 | for (int i=0; i < valuesSource.docValueCount(); i++) { 161 | int depth = 0; 162 | BytesRef val = valuesSource.nextValue(); 163 | BytesRefBuilder cleanVal = new BytesRefBuilder(); 164 | int startNewValOffset = -1; 165 | 166 | for (int offset=0; offset < val.length; offset++) { 167 | // it is a separator 168 | if (val.length - offset >= separator.length && 169 | FutureArrays.equals( 170 | separator.bytes, separator.offset, separator.offset + separator.length, 171 | val.bytes, val.offset + offset, val.offset + offset + separator.length)) { 172 | // ignore separator at the beginning 173 | if (offset == 0) { 174 | offset += separator.length -1; 175 | continue; 176 | } 177 | 178 | // A new path needs to be add 179 | if (startNewValOffset != -1) { 180 | cleanVal.append(val.bytes, val.offset + startNewValOffset, offset - startNewValOffset); 181 | if (depth >= minDepth) { 182 | values[t++].copyBytes(cleanVal); 183 | } 184 | startNewValOffset = -1; 185 | cleanVal.append(separator); 186 | depth ++; 187 | // two separators following each other 188 | } else if (keepBlankPath) { 189 | count++; 190 | growExact(); 191 | values[t++].copyBytes(cleanVal); 192 | cleanVal.append(separator); 193 | depth ++; 194 | } 195 | 196 | if (maxDepth >= 0 && depth > maxDepth) { 197 | break; 198 | } 199 | offset += separator.length - 1; 200 | } else { 201 | if (startNewValOffset == -1) { 202 | startNewValOffset = offset; 203 | if (depth >= minDepth) { 204 | count++; 205 | growExact(); 206 | } 207 | } 208 | } 209 | } 210 | 211 | if (startNewValOffset != -1 && minDepth <= depth) { 212 | cleanVal.append(val.bytes, val.offset + 
startNewValOffset, val.length - startNewValOffset); 213 | values[t++].copyBytes(cleanVal); 214 | } 215 | 216 | } 217 | sort(); // sort values that are stored between offsets 0 and count of values 218 | return true; 219 | } else 220 | return false; 221 | } 222 | 223 | final void growExact() { 224 | if (values.length < count) { 225 | final int oldLen = values.length; 226 | values = ArrayUtil.growExact(values, count); 227 | for (int i = oldLen; i < count; ++i) { 228 | values[i] = new BytesRefBuilder(); 229 | } 230 | } 231 | } 232 | } 233 | 234 | /** 235 | * To get ValuesSource as sorted bytes. 236 | */ 237 | private static class HierarchyValuesSource extends ValuesSource.Bytes { 238 | private final ValuesSource values; 239 | private final BytesRef separator; 240 | private final int minDepth; 241 | private final int maxDepth; 242 | private final boolean twoSepAsOne; 243 | 244 | private HierarchyValuesSource(ValuesSource values, BytesRef separator, int minDepth, int maxDepth, boolean twoSepAsOne){ 245 | this.values = values; 246 | this.separator = separator; 247 | this.minDepth = minDepth; 248 | this.maxDepth = maxDepth; 249 | this.twoSepAsOne = twoSepAsOne; 250 | } 251 | 252 | @Override 253 | public SortedBinaryDocValues bytesValues(LeafReaderContext context) throws IOException { 254 | return new HierarchyValues(values.bytesValues(context), separator, minDepth, maxDepth, twoSepAsOne); 255 | } 256 | 257 | } 258 | } 259 | 260 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/PathSortedTree.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | 4 | import java.util.ArrayList; 5 | import java.util.Comparator; 6 | import java.util.Iterator; 7 | import java.util.List; 8 | import java.util.NoSuchElementException; 9 | import java.util.PriorityQueue; 10 | import java.util.Stack; 11 | 12 | public class PathSortedTree implements Iterable{ 13 | 14 | private Comparator comparator; 15 | private Node root; 16 | private int size = -1; 17 | private int fullSize = 0; 18 | 19 | public PathSortedTree(Comparator comparator) { 20 | root = new Node<>(comparator); 21 | this.comparator = comparator; 22 | } 23 | 24 | public PathSortedTree(Comparator comparator, int size) { 25 | this(comparator); 26 | this.size = size; 27 | } 28 | 29 | public int getFullSize() { 30 | return fullSize; 31 | } 32 | 33 | public void add(K[] path, T element) { 34 | /* Please note that paths in path must be descending-sorted by level. 
*/ 35 | Node currentNode = root; 36 | for (K k : path) { 37 | boolean newChild = true; 38 | for (Node child : currentNode.children) { 39 | if (child.key.equals(k)) { 40 | currentNode = child; 41 | newChild = false; 42 | break; 43 | } 44 | } 45 | if (newChild) { 46 | Node newNode = new Node<>(k, comparator, element, currentNode); 47 | currentNode.children.add(newNode); 48 | fullSize ++; 49 | break; 50 | } 51 | } 52 | } 53 | 54 | 55 | public List getAsList() { 56 | 57 | List result = new ArrayList<>(fullSize); 58 | 59 | Iterator iterator = consumer(); 60 | 61 | while (iterator.hasNext()){ 62 | result.add(iterator.next()); 63 | } 64 | return result; 65 | } 66 | 67 | public Iterator consumer() { 68 | return new PathSortedTreeConsumer(root, fullSize); 69 | } 70 | 71 | 72 | @Override 73 | public Iterator iterator() { 74 | return new PathSortedTreeIterator(root); 75 | } 76 | 77 | public static class Node { 78 | private K key; 79 | private T data; 80 | private Node parent; 81 | 82 | private PriorityQueue> children; 83 | 84 | Node() { 85 | this.children = new PriorityQueue<>(); 86 | } 87 | 88 | public Node(Comparator comparator) { 89 | this.children = new PriorityQueue<>(getComparator(comparator)); 90 | } 91 | 92 | Comparator> getComparator(Comparator comparator) { 93 | return (n1, n2) -> comparator.compare(n1.data, n2.data); 94 | } 95 | 96 | 97 | public Node(K key, Comparator comparator, T data, Node parent) { 98 | this.key = key; 99 | this.data = data; 100 | this.children = new PriorityQueue<>(getComparator(comparator)); 101 | this.parent = parent; 102 | } 103 | } 104 | 105 | private class PathSortedTreeIterator implements Iterator { 106 | 107 | private Stack>> iterators; 108 | Iterator> current; 109 | 110 | PathSortedTreeIterator(Node root) { 111 | current = root.children.iterator(); 112 | iterators = new Stack<>(); 113 | } 114 | 115 | @Override 116 | public boolean hasNext() { 117 | return current.hasNext(); 118 | } 119 | 120 | @Override 121 | public T next() { 122 | 123 | Node nextNode = current.next(); 124 | 125 | if (! nextNode.children.isEmpty()) { 126 | iterators.push(current); 127 | current = nextNode.children.iterator(); 128 | } else if (! current.hasNext()){ 129 | while (! 
iterators.empty()) { 130 | current = iterators.pop(); 131 | if (current.hasNext()) { 132 | break; 133 | } 134 | } 135 | } 136 | 137 | return nextNode.data; 138 | 139 | } 140 | } 141 | 142 | private class PathSortedTreeConsumer implements Iterator { 143 | 144 | Node cursor; 145 | 146 | int currentSize = 0; 147 | int iteratorFullSize; 148 | 149 | PathSortedTreeConsumer(Node root, int fullSize) { 150 | iteratorFullSize = fullSize; 151 | cursor = root; 152 | } 153 | 154 | @Override 155 | public boolean hasNext() { 156 | if (size >=0 && currentSize >= size) { 157 | return false; 158 | } 159 | if (cursor.children.size() > 0) { 160 | return true; 161 | } 162 | 163 | return currentSize < iteratorFullSize; 164 | } 165 | 166 | @Override 167 | public T next() { 168 | 169 | Node nextNode = null; 170 | while (nextNode == null) { 171 | nextNode = cursor.children.poll(); 172 | if (nextNode == null) { 173 | if (cursor.parent == null) { 174 | break; 175 | } 176 | cursor = cursor.parent; 177 | } 178 | } 179 | if (nextNode == null) throw new NoSuchElementException(); 180 | currentSize ++; 181 | fullSize --; 182 | cursor = nextNode; 183 | return nextNode.data; 184 | 185 | } 186 | } 187 | 188 | } 189 | -------------------------------------------------------------------------------- /src/test/java/org/opendatasoft/elasticsearch/PathHierarchyTests.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch; 2 | 3 | import org.elasticsearch.xcontent.XContentParser; 4 | import org.elasticsearch.xcontent.json.JsonXContent; 5 | import org.elasticsearch.search.aggregations.AggregationBuilder; 6 | import org.elasticsearch.test.ESTestCase; 7 | import org.opendatasoft.elasticsearch.search.aggregations.bucket.PathHierarchyAggregationBuilder; 8 | 9 | public class PathHierarchyTests extends ESTestCase { 10 | public void testParser() throws Exception { 11 | // can create the factory with utf8 separator 12 | String separator = "夢"; 13 | XContentParser stParser = createParser(JsonXContent.jsonXContent, 14 | "{\"field\":\"path\", \"separator\": \"" + separator + "\"}"); 15 | XContentParser.Token token = stParser.nextToken(); 16 | assertSame(XContentParser.Token.START_OBJECT, token); 17 | assertNotNull(PathHierarchyAggregationBuilder.parse("path_hierarchy", stParser)); 18 | 19 | // can create the factory with an array of orders 20 | String orders = "[{\"_key\": \"asc\"}, {\"_count\": \"desc\"}]"; 21 | stParser = createParser(JsonXContent.jsonXContent, 22 | "{\"field\":\"path\", \"order\": " + orders + "}"); 23 | assertNotNull(PathHierarchyAggregationBuilder.parse("path_hierarchy", stParser)); 24 | stParser = createParser(JsonXContent.jsonXContent, 25 | "{\"field\":\"path\", \"separator\":\"/\", \"order\": " + orders + ", \"min_depth\": 0, \"max_depth\": 3}"); 26 | AggregationBuilder builder = PathHierarchyAggregationBuilder.parse("path_hierarchy", stParser); 27 | assertNotNull(builder); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/yamlRestTest/java/org/opendatasoft/elasticsearch/RestApiYamlIT.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch; 2 | 3 | import com.carrotsearch.randomizedtesting.annotations.Name; 4 | import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; 5 | import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; 6 | import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; 
7 | 8 | /* 9 | * Generic loader for yaml integration tests 10 | */ 11 | 12 | public class RestApiYamlIT extends ESClientYamlSuiteTestCase { 13 | public RestApiYamlIT (@Name("yaml") ClientYamlTestCandidate testCandidate) { 14 | super(testCandidate); 15 | } 16 | 17 | @ParametersFactory 18 | public static Iterable parameters() throws Exception { 19 | return ESClientYamlSuiteTestCase.createParameters(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/10_basic.yml: -------------------------------------------------------------------------------- 1 | "PathHierarchy Aggregation plugin installed": 2 | - do: 3 | cluster.state: {} 4 | 5 | - set: {master_node: master} 6 | 7 | - do: 8 | nodes.info: {} 9 | 10 | - match: {nodes.$master.plugins.0.name: pathhierarchy-aggregation} 11 | -------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/20_path_hierarchy.yml: -------------------------------------------------------------------------------- 1 | setup: 2 | - do: 3 | indices.create: 4 | index: filesystem 5 | body: 6 | settings: 7 | number_of_shards: 1 8 | number_of_replicas: 0 9 | mappings: 10 | properties: 11 | path: 12 | type: keyword 13 | views: 14 | type: integer 15 | 16 | - do: 17 | cluster.health: 18 | wait_for_status: green 19 | 20 | --- 21 | "Test with filesystem arborescence": 22 | - do: 23 | index: 24 | index: filesystem 25 | id: 1 26 | body: { "path": "/My documents/Spreadsheets/Budget_2013.xls", "views": 10 } 27 | 28 | - do: 29 | index: 30 | index: filesystem 31 | id: 2 32 | body: { "path": "/My documents/Spreadsheets/Budget_2014.xls", "views": 7 } 33 | 34 | - do: 35 | index: 36 | index: filesystem 37 | id: 3 38 | body: { "path": "/My documents/Test.txt", "views": 1 } 39 | 40 | - do: 41 | index: 42 | index: filesystem 43 | id: 4 44 | body: { "path": "/My documents/Spreadsheets//Budget_2014.xls", "views": 12 } 45 | 46 | - do: 47 | indices.refresh: {} 48 | 49 | 50 | # basic test 51 | - do: 52 | search: 53 | rest_total_hits_as_int: true 54 | body: { 55 | "size" : 0, 56 | "aggs" : { 57 | "tree" : { 58 | "path_hierarchy" : { 59 | "field" : "path", 60 | "separator": "/", 61 | "order": [ {"_count": "desc"}, {"_key": "asc"}], 62 | "min_depth": 0, 63 | "max_depth": 3 64 | }, 65 | "aggs": { 66 | "total_views": { 67 | "sum": { 68 | "field": "views" 69 | } 70 | } 71 | } 72 | } 73 | } 74 | } 75 | 76 | - match: { hits.total: 4 } 77 | 78 | - match: { aggregations.tree.buckets.0.key: "My documents" } 79 | - match: { aggregations.tree.buckets.0.doc_count: 4 } 80 | - match: { aggregations.tree.buckets.0.total_views.value: 30 } 81 | 82 | - match: { aggregations.tree.buckets.0.tree.buckets.0.key: "Spreadsheets" } 83 | - match: { aggregations.tree.buckets.0.tree.buckets.0.doc_count: 3 } 84 | - match: { aggregations.tree.buckets.0.tree.buckets.0.total_views.value: 29 } 85 | 86 | - match: { aggregations.tree.buckets.0.tree.buckets.1.key: "Test.txt" } 87 | - match: { aggregations.tree.buckets.0.tree.buckets.1.doc_count: 1 } 88 | - match: { aggregations.tree.buckets.0.tree.buckets.1.total_views.value: 1 } 89 | 90 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.key: "Budget_2014.xls" } 91 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.doc_count: 2 } 92 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.total_views.value: 19 } 93 | 94 | - match: { 
aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.key: "Budget_2013.xls" } 95 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.doc_count: 1 } 96 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.total_views.value: 10 } 97 | 98 | 99 | # test size 100 | - do: 101 | search: 102 | rest_total_hits_as_int: true 103 | body: { 104 | "size" : 0, 105 | "aggs" : { 106 | "tree" : { 107 | "path_hierarchy" : { 108 | "field" : "path", 109 | "size": 2 110 | } 111 | } 112 | } 113 | } 114 | 115 | - match: { hits.total: 4 } 116 | 117 | - match: { aggregations.tree.buckets.0.key: "My documents" } 118 | - match: { aggregations.tree.buckets.0.doc_count: 4 } 119 | 120 | - match: { aggregations.tree.buckets.0.tree.buckets.0.key: "Spreadsheets" } 121 | - match: { aggregations.tree.buckets.0.tree.buckets.0.doc_count: 3 } 122 | 123 | 124 | # test depth 125 | - do: 126 | search: 127 | rest_total_hits_as_int: true 128 | body: { 129 | "size" : 0, 130 | "aggs" : { 131 | "tree" : { 132 | "path_hierarchy" : { 133 | "field" : "path", 134 | "separator": "/", 135 | "order": [{"_count": "desc"}, {"_key": "asc"}], 136 | "depth": 2 137 | }, 138 | "aggs": { 139 | "total_views": { 140 | "sum": { 141 | "field": "views" 142 | } 143 | } 144 | } 145 | } 146 | } 147 | } 148 | 149 | - match: { hits.total: 4 } 150 | 151 | - match: { aggregations.tree.buckets.0.key: "Budget_2014.xls" } 152 | - match: { aggregations.tree.buckets.0.doc_count: 2 } 153 | - match: { aggregations.tree.buckets.0.total_views.value: 19 } 154 | 155 | - match: { aggregations.tree.buckets.1.key: "Budget_2013.xls" } 156 | - match: { aggregations.tree.buckets.1.doc_count: 1 } 157 | - match: { aggregations.tree.buckets.1.total_views.value: 10 } 158 | 159 | 160 | # test keep_blank_path 161 | - do: 162 | search: 163 | rest_total_hits_as_int: true 164 | body: { 165 | "size" : 0, 166 | "aggs" : { 167 | "tree" : { 168 | "path_hierarchy" : { 169 | "field" : "path", 170 | "separator": "/", 171 | "order": [{"_count": "desc"}, {"_key": "asc"}], 172 | "keep_blank_path": "true" 173 | }, 174 | "aggs": { 175 | "total_views": { 176 | "sum": { 177 | "field": "views" 178 | } 179 | } 180 | } 181 | } 182 | } 183 | } 184 | 185 | - match: { hits.total: 4 } 186 | 187 | - match: { aggregations.tree.buckets.0.key: "My documents" } 188 | - match: { aggregations.tree.buckets.0.doc_count: 4 } 189 | - match: { aggregations.tree.buckets.0.total_views.value: 30 } 190 | 191 | - match: { aggregations.tree.buckets.0.tree.buckets.0.key: "Spreadsheets" } 192 | - match: { aggregations.tree.buckets.0.tree.buckets.0.doc_count: 3 } 193 | - match: { aggregations.tree.buckets.0.tree.buckets.0.total_views.value: 29 } 194 | 195 | - match: { aggregations.tree.buckets.0.tree.buckets.1.key: "Test.txt" } 196 | - match: { aggregations.tree.buckets.0.tree.buckets.1.doc_count: 1 } 197 | - match: { aggregations.tree.buckets.0.tree.buckets.1.total_views.value: 1 } 198 | 199 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.key: "" } 200 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.doc_count: 1 } 201 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.total_views.value: 12 } 202 | 203 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.key: "Budget_2013.xls" } 204 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.doc_count: 1 } 205 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.total_views.value: 10 } 206 | 207 | - match: { 
aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.2.key: "Budget_2014.xls" } 208 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.2.doc_count: 1 } 209 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.2.total_views.value: 7 } 210 | 211 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.tree.buckets.0.key: "Budget_2014.xls" } 212 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.tree.buckets.0.doc_count: 1 } 213 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.tree.buckets.0.total_views.value: 12 } 214 | 215 | # test multi characters separator 216 | - do: 217 | search: 218 | rest_total_hits_as_int: true 219 | body: { 220 | "size" : 0, 221 | "aggs" : { 222 | "tree" : { 223 | "path_hierarchy" : { 224 | "field" : "path", 225 | "separator": "doc", 226 | "order": [{"_key": "asc"}], 227 | }, 228 | "aggs": { 229 | "total_views": { 230 | "sum": { 231 | "field": "views" 232 | } 233 | } 234 | } 235 | } 236 | } 237 | } 238 | 239 | - match: { hits.total: 4 } 240 | 241 | - match: { aggregations.tree.buckets.0.key: "/My " } 242 | - match: { aggregations.tree.buckets.0.doc_count: 4 } 243 | - match: { aggregations.tree.buckets.0.total_views.value: 30 } 244 | 245 | - match: { aggregations.tree.buckets.0.tree.buckets.0.key: "uments/Spreadsheets//Budget_2014.xls" } 246 | - match: { aggregations.tree.buckets.0.tree.buckets.0.doc_count: 1 } 247 | - match: { aggregations.tree.buckets.0.tree.buckets.0.total_views.value: 12 } 248 | 249 | - match: { aggregations.tree.buckets.0.tree.buckets.1.key: "uments/Spreadsheets/Budget_2013.xls" } 250 | - match: { aggregations.tree.buckets.0.tree.buckets.1.doc_count: 1 } 251 | - match: { aggregations.tree.buckets.0.tree.buckets.1.total_views.value: 10 } 252 | 253 | - match: { aggregations.tree.buckets.0.tree.buckets.2.key: "uments/Spreadsheets/Budget_2014.xls" } 254 | - match: { aggregations.tree.buckets.0.tree.buckets.2.doc_count: 1 } 255 | - match: { aggregations.tree.buckets.0.tree.buckets.2.total_views.value: 7 } 256 | 257 | - match: { aggregations.tree.buckets.0.tree.buckets.3.key: "uments/Test.txt" } 258 | - match: { aggregations.tree.buckets.0.tree.buckets.3.doc_count: 1 } 259 | - match: { aggregations.tree.buckets.0.tree.buckets.3.total_views.value: 1 } 260 | 261 | ## test sum_other_hierarchy_nodes 262 | # - do: 263 | # search: 264 | # body: { 265 | # "size" : 0, 266 | # "aggs" : { 267 | # "tree" : { 268 | # "path_hierarchy" : { 269 | # "field" : "path", 270 | # "size": 1 271 | # } 272 | # } 273 | # } 274 | # } 275 | # 276 | # - match: { hits.total: 4 } 277 | # 278 | # - match: { aggregations.tree.sum_other_hierarchy_nodes: 4 } 279 | -------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/30_date_hierarchy.yml: -------------------------------------------------------------------------------- 1 | setup: 2 | - do: 3 | indices.create: 4 | index: calendar 5 | body: 6 | settings: 7 | number_of_shards: 2 8 | number_of_replicas: 0 9 | mappings: 10 | properties: 11 | date: 12 | type: date 13 | 14 | - do: 15 | cluster.health: 16 | wait_for_status: green 17 | 18 | --- 19 | "Test with date hierarchy": 20 | - do: 21 | index: 22 | index: calendar 23 | id: 1 24 | body: { "date": "2012-01-10T02:47:28" } 25 | 26 | - do: 27 | index: 28 | index: calendar 29 | id: 2 30 | body: { "date": "2011-01-05T01:43:35" } 31 | 32 | - do: 33 | index: 34 | index: calendar 35 | id: 3 36 | body: { "date": 
"2012-05-01T12:24:19" } 37 | 38 | - do: 39 | indices.refresh: {} 40 | 41 | 42 | # test years interval 43 | - do: 44 | search: 45 | rest_total_hits_as_int: true 46 | body: { 47 | "size" : 0, 48 | "aggs" : { 49 | "tree" : { 50 | "date_hierarchy" : { 51 | "field" : "date", 52 | "interval": "years", 53 | "order": [{"_key": "asc"}], 54 | } 55 | } 56 | } 57 | } 58 | 59 | - match: { hits.total: 3 } 60 | 61 | - match: { aggregations.tree.buckets.0.key: "2011" } 62 | - match: { aggregations.tree.buckets.0.doc_count: 1 } 63 | 64 | - match: { aggregations.tree.buckets.1.key: "2012" } 65 | - match: { aggregations.tree.buckets.1.doc_count: 2 } 66 | 67 | 68 | # test months interval 69 | - do: 70 | search: 71 | rest_total_hits_as_int: true 72 | body: { 73 | "size": 0, 74 | "aggs": { 75 | "tree": { 76 | "date_hierarchy": { 77 | "field": "date", 78 | "interval": "months", 79 | "order": [{"_key": "asc"}], 80 | } 81 | } 82 | } 83 | } 84 | 85 | - match: { hits.total: 3 } 86 | 87 | - match: { aggregations.tree.buckets.0.key: "2011" } 88 | - match: { aggregations.tree.buckets.0.doc_count: 1 } 89 | 90 | - match: { aggregations.tree.buckets.1.key: "2012" } 91 | - match: { aggregations.tree.buckets.1.doc_count: 2 } 92 | 93 | - match: { aggregations.tree.buckets.0.tree.buckets.0.key: "01" } 94 | - match: { aggregations.tree.buckets.0.tree.buckets.0.doc_count: 1 } 95 | 96 | - match: { aggregations.tree.buckets.1.tree.buckets.0.key: "01" } 97 | - match: { aggregations.tree.buckets.1.tree.buckets.0.doc_count: 1 } 98 | 99 | - match: { aggregations.tree.buckets.1.tree.buckets.1.key: "05" } 100 | - match: { aggregations.tree.buckets.1.tree.buckets.1.doc_count: 1 } 101 | -------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/40_max_buckets_path_hierarchy.yml: -------------------------------------------------------------------------------- 1 | setup: 2 | - do: 3 | indices.create: 4 | index: filesystem 5 | body: 6 | settings: 7 | number_of_shards: 1 8 | number_of_replicas: 0 9 | mappings: 10 | properties: 11 | path: 12 | type: keyword 13 | views: 14 | type: integer 15 | 16 | - do: 17 | cluster.put_settings: 18 | body: 19 | transient: 20 | search.max_buckets: "3" 21 | flat_settings: true 22 | 23 | - do: 24 | cluster.health: 25 | wait_for_status: green 26 | 27 | --- 28 | teardown: 29 | 30 | - do: 31 | cluster.put_settings: 32 | body: 33 | transient: 34 | search.max_buckets: null 35 | 36 | --- 37 | "Test with filesystem arborescence": 38 | - do: 39 | index: 40 | index: filesystem 41 | id: 1 42 | body: { "path": "/Spreadsheets/Budget_2013.xls", "views": 10 } 43 | 44 | - do: 45 | index: 46 | index: filesystem 47 | id: 2 48 | body: { "path": "/Spreadsheets/Budget_2014.xls", "views": 7 } 49 | 50 | - do: 51 | index: 52 | index: filesystem 53 | id: 3 54 | body: { "path": "/My documents/Test.txt", "views": 1 } 55 | 56 | - do: 57 | index: 58 | index: filesystem 59 | id: 4 60 | body: { "path": "/Spreadsheets/Budget_2014.xls", "views": 12 } 61 | 62 | - do: 63 | indices.refresh: {} 64 | 65 | 66 | # Search with limited size 67 | - do: 68 | search: 69 | rest_total_hits_as_int: true 70 | body: { 71 | "size" : 0, 72 | "aggs" : { 73 | "tree" : { 74 | "path_hierarchy" : { 75 | "field" : "path", 76 | "separator": "/", 77 | "size": "3", 78 | "shard_size": "3" 79 | }, 80 | "aggs": { 81 | "total_views": { 82 | "sum": { 83 | "field": "views" 84 | } 85 | } 86 | } 87 | } 88 | } 89 | } 90 | 91 | - match: { hits.total: 4 } 92 | 
-------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/50_max_buckets_date_hierarchy.yml: -------------------------------------------------------------------------------- 1 | setup: 2 | - do: 3 | indices.create: 4 | index: calendar 5 | body: 6 | settings: 7 | number_of_shards: 1 8 | number_of_replicas: 0 9 | mappings: 10 | properties: 11 | date: 12 | type: date 13 | 14 | - do: 15 | cluster.put_settings: 16 | body: 17 | transient: 18 | search.max_buckets: "3" 19 | flat_settings: true 20 | 21 | - do: 22 | cluster.health: 23 | wait_for_status: green 24 | 25 | --- 26 | teardown: 27 | 28 | - do: 29 | cluster.put_settings: 30 | body: 31 | transient: 32 | search.max_buckets: null 33 | 34 | --- 35 | "Test with date hierarchy": 36 | - do: 37 | index: 38 | index: calendar 39 | id: 1 40 | body: { "date": "2012-01-10T02:47:28" } 41 | 42 | - do: 43 | index: 44 | index: calendar 45 | id: 2 46 | body: { "date": "2011-01-05T01:43:35" } 47 | 48 | - do: 49 | index: 50 | index: calendar 51 | id: 3 52 | body: { "date": "2012-05-01T12:24:19" } 53 | 54 | - do: 55 | index: 56 | index: calendar 57 | id: 4 58 | body: { "date": "2020-05-01T12:24:19" } 59 | 60 | - do: 61 | indices.refresh: {} 62 | 63 | 64 | # Search with limited size 65 | - do: 66 | search: 67 | rest_total_hits_as_int: true 68 | body: { 69 | "size": 0, 70 | "aggs": { 71 | "tree": { 72 | "date_hierarchy": { 73 | "field": "date", 74 | "interval": "months", 75 | "order": [{"_key": "asc"}], 76 | "size" : 3, 77 | "shard_size": 3 78 | } 79 | } 80 | } 81 | } 82 | 83 | - match: { hits.total: 4 } 84 | -------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/60_path_hierarchy_multi_buckets.yml: -------------------------------------------------------------------------------- 1 | setup: 2 | - do: 3 | indices.create: 4 | index: filesystem 5 | body: 6 | settings: 7 | number_of_shards: 1 8 | number_of_replicas: 0 9 | mappings: 10 | properties: 11 | path1: 12 | type: keyword 13 | path2: 14 | type: keyword 15 | 16 | - do: 17 | cluster.health: 18 | wait_for_status: green 19 | 20 | --- 21 | "Test with filesystem arborescence": 22 | - do: 23 | index: 24 | index: filesystem 25 | id: 1 26 | body: { "path1": "/My documents/Spreadsheets/Budget_2013.xls", "path2": "/My documents/Spreadsheets/Budget_2014.xls" } 27 | 28 | - do: 29 | index: 30 | index: filesystem 31 | id: 2 32 | body: { "path1": "/My documents/Spreadsheets/Budget_2014.xls", "path2": "/My documents/Spreadsheets/Budget_2013.xls" } 33 | 34 | - do: 35 | indices.refresh: {} 36 | 37 | 38 | # basic test 39 | - do: 40 | search: 41 | rest_total_hits_as_int: true 42 | body: { 43 | "size" : 0, 44 | "aggs": { 45 | "_path1_agg": { 46 | "path_hierarchy": { 47 | "field": "path1", 48 | "order": { 49 | "_key": "asc" 50 | }, 51 | "shard_size": 100, 52 | "size": 20000, 53 | "min_doc_count": 0 54 | } 55 | }, 56 | "_path2_agg": { 57 | "path_hierarchy": { 58 | "field": "path2", 59 | "order": { 60 | "_key": "asc" 61 | }, 62 | "shard_size": 100, 63 | "size": 20000, 64 | "min_doc_count": 0 65 | } 66 | } 67 | } 68 | } 69 | 70 | - match: { aggregations._path1_agg.buckets.0.key: "My documents" } 71 | 72 | - match: { aggregations._path2_agg.buckets.0.key: "My documents" } 73 | --------------------------------------------------------------------------------
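As an aside for readers of the listing above, here is a minimal, standalone sketch of how `PathSortedTree` turns flat per-path buckets back into a sorted, size-capped hierarchy, in the spirit of what `PathHierarchyAggregator#buildAggregations` does. The generic signature `PathSortedTree<String, Bucket>` is inferred from the aggregator's call sites (the type arguments are not fully visible in the listing), and the `Bucket` record and sample paths are purely illustrative.

```
import java.util.Comparator;
import java.util.Iterator;

import org.opendatasoft.elasticsearch.search.aggregations.bucket.PathSortedTree;

public class PathSortedTreeExample {

    // Hypothetical stand-in for InternalPathHierarchy.InternalBucket.
    record Bucket(String name, long docCount) {}

    public static void main(String[] args) {
        // Highest doc count first: PriorityQueue#poll returns the element that
        // sorts first under the comparator, so siblings come back count-descending.
        Comparator<Bucket> byCountDesc = Comparator.comparingLong(Bucket::docCount).reversed();

        // Keep at most 3 buckets overall, like a shard_size of 3.
        PathSortedTree<String, Bucket> tree = new PathSortedTree<>(byCountDesc, 3);

        // add() expects the path segments from the root down, and a parent has to be
        // inserted before its children (the aggregator gets this for free because
        // every prefix of a path is emitted as its own value and values are sorted).
        tree.add(new String[] {"My documents"}, new Bucket("My documents", 4));
        tree.add(new String[] {"My documents", "Spreadsheets"}, new Bucket("Spreadsheets", 3));
        tree.add(new String[] {"My documents", "Test.txt"}, new Bucket("Test.txt", 1));
        tree.add(new String[] {"My documents", "Spreadsheets", "Budget.xls"}, new Bucket("Budget.xls", 2));

        // consumer() walks depth-first, best sibling first, and stops at the size cap:
        // prints "My documents", "Spreadsheets", "Budget.xls"; "Test.txt" is cut off.
        Iterator<Bucket> it = tree.consumer();
        while (it.hasNext()) {
            System.out.println(it.next().name());
        }
    }
}
```

This also hints at where `sum_other_hierarchy_nodes` comes from: `getFullSize()` counts every node ever added, and whatever the consumer does not emit within the cap is reported as "other", which is what the commented-out `sum_other_hierarchy_nodes` check in 20_path_hierarchy.yml is getting at.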