├── .env ├── .github └── workflows │ └── build.yml ├── .gitignore ├── LICENSE ├── README.md ├── build.gradle ├── build.sh ├── docker-compose.yml ├── docker └── Dockerfile ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── prepare-version.sh └── src ├── main └── java │ └── org │ └── opendatasoft │ └── elasticsearch │ ├── plugin │ └── PathHierarchyAggregation.java │ └── search │ └── aggregations │ └── bucket │ ├── DateHierarchyAggregationBuilder.java │ ├── DateHierarchyAggregationSupplier.java │ ├── DateHierarchyAggregator.java │ ├── DateHierarchyAggregatorFactory.java │ ├── InternalDateHierarchy.java │ ├── InternalPathHierarchy.java │ ├── PathHierarchyAggregationBuilder.java │ ├── PathHierarchyAggregationSupplier.java │ ├── PathHierarchyAggregator.java │ ├── PathHierarchyAggregatorFactory.java │ └── PathSortedTree.java ├── test └── java │ └── org │ └── opendatasoft │ └── elasticsearch │ └── PathHierarchyTests.java └── yamlRestTest ├── java └── org │ └── opendatasoft │ └── elasticsearch │ └── RestApiYamlIT.java └── resources └── rest-api-spec └── test └── PathHierarchy ├── 10_basic.yml ├── 20_path_hierarchy.yml ├── 30_date_hierarchy.yml ├── 40_max_buckets_path_hierarchy.yml ├── 50_max_buckets_date_hierarchy.yml └── 60_path_hierarchy_multi_buckets.yml /.env: -------------------------------------------------------------------------------- 1 | ES_VERSION=7.17.28 2 | PLUGIN_VERSION=7.17.28.0 3 | JAVA_COMPILER_VERSION=17 4 | GRADLE_VERSION=8.10.2 5 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout sources 11 | uses: actions/checkout@v4 12 | - name: Setup Java 13 | uses: actions/setup-java@v4 14 | with: 15 | distribution: 'temurin' 16 | java-version: 17 17 | - name: Setup Gradle 18 | uses: gradle/actions/setup-gradle@v4 19 | - name: Build with Gradle 20 | run: ./gradlew build 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .gradle/ 3 | .idea/ 4 | build/ 5 | *.iml 6 | *.log 7 | .vscode/ 8 | 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Elasticsearch Aggregation Path Hierarchy Plugin 2 | ========================================= 3 | 4 | This plugin adds the possibility to create hierarchical aggregations. 5 | Each term is split on a provided separator (default "/") then aggregated by level. 6 | For a complete example see https://github.com/elastic/elasticsearch/issues/8896 7 | 8 | Two different aggregations are available: 9 | - `path_hierarchy` for hierarchical aggregations on `keywords` field or `scripts` 10 | - `date_hierarchy` for hierachical aggregations on `date` fields. It is more optimized to use this aggregation for date instead of a script. 11 | 12 | This is a multi bucket aggregation. 13 | 14 | 15 | Installation 16 | ------------ 17 | 18 | To install it, launch this command in Elasticsearch directory replacing the url with a release suiting your case (please check available releases [here](https://github.com/opendatasoft/elasticsearch-aggregation-pathhierarchy/releases)): 19 | `./bin/elasticsearch-plugin install https://github.com/opendatasoft/elasticsearch-aggregation-pathhierarchy/releases/download/v7.17.28.0/pathhierarchy-aggregation-7.17.28.0.zip` 20 | 21 | Build 22 | ----- 23 | 24 | Built with Java 17 and Gradle 8.10.2. 25 | 26 | The first 3 digits of plugin version is Elasticsearch versioning. The last digit is used for plugin versioning under an elasticsearch version. 27 | 28 | 29 | Upgrade the plugin 30 | ------------ 31 | 32 | In order to upgrade the plugin, one can simply run for example `./prepare-version.sh 7.17.28`. It will: 33 | - fetch dependencies versions form Elasticsearch internals 34 | - upgrade the configuration files 35 | 36 | You can then run `./build.sh` that will build the plugin in a docker container using gradle. 37 | If successful, the plugin will be available in `./build/distributions/` (under the name `pathhierarchy-aggregation-7.17.28.0.zip` in this example). 38 | 39 | 40 | 41 | Development Environment Setup 42 | ------------ 43 | 44 | Build the plugin using gradle: 45 | 46 | `./gradlew build` 47 | 48 | OR 49 | 50 | `./gradlew assemble` (to avoid tests) 51 | 52 | In case you have to upgrade Gradle, you can do it with `./gradlew wrapper --gradler-version x.y.z`. 53 | 54 | Then the following command will start a dockerized ES and will install the previously built plugin: 55 | 56 | ``` 57 | docker compose up 58 | ``` 59 | 60 | Check the Elasticsearch instance at `localhost:9200` and the plugin version with `localhost:9200/_cat/plugins`. 61 | 62 | 63 | > NOTE: In `docker-compose.yml` you can uncomment the debug env and attach a REMOTE JVM on `*:5005` to debug the plugin. 64 | 65 | Path hierarchy aggregation 66 | -------------------------- 67 | 68 | ### Parameters 69 | 70 | - `field` or `script` : field to aggregate on 71 | - `separator` : separator for path hierarchy (default to "/") 72 | - `order` : order parameter to define how to sort result. Allowed parameters are `_key`, `_count` or sub aggregation name. Default to {"_count": "desc}. 73 | - `size`: size parameter to define how many buckets should be returned. Default to 10. 
74 | - `shard_size`: how many buckets returned by each shards. Set to size if smaller, default to size if the search request needs to go to a single shard, and (size * 1.5 + 10) otherwise (more information here: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_shard_size_3). 75 | - `min_depth`: Set minimum depth level. Default to 0. 76 | - `max_depth`: Set maximum depth level. `-1` means no limit. Default to 3. 77 | - `depth`: Retrieve values for specified depth. Shortcut, instead of setting `min_depth` and `max_depth` parameters to the same value. 78 | - `keep_blank_path`: Keep blank path as bucket. if this option is set to false, chained separator will be ignored. Default to false. 79 | - `min_doc_count`: Return buckets containing at least `min_doc_count` document. Default to 0 80 | 81 | 82 | Examples 83 | ------- 84 | 85 | #### String field 86 | 87 | ``` 88 | # Add data: 89 | PUT filesystem 90 | { 91 | "mappings": { 92 | "properties": { 93 | "path": { 94 | "type": "keyword" 95 | } 96 | } 97 | } 98 | } 99 | POST filesystem/_bulk?refresh 100 | {"index":{}} 101 | {"path":"/My documents/Spreadsheets/Budget_2013.xls","views":10} 102 | {"index":{}} 103 | {"path":"/My documents/Spreadsheets/Budget_2014.xls","views":7} 104 | {"index":{}} 105 | {"path":"/My documents/Test.txt","views":1} 106 | 107 | 108 | # Path hierarchy request : 109 | GET /filesystem/_search?size=0 110 | { 111 | "aggs": { 112 | "tree": { 113 | "path_hierarchy": { 114 | "field": "path", 115 | "separator": "/" 116 | }, 117 | "aggs": { 118 | "total_views": { 119 | "sum": { 120 | "field": "views" 121 | } 122 | } 123 | } 124 | } 125 | } 126 | } 127 | 128 | 129 | Result : 130 | {"aggregations": { 131 | "tree": { 132 | "sum_other_doc_count": 0, 133 | "buckets": [ 134 | { 135 | "key": "My documents", 136 | "doc_count": 3, 137 | "total_views": { 138 | "value": 18 139 | }, 140 | "tree": { 141 | "buckets": [ 142 | { 143 | "key": "Spreadsheets", 144 | "doc_count": 2, 145 | "total_views": { 146 | "value": 17 147 | }, 148 | "tree": { 149 | "buckets": [ 150 | { 151 | "key": "Budget_2013.xls", 152 | "doc_count": 1, 153 | "total_views": { 154 | "value": 10 155 | } 156 | }, 157 | { 158 | "key": "Budget_2014.xls", 159 | "doc_count": 1, 160 | "total_views": { 161 | "value": 7 162 | } 163 | } 164 | ] 165 | } 166 | }, 167 | { 168 | "key": "Test.txt", 169 | "doc_count": 1, 170 | "total_views": { 171 | "value": 1 172 | } 173 | } 174 | ] 175 | } 176 | } 177 | ] 178 | } 179 | } 180 | ``` 181 | 182 | #### Script 183 | 184 | ``` 185 | PUT calendar 186 | { 187 | "mappings": { 188 | "properties": { 189 | "date": { 190 | "type": "date" 191 | } 192 | } 193 | } 194 | } 195 | 196 | POST calendar/_bulk?refresh 197 | {"index":{}} 198 | {"date": "2012-01-10T02:47:28"} 199 | {"index":{}} 200 | {"date": "2012-01-05T01:43:35"} 201 | {"index":{}} 202 | {"date": "2012-05-01T12:24:19"} 203 | 204 | GET /calendar/_search?size=0 205 | { 206 | "aggs": { 207 | "tree": { 208 | "path_hierarchy": { 209 | "script": "doc['date'].value.toOffsetDateTime().format(DateTimeFormatter.ofPattern('yyyy/MM/dd'))", 210 | "order": { 211 | "_key": "asc" 212 | } 213 | } 214 | } 215 | } 216 | } 217 | 218 | 219 | Result : 220 | 221 | {"aggregations": { 222 | "tree": { 223 | "buckets": [ 224 | { 225 | "key": "2012", 226 | "doc_count": 3, 227 | "tree": { 228 | "buckets": [ 229 | { 230 | "key": "01", 231 | "doc_count": 2, 232 | "tree": { 233 | "buckets": [ 234 | { 235 | "key": "05", 236 | "doc_count": 1 237 | }, 238 | { 239 | 
"key": "10", 240 | "doc_count": 1 241 | } 242 | ] 243 | } 244 | }, 245 | { 246 | "key": "05", 247 | "doc_count": 1, 248 | "tree": { 249 | "buckets": [ 250 | { 251 | "key": "01", 252 | "doc_count": 1 253 | } 254 | ] 255 | } 256 | } 257 | ] 258 | } 259 | } 260 | ] 261 | } 262 | } 263 | } 264 | 265 | ``` 266 | 267 | Date hierarchy 268 | -------------- 269 | 270 | ### Parameters 271 | 272 | - `field` : field to aggregate on. This parameter is mandatory 273 | - `interval`: date interval used to create the hierarchy. Allowed values are: `years`, `months`, `days`, `hours`, `minutes`, `seconds` Default to `years`. 274 | - `order` : order parameter to define how to sort result. Allowed parameters are `_key`, `_count` or sub aggregation name. Default to {"_count": "desc}. 275 | - `size`: size parameter to define how many buckets should be returned. Default to 10. 276 | - `shard_size`: how many buckets returned by each shards. Set to size if smaller, default to size if the search request needs to go to a single shard, and (size * 1.5 + 10) otherwise (more information here: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_shard_size_3). 277 | - `min_doc_count`: Return buckets containing at least `min_doc_count` document. Default to 0 278 | 279 | 280 | Example 281 | ------- 282 | 283 | ``` 284 | 285 | PUT calendar 286 | { 287 | "mappings": { 288 | "properties": { 289 | "date": { 290 | "type": "date" 291 | } 292 | } 293 | } 294 | } 295 | 296 | PUT /calendar/_doc/1 297 | { 298 | "date": "2012-01-10T02:47:28" 299 | } 300 | PUT /calendar/_doc/2 301 | { 302 | "date": "2012-01-05T01:43:35" 303 | } 304 | PUT /calendar/_doc/3 305 | { 306 | "date": "2012-05-01T12:24:19" 307 | } 308 | 309 | GET /calendar/_search?size=0 310 | { 311 | "aggs": { 312 | "tree": { 313 | "date_hierarchy": { 314 | "field": "date", 315 | "interval": "days", 316 | "order": { 317 | "_key": "asc" 318 | } 319 | } 320 | } 321 | } 322 | } 323 | 324 | ``` 325 | 326 | 327 | License 328 | ------- 329 | 330 | This software is under The MIT License (MIT). 
331 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | mavenLocal() 4 | mavenCentral() 5 | } 6 | 7 | dependencies { 8 | classpath "org.elasticsearch.gradle:build-tools:${es_version}" 9 | } 10 | } 11 | 12 | repositories { 13 | mavenLocal() 14 | mavenCentral() 15 | } 16 | 17 | group = 'org.elasticsearch.plugin' 18 | version = "${plugin_version}" 19 | 20 | def versions = org.elasticsearch.gradle.VersionProperties.versions 21 | 22 | apply plugin: 'java' 23 | apply plugin: 'idea' 24 | apply plugin: 'elasticsearch.esplugin' 25 | apply plugin: 'elasticsearch.yaml-rest-test' 26 | 27 | 28 | esplugin { 29 | name 'pathhierarchy-aggregation' 30 | description 'Return a path hierarchy aggregation' 31 | classname 'org.opendatasoft.elasticsearch.plugin.PathHierarchyAggregation' 32 | licenseFile = rootProject.file('LICENSE') 33 | noticeFile = rootProject.file('README.md') 34 | } 35 | 36 | 37 | dependencies { 38 | implementation "org.elasticsearch:elasticsearch:${es_version}" 39 | yamlRestTestImplementation "org.elasticsearch.test:framework:${es_version}" 40 | yamlRestTestImplementation "org.apache.logging.log4j:log4j-core:${versions.log4j}" 41 | } 42 | 43 | tasks.named("yamlRestTest").configure { 44 | systemProperty 'tests.security.manager', 'false' 45 | } 46 | 47 | tasks.named("test").configure { 48 | systemProperty 'tests.security.manager', 'false' 49 | } 50 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # helper script that runs the build script inside a gradle based docker container 4 | 5 | source .env 6 | 7 | echo "GRADLE_VERSION ${GRADLE_VERSION}" 8 | 9 | docker run --rm \ 10 | -v .:/opt/gen \ 11 | -w /opt/gen \ 12 | -u gradle \ 13 | gradle:${GRADLE_VERSION} ./gradlew build 14 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | networks: 2 | es-network: 3 | 4 | services: 5 | elasticsearch-plugin-debug: 6 | build: 7 | context: . 
8 | dockerfile: docker/Dockerfile 9 | target: elasticsearch-plugin-debug 10 | args: 11 | ES_VERSION: ${ES_VERSION} 12 | PLUGIN_VERSION: ${PLUGIN_VERSION} 13 | PLUGIN_FILENAME: pathhierarchy-aggregation-${PLUGIN_VERSION}.zip 14 | environment: 15 | - discovery.type=single-node 16 | # NO DEBUG 17 | - ES_JAVA_OPTS=-Xms512m -Xmx512m 18 | # DEBUG 19 | # - ES_JAVA_OPTS=-Xms512m -Xmx512m -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005 20 | ports: 21 | - "9200:9200" 22 | # DEBUG 23 | # - "5005:5005" 24 | networks: 25 | - es-network 26 | 27 | kibana: 28 | image: docker.elastic.co/kibana/kibana:7.17.24 29 | environment: 30 | ELASTICSEARCH_HOSTS: http://elasticsearch-plugin-debug:9200/ 31 | ports: 32 | - "5602:5601" 33 | networks: 34 | - es-network 35 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG ES_VERSION 2 | ARG PLUGIN_FILENAME 3 | 4 | FROM docker.elastic.co/elasticsearch/elasticsearch:${ES_VERSION} AS elasticsearch-plugin-debug 5 | 6 | COPY build/distributions/${PLUGIN_FILENAME} /tmp/${PLUGIN_FILENAME} 7 | 8 | # mandatory because docker ARG is scoped to stage (forgotten after the FROM scope above...!) 9 | ARG PLUGIN_FILENAME 10 | # madantory because ARGS cannot be used inside RUN shell 11 | ENV PLUGIN_FILENAME=${PLUGIN_FILENAME} 12 | RUN ./bin/elasticsearch-plugin install --batch file:/tmp/${PLUGIN_FILENAME} 13 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | es_version = 7.17.28 2 | plugin_version = 7.17.28.0 3 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opendatasoft/elasticsearch-aggregation-pathhierarchy/06fc00f47cb146b319754f62f9010012e3073f56/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | # SPDX-License-Identifier: Apache-2.0 19 | # 20 | 21 | ############################################################################## 22 | # 23 | # Gradle start up script for POSIX generated by Gradle. 24 | # 25 | # Important for running: 26 | # 27 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 28 | # noncompliant, but you have some other compliant shell such as ksh or 29 | # bash, then to run this script, type that shell name before the whole 30 | # command line, like: 31 | # 32 | # ksh Gradle 33 | # 34 | # Busybox and similar reduced shells will NOT work, because this script 35 | # requires all of these POSIX shell features: 36 | # * functions; 37 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 38 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 39 | # * compound commands having a testable exit status, especially «case»; 40 | # * various built-in commands including «command», «set», and «ulimit». 41 | # 42 | # Important for patching: 43 | # 44 | # (2) This script targets any POSIX shell, so it avoids extensions provided 45 | # by Bash, Ksh, etc; in particular arrays are avoided. 46 | # 47 | # The "traditional" practice of packing multiple parameters into a 48 | # space-separated string is a well documented source of bugs and security 49 | # problems, so this is (mostly) avoided, by progressively accumulating 50 | # options in "$@", and eventually passing that to Java. 51 | # 52 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 53 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 54 | # see the in-line comments for details. 55 | # 56 | # There are tweaks for specific operating systems such as AIX, CygWin, 57 | # Darwin, MinGW, and NonStop. 58 | # 59 | # (3) This script is generated from the Groovy template 60 | # https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 61 | # within the Gradle project. 62 | # 63 | # You can find Gradle at https://github.com/gradle/gradle/. 64 | # 65 | ############################################################################## 66 | 67 | # Attempt to set APP_HOME 68 | 69 | # Resolve links: $0 may be a link 70 | app_path=$0 71 | 72 | # Need this for daisy-chained symlinks. 73 | while 74 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 75 | [ -h "$app_path" ] 76 | do 77 | ls=$( ls -ld "$app_path" ) 78 | link=${ls#*' -> '} 79 | case $link in #( 80 | /*) app_path=$link ;; #( 81 | *) app_path=$APP_HOME$link ;; 82 | esac 83 | done 84 | 85 | # This is normally unused 86 | # shellcheck disable=SC2034 87 | APP_BASE_NAME=${0##*/} 88 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) 89 | APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s 90 | ' "$PWD" ) || exit 91 | 92 | # Use the maximum available, or set MAX_FD != -1 to use that value. 93 | MAX_FD=maximum 94 | 95 | warn () { 96 | echo "$*" 97 | } >&2 98 | 99 | die () { 100 | echo 101 | echo "$*" 102 | echo 103 | exit 1 104 | } >&2 105 | 106 | # OS specific support (must be 'true' or 'false'). 
107 | cygwin=false 108 | msys=false 109 | darwin=false 110 | nonstop=false 111 | case "$( uname )" in #( 112 | CYGWIN* ) cygwin=true ;; #( 113 | Darwin* ) darwin=true ;; #( 114 | MSYS* | MINGW* ) msys=true ;; #( 115 | NONSTOP* ) nonstop=true ;; 116 | esac 117 | 118 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 119 | 120 | 121 | # Determine the Java command to use to start the JVM. 122 | if [ -n "$JAVA_HOME" ] ; then 123 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 124 | # IBM's JDK on AIX uses strange locations for the executables 125 | JAVACMD=$JAVA_HOME/jre/sh/java 126 | else 127 | JAVACMD=$JAVA_HOME/bin/java 128 | fi 129 | if [ ! -x "$JAVACMD" ] ; then 130 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 131 | 132 | Please set the JAVA_HOME variable in your environment to match the 133 | location of your Java installation." 134 | fi 135 | else 136 | JAVACMD=java 137 | if ! command -v java >/dev/null 2>&1 138 | then 139 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 140 | 141 | Please set the JAVA_HOME variable in your environment to match the 142 | location of your Java installation." 143 | fi 144 | fi 145 | 146 | # Increase the maximum file descriptors if we can. 147 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 148 | case $MAX_FD in #( 149 | max*) 150 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 151 | # shellcheck disable=SC2039,SC3045 152 | MAX_FD=$( ulimit -H -n ) || 153 | warn "Could not query maximum file descriptor limit" 154 | esac 155 | case $MAX_FD in #( 156 | '' | soft) :;; #( 157 | *) 158 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 159 | # shellcheck disable=SC2039,SC3045 160 | ulimit -n "$MAX_FD" || 161 | warn "Could not set maximum file descriptor limit to $MAX_FD" 162 | esac 163 | fi 164 | 165 | # Collect all arguments for the java command, stacking in reverse order: 166 | # * args from the command line 167 | # * the main class name 168 | # * -classpath 169 | # * -D...appname settings 170 | # * --module-path (only if needed) 171 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 172 | 173 | # For Cygwin or MSYS, switch paths to Windows format before running java 174 | if "$cygwin" || "$msys" ; then 175 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 176 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 177 | 178 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 179 | 180 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 181 | for arg do 182 | if 183 | case $arg in #( 184 | -*) false ;; # don't mess with options #( 185 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 186 | [ -e "$t" ] ;; #( 187 | *) false ;; 188 | esac 189 | then 190 | arg=$( cygpath --path --ignore --mixed "$arg" ) 191 | fi 192 | # Roll the args list around exactly as many times as the number of 193 | # args, so each arg winds up back in the position where it started, but 194 | # possibly modified. 195 | # 196 | # NB: a `for` loop captures its iteration list before it begins, so 197 | # changing the positional parameters here affects neither the number of 198 | # iterations, nor the values presented in `arg`. 199 | shift # remove old arg 200 | set -- "$@" "$arg" # push replacement arg 201 | done 202 | fi 203 | 204 | 205 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
206 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 207 | 208 | # Collect all arguments for the java command: 209 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, 210 | # and any embedded shellness will be escaped. 211 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be 212 | # treated as '${Hostname}' itself on the command line. 213 | 214 | set -- \ 215 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 216 | -classpath "$CLASSPATH" \ 217 | org.gradle.wrapper.GradleWrapperMain \ 218 | "$@" 219 | 220 | # Stop when "xargs" is not available. 221 | if ! command -v xargs >/dev/null 2>&1 222 | then 223 | die "xargs is not available" 224 | fi 225 | 226 | # Use "xargs" to parse quoted args. 227 | # 228 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 229 | # 230 | # In Bash we could simply go: 231 | # 232 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 233 | # set -- "${ARGS[@]}" "$@" 234 | # 235 | # but POSIX shell has neither arrays nor command substitution, so instead we 236 | # post-process each arg (as a line of input to sed) to backslash-escape any 237 | # character that might be a shell metacharacter, then use eval to reverse 238 | # that process (while maintaining the separation between arguments), and wrap 239 | # the whole thing up as a single "set" statement. 240 | # 241 | # This will of course break if any of these variables contains a newline or 242 | # an unmatched quote. 243 | # 244 | 245 | eval "set -- $( 246 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 247 | xargs -n1 | 248 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 249 | tr '\n' ' ' 250 | )" '"$@"' 251 | 252 | exec "$JAVACMD" "$@" 253 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | @rem SPDX-License-Identifier: Apache-2.0 17 | @rem 18 | 19 | @if "%DEBUG%"=="" @echo off 20 | @rem ########################################################################## 21 | @rem 22 | @rem Gradle startup script for Windows 23 | @rem 24 | @rem ########################################################################## 25 | 26 | @rem Set local scope for the variables with windows NT shell 27 | if "%OS%"=="Windows_NT" setlocal 28 | 29 | set DIRNAME=%~dp0 30 | if "%DIRNAME%"=="" set DIRNAME=. 31 | @rem This is normally unused 32 | set APP_BASE_NAME=%~n0 33 | set APP_HOME=%DIRNAME% 34 | 35 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 36 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 37 | 38 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
39 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 40 | 41 | @rem Find java.exe 42 | if defined JAVA_HOME goto findJavaFromJavaHome 43 | 44 | set JAVA_EXE=java.exe 45 | %JAVA_EXE% -version >NUL 2>&1 46 | if %ERRORLEVEL% equ 0 goto execute 47 | 48 | echo. 1>&2 49 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 50 | echo. 1>&2 51 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 52 | echo location of your Java installation. 1>&2 53 | 54 | goto fail 55 | 56 | :findJavaFromJavaHome 57 | set JAVA_HOME=%JAVA_HOME:"=% 58 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 59 | 60 | if exist "%JAVA_EXE%" goto execute 61 | 62 | echo. 1>&2 63 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 64 | echo. 1>&2 65 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 66 | echo location of your Java installation. 1>&2 67 | 68 | goto fail 69 | 70 | :execute 71 | @rem Setup the command line 72 | 73 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 74 | 75 | 76 | @rem Execute Gradle 77 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 78 | 79 | :end 80 | @rem End local scope for the variables with windows NT shell 81 | if %ERRORLEVEL% equ 0 goto mainEnd 82 | 83 | :fail 84 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 85 | rem the _cmd.exe /c_ return code! 86 | set EXIT_CODE=%ERRORLEVEL% 87 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 88 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 89 | exit /b %EXIT_CODE% 90 | 91 | :mainEnd 92 | if "%OS%"=="Windows_NT" endlocal 93 | 94 | :omega 95 | -------------------------------------------------------------------------------- /prepare-version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Helper script to prepare a version of the plugin based on a specific elasticsearch version 4 | # 5 | # given the elasticsearch version as argument, it generates the needed configuration file to build the plugin 6 | 7 | [ -z "$1" ] && { 8 | echo "1st argument should be the targeted elasticsearch version" 9 | exit 1 10 | } 11 | 12 | ES_VERSION="$1" 13 | 14 | # retrieve version information from ES repository 15 | GRADLE_VERSION=$(curl -s https://raw.githubusercontent.com/elastic/elasticsearch/refs/tags/v${ES_VERSION}/build-tools-internal/src/main/resources/minimumGradleVersion) 16 | JAVA_COMPILER_VERSION=$(curl -s https://raw.githubusercontent.com/elastic/elasticsearch/refs/tags/v${ES_VERSION}/build-tools-internal/src/main/resources/minimumCompilerVersion) 17 | JAVA_RUNTIME_VERSION=$(curl -s https://raw.githubusercontent.com/elastic/elasticsearch/refs/tags/v${ES_VERSION}/build-tools-internal/src/main/resources/minimumRuntimeVersion) 18 | 19 | PLUGIN_VERSION="${ES_VERSION}.0" 20 | 21 | echo "GRADLE_VERSION ${GRADLE_VERSION}" 22 | echo "JAVA_COMPILER_VERSION ${JAVA_COMPILER_VERSION}" 23 | echo "ES_VERSION ${ES_VERSION}" 24 | echo "PLUGIN_VERSION ${PLUGIN_VERSION}" 25 | 26 | echo "es_version = ${ES_VERSION} 27 | plugin_version = ${PLUGIN_VERSION}" > gradle.properties 28 | 29 | echo "ES_VERSION=${ES_VERSION} 30 | PLUGIN_VERSION=${PLUGIN_VERSION} 31 | JAVA_COMPILER_VERSION=${JAVA_COMPILER_VERSION} 32 | GRADLE_VERSION=${GRADLE_VERSION}" > .env 33 | 34 | docker run --rm \ 35 | -v .:/opt/gen \ 36 | -w /opt/gen \ 37 | -u gradle \ 38 | gradle:"${GRADLE_VERSION}" 
/usr/bin/gradle wrapper --gradle-version "${GRADLE_VERSION}" --distribution-type bin 39 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/plugin/PathHierarchyAggregation.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.plugin; 2 | 3 | import org.elasticsearch.plugins.Plugin; 4 | import org.elasticsearch.plugins.SearchPlugin; 5 | import org.opendatasoft.elasticsearch.search.aggregations.bucket.DateHierarchyAggregationBuilder; 6 | import org.opendatasoft.elasticsearch.search.aggregations.bucket.InternalDateHierarchy; 7 | import org.opendatasoft.elasticsearch.search.aggregations.bucket.InternalPathHierarchy; 8 | import org.opendatasoft.elasticsearch.search.aggregations.bucket.PathHierarchyAggregationBuilder; 9 | 10 | import java.util.ArrayList; 11 | 12 | public class PathHierarchyAggregation extends Plugin implements SearchPlugin { 13 | @Override 14 | public ArrayList getAggregations() { 15 | ArrayList r = new ArrayList<>(); 16 | r.add( 17 | new AggregationSpec( 18 | PathHierarchyAggregationBuilder.NAME, 19 | PathHierarchyAggregationBuilder::new, 20 | PathHierarchyAggregationBuilder.PARSER) 21 | .addResultReader(InternalPathHierarchy::new) 22 | .setAggregatorRegistrar(PathHierarchyAggregationBuilder::registerAggregators) 23 | ); 24 | r.add( 25 | new AggregationSpec( 26 | DateHierarchyAggregationBuilder.NAME, 27 | DateHierarchyAggregationBuilder::new, 28 | DateHierarchyAggregationBuilder.PARSER) 29 | .addResultReader(InternalDateHierarchy::new) 30 | .setAggregatorRegistrar(DateHierarchyAggregationBuilder::registerAggregators) 31 | ); 32 | return r; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/DateHierarchyAggregationBuilder.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.elasticsearch.Version; 4 | import org.elasticsearch.xcontent.ParseField; 5 | import org.elasticsearch.common.Rounding; 6 | import org.elasticsearch.common.io.stream.StreamInput; 7 | import org.elasticsearch.common.io.stream.StreamOutput; 8 | import org.elasticsearch.common.io.stream.Writeable; 9 | import org.elasticsearch.common.time.DateFormatter; 10 | import org.elasticsearch.xcontent.ObjectParser; 11 | import org.elasticsearch.xcontent.XContentBuilder; 12 | import org.elasticsearch.xcontent.XContentParser; 13 | import org.elasticsearch.index.mapper.DateFieldMapper; 14 | import org.elasticsearch.search.aggregations.support.AggregationContext; 15 | import org.elasticsearch.search.DocValueFormat; 16 | import org.elasticsearch.search.aggregations.AggregationBuilder; 17 | import org.elasticsearch.search.aggregations.AggregatorFactories.Builder; 18 | import org.elasticsearch.search.aggregations.AggregatorFactory; 19 | import org.elasticsearch.search.aggregations.BucketOrder; 20 | import org.elasticsearch.search.aggregations.InternalOrder; 21 | import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; 22 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder; 23 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; 24 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 25 | import 
org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; 26 | import org.elasticsearch.search.aggregations.support.ValuesSourceType; 27 | 28 | import java.io.IOException; 29 | import java.time.ZoneId; 30 | import java.time.ZoneOffset; 31 | import java.util.ArrayList; 32 | import java.util.LinkedHashMap; 33 | import java.util.List; 34 | import java.util.Map; 35 | import java.util.Objects; 36 | 37 | import static java.util.Collections.unmodifiableMap; 38 | 39 | 40 | /** 41 | * The builder of the aggregatorFactory. Also implements the parsing of the request. 42 | */ 43 | public class DateHierarchyAggregationBuilder extends ValuesSourceAggregationBuilder { 44 | public static final String NAME = "date_hierarchy"; 45 | public static final ValuesSourceRegistry.RegistryKey REGISTRY_KEY = 46 | new ValuesSourceRegistry.RegistryKey<>(NAME, DateHierarchyAggregationSupplier.class); 47 | 48 | 49 | public static final ParseField INTERVAL_FIELD = new ParseField("interval"); 50 | public static final ParseField ORDER_FIELD = new ParseField("order"); 51 | public static final ParseField SIZE_FIELD = new ParseField("size"); 52 | public static final ParseField SHARD_SIZE_FIELD = new ParseField("shard_size"); 53 | public static final ParseField MIN_DOC_COUNT_FIELD = new ParseField("min_doc_count"); 54 | 55 | 56 | public static final Map INTERVAL_CONFIG; 57 | static { 58 | Map dateFieldUnits = new LinkedHashMap<>(); 59 | dateFieldUnits.put("years", new IntervalConfig(Rounding.DateTimeUnit.YEAR_OF_CENTURY, "yyyy")); 60 | dateFieldUnits.put("months", new IntervalConfig(Rounding.DateTimeUnit.MONTH_OF_YEAR, "MM")); 61 | dateFieldUnits.put("days", new IntervalConfig(Rounding.DateTimeUnit.DAY_OF_MONTH, "dd")); 62 | dateFieldUnits.put("hours", new IntervalConfig(Rounding.DateTimeUnit.HOUR_OF_DAY, "hh")); 63 | dateFieldUnits.put("minutes", new IntervalConfig(Rounding.DateTimeUnit.MINUTES_OF_HOUR, "mm")); 64 | dateFieldUnits.put("seconds", new IntervalConfig(Rounding.DateTimeUnit.SECOND_OF_MINUTE, "ss")); 65 | INTERVAL_CONFIG = unmodifiableMap(dateFieldUnits); 66 | } 67 | 68 | public static class IntervalConfig { 69 | final Rounding.DateTimeUnit dateTimeUnit; 70 | final String format; 71 | 72 | public IntervalConfig(Rounding.DateTimeUnit dateTimeUnit, String format) { 73 | this.dateTimeUnit = dateTimeUnit; 74 | this.format = format; 75 | } 76 | } 77 | 78 | public static class PreparedRounding { 79 | final RoundingInfo roundingInfo; 80 | final Rounding.Prepared prepared; 81 | 82 | public PreparedRounding(RoundingInfo roundingInfo, Rounding.Prepared prepared) { 83 | this.roundingInfo = roundingInfo; 84 | this.prepared = prepared; 85 | } 86 | } 87 | 88 | public List buildRoundings() { 89 | List roundings = new ArrayList<>(); 90 | 91 | ZoneId timeZone = timeZone() == null ? 
ZoneOffset.UTC: timeZone(); 92 | 93 | long now = System.currentTimeMillis(); 94 | for (String interval : INTERVAL_CONFIG.keySet()) { 95 | RoundingInfo ri = new RoundingInfo(interval, createRounding(INTERVAL_CONFIG.get(interval).dateTimeUnit), 96 | new DocValueFormat.DateTime(DateFormatter.forPattern(INTERVAL_CONFIG.get(interval).format), timeZone, 97 | DateFieldMapper.Resolution.MILLISECONDS)); 98 | roundings.add(new PreparedRounding(ri, ri.rounding.prepareForUnknown())); 99 | 100 | if (interval.equals(interval())) { 101 | break; 102 | } 103 | } 104 | 105 | return roundings; 106 | } 107 | 108 | public static class RoundingInfo implements Writeable { 109 | final DocValueFormat format; 110 | final Rounding rounding; 111 | final String interval; 112 | 113 | public RoundingInfo(String interval, Rounding rounding, DocValueFormat docValueFormat) { 114 | this.interval = interval; 115 | this.rounding = rounding; 116 | this.format = docValueFormat; 117 | } 118 | 119 | public RoundingInfo(StreamInput in) throws IOException { 120 | rounding = Rounding.read(in); 121 | interval = in.readString(); 122 | format = in.readNamedWriteable(DocValueFormat.class); 123 | } 124 | 125 | @Override 126 | public void writeTo(StreamOutput out) throws IOException { 127 | rounding.writeTo(out); 128 | out.writeString(interval); 129 | out.writeNamedWriteable(format); 130 | } 131 | } 132 | 133 | public static final DateHierarchyAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new 134 | DateHierarchyAggregator.BucketCountThresholds(10, -1); 135 | public static final ObjectParser PARSER = 136 | ObjectParser.fromBuilder(NAME, DateHierarchyAggregationBuilder::new); 137 | static { 138 | 139 | ValuesSourceAggregationBuilder.declareFields(PARSER, true, true, true); 140 | 141 | PARSER.declareString(DateHierarchyAggregationBuilder::interval, INTERVAL_FIELD); 142 | 143 | PARSER.declareField(DateHierarchyAggregationBuilder::timeZone, p -> { 144 | if (p.currentToken() == XContentParser.Token.VALUE_STRING) { 145 | return ZoneId.of(p.text()); 146 | } else { 147 | return ZoneOffset.ofHours(p.intValue()); 148 | } 149 | }, new ParseField("time_zone"), ObjectParser.ValueType.LONG); 150 | 151 | PARSER.declareInt(DateHierarchyAggregationBuilder::size, SIZE_FIELD); 152 | PARSER.declareLong(DateHierarchyAggregationBuilder::minDocCount, MIN_DOC_COUNT_FIELD); 153 | PARSER.declareInt(DateHierarchyAggregationBuilder::shardSize, SHARD_SIZE_FIELD); 154 | PARSER.declareObjectArray(DateHierarchyAggregationBuilder::order, (p, c) -> InternalOrder.Parser.parseOrderParam(p), 155 | ORDER_FIELD); 156 | } 157 | 158 | public static AggregationBuilder parse(String aggregationName, XContentParser parser) throws IOException { 159 | return PARSER.parse(parser, new DateHierarchyAggregationBuilder(aggregationName), null); 160 | } 161 | 162 | public static void registerAggregators(ValuesSourceRegistry.Builder builder) { 163 | DateHierarchyAggregatorFactory.registerAggregators(builder); 164 | } 165 | 166 | private long minDocCount = 0; 167 | private ZoneId timeZone = null; 168 | private String interval = "years"; 169 | private BucketOrder order = BucketOrder.compound(BucketOrder.count(false)); // automatically adds tie-breaker key asc order 170 | private DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds = new DateHierarchyAggregator.BucketCountThresholds( 171 | DEFAULT_BUCKET_COUNT_THRESHOLDS); 172 | 173 | 174 | private DateHierarchyAggregationBuilder(String name) { 175 | super(name); 176 | } 177 | 178 | @Override 179 | protected 
boolean serializeTargetValueType(Version version) { 180 | return true; 181 | } 182 | 183 | /** 184 | * Read from a stream 185 | * 186 | */ 187 | public DateHierarchyAggregationBuilder(StreamInput in) throws IOException { 188 | super(in); 189 | bucketCountThresholds = new DateHierarchyAggregator.BucketCountThresholds(in); 190 | minDocCount = in.readVLong(); 191 | interval = in.readString(); 192 | order = InternalOrder.Streams.readOrder(in); 193 | timeZone = in.readOptionalZoneId(); 194 | } 195 | 196 | private DateHierarchyAggregationBuilder(DateHierarchyAggregationBuilder clone, Builder factoriesBuilder, 197 | Map metaData) { 198 | super(clone, factoriesBuilder, metaData); 199 | order = clone.order; 200 | minDocCount = clone.minDocCount; 201 | this.bucketCountThresholds = new DateHierarchyAggregator.BucketCountThresholds(clone.bucketCountThresholds); 202 | } 203 | 204 | @Override 205 | protected AggregationBuilder shallowCopy(Builder factoriesBuilder, Map metaData) { 206 | return new DateHierarchyAggregationBuilder(this, factoriesBuilder, metaData); 207 | } 208 | 209 | @Override 210 | protected ValuesSourceType defaultValueSourceType() { 211 | return CoreValuesSourceType.DATE; 212 | } 213 | 214 | /** 215 | * Write to a stream 216 | */ 217 | @Override 218 | protected void innerWriteTo(StreamOutput out) throws IOException { 219 | bucketCountThresholds.writeTo(out); 220 | out.writeVLong(minDocCount); 221 | out.writeString(interval); 222 | order.writeTo(out); 223 | out.writeOptionalZoneId(timeZone); 224 | } 225 | 226 | /** 227 | * Returns the date interval that is set on this source 228 | **/ 229 | public String interval() { 230 | return interval; 231 | } 232 | 233 | public DateHierarchyAggregationBuilder interval(String interval) { 234 | 235 | if (INTERVAL_CONFIG.get(interval) == null) { 236 | throw new IllegalArgumentException("[interval] is invalid"); 237 | } 238 | 239 | this.interval = interval; 240 | return this; 241 | } 242 | 243 | /** 244 | * Sets the time zone to use for this aggregation 245 | */ 246 | public DateHierarchyAggregationBuilder timeZone(ZoneId timeZone) { 247 | if (timeZone == null) { 248 | throw new IllegalArgumentException("[timeZone] must not be null: [" + name + "]"); 249 | } 250 | this.timeZone = timeZone; 251 | return this; 252 | } 253 | 254 | /** 255 | * Gets the time zone to use for this aggregation 256 | */ 257 | public ZoneId timeZone() { 258 | return timeZone; 259 | } 260 | 261 | private Rounding createRounding(Rounding.DateTimeUnit dateTimeUnit) { 262 | Rounding.Builder tzRoundingBuilder; 263 | tzRoundingBuilder = Rounding.builder(dateTimeUnit); 264 | 265 | if (timeZone() != null) { 266 | tzRoundingBuilder.timeZone(timeZone()); 267 | } 268 | Rounding rounding = tzRoundingBuilder.build(); 269 | return rounding; 270 | } 271 | 272 | /** Set the order in which the buckets will be returned. It returns the builder so that calls 273 | * can be chained. A tie-breaker may be added to avoid non-deterministic ordering. 
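 * Concretely, key orders and compound orders are kept as-is; any other order (for example a plain count
 * order) is wrapped in a compound order, which appends a key-ascending tie-breaker.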
*/ 274 | private DateHierarchyAggregationBuilder order(BucketOrder order) { 275 | if (order == null) { 276 | throw new IllegalArgumentException("[order] must not be null: [" + name + "]"); 277 | } 278 | if(order instanceof InternalOrder.CompoundOrder || InternalOrder.isKeyOrder(order)) { 279 | this.order = order; // if order already contains a tie-breaker we are good to go 280 | } else { // otherwise add a tie-breaker by using a compound order 281 | this.order = BucketOrder.compound(order); 282 | } 283 | return this; 284 | } 285 | 286 | private DateHierarchyAggregationBuilder order(List orders) { 287 | if (orders == null) { 288 | throw new IllegalArgumentException("[orders] must not be null: [" + name + "]"); 289 | } 290 | // if the list only contains one order use that to avoid inconsistent xcontent 291 | order(orders.size() > 1 ? BucketOrder.compound(orders) : orders.get(0)); 292 | return this; 293 | } 294 | 295 | 296 | /** 297 | * Sets the size - indicating how many term buckets should be returned 298 | * (defaults to 10) 299 | */ 300 | public DateHierarchyAggregationBuilder size(int size) { 301 | if (size <= 0) { 302 | throw new IllegalArgumentException("[size] must be greater than 0. Found [" + size + "] in [" + name + "]"); 303 | } 304 | bucketCountThresholds.setRequiredSize(size); 305 | return this; 306 | } 307 | 308 | /** Set the minimum count of matching documents that buckets need to have 309 | * and return this builder so that calls can be chained. */ 310 | public DateHierarchyAggregationBuilder minDocCount(long minDocCount) { 311 | if (minDocCount < 0) { 312 | throw new IllegalArgumentException( 313 | "[minDocCount] must be greater than or equal to 0. Found [" + minDocCount + "] in [" + name + "]"); 314 | } 315 | this.minDocCount = minDocCount; 316 | return this; 317 | } 318 | 319 | /** 320 | * Returns the number of term buckets currently configured 321 | */ 322 | public int size() { 323 | return bucketCountThresholds.getRequiredSize(); 324 | } 325 | 326 | @Override 327 | public BucketCardinality bucketCardinality() { 328 | return BucketCardinality.MANY; 329 | } 330 | 331 | /** 332 | * Sets the shard_size - indicating the number of term buckets each shard 333 | * will return to the coordinating node (the node that coordinates the 334 | * search execution). The higher the shard size is, the more accurate the 335 | * results are. 336 | */ 337 | public DateHierarchyAggregationBuilder shardSize(int shardSize) { 338 | if (shardSize <= 0) { 339 | throw new IllegalArgumentException( 340 | "[shardSize] must be greater than 0. 
Found [" + shardSize + "] in [" + name + "]"); 341 | } 342 | bucketCountThresholds.setShardSize(shardSize); 343 | return this; 344 | } 345 | 346 | /** 347 | * Returns the number of term buckets per shard that are currently configured 348 | */ 349 | public int shardSize() { 350 | return bucketCountThresholds.getShardSize(); 351 | } 352 | 353 | @Override 354 | protected ValuesSourceAggregatorFactory innerBuild(AggregationContext context, 355 | ValuesSourceConfig config, 356 | AggregatorFactory parent, 357 | Builder subFactoriesBuilder) throws IOException { 358 | 359 | 360 | final List preparedRoundings = buildRoundings(); 361 | 362 | return new DateHierarchyAggregatorFactory( 363 | name, 364 | config, 365 | order, 366 | preparedRoundings, 367 | minDocCount, 368 | bucketCountThresholds, 369 | context, 370 | parent, 371 | subFactoriesBuilder, 372 | metadata); 373 | } 374 | 375 | @Override 376 | protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { 377 | builder.startObject(); 378 | 379 | if (order != null) { 380 | builder.field(ORDER_FIELD.getPreferredName()); 381 | order.toXContent(builder, params); 382 | } 383 | 384 | builder.field(MIN_DOC_COUNT_FIELD.getPreferredName(), minDocCount); 385 | 386 | return builder.endObject(); 387 | } 388 | 389 | @Override 390 | public int hashCode() { 391 | return Objects.hash(super.hashCode(), interval, order, minDocCount, bucketCountThresholds, timeZone); 392 | } 393 | 394 | @Override 395 | public boolean equals(Object obj) { 396 | if (this == obj) return true; 397 | if (obj == null || getClass() != obj.getClass()) return false; 398 | if (!super.equals(obj)) return false; 399 | DateHierarchyAggregationBuilder other = (DateHierarchyAggregationBuilder) obj; 400 | return Objects.equals(interval, other.interval) 401 | && Objects.equals(order, other.order) 402 | && Objects.equals(minDocCount, other.minDocCount) 403 | && Objects.equals(bucketCountThresholds, other.bucketCountThresholds) 404 | && Objects.equals(timeZone, other.timeZone); 405 | } 406 | 407 | @Override 408 | public String getType() { 409 | return NAME; 410 | } 411 | 412 | @Override 413 | protected ValuesSourceRegistry.RegistryKey getRegistryKey() { return REGISTRY_KEY; } 414 | } 415 | 416 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/DateHierarchyAggregationSupplier.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.elasticsearch.search.aggregations.Aggregator; 4 | import org.elasticsearch.search.aggregations.AggregatorFactories; 5 | import org.elasticsearch.search.aggregations.BucketOrder; 6 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 7 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 8 | import org.elasticsearch.search.internal.SearchContext; 9 | 10 | import java.io.IOException; 11 | import java.util.List; 12 | import java.util.Map; 13 | 14 | @FunctionalInterface 15 | public interface DateHierarchyAggregationSupplier { 16 | Aggregator build(String name, 17 | AggregatorFactories factories, 18 | BucketOrder order, 19 | List roundingsInfo, 20 | long minDocCount, 21 | DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds, 22 | ValuesSourceConfig valuesSourceConfig, 23 | SearchContext aggregationContext, 24 | Aggregator parent, 25 | CardinalityUpperBound 
cardinality, 26 | Map metadata) throws IOException; 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/DateHierarchyAggregator.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.index.SortedNumericDocValues; 5 | import org.apache.lucene.util.BytesRef; 6 | import org.elasticsearch.ElasticsearchException; 7 | import org.elasticsearch.common.Rounding; 8 | import org.elasticsearch.common.io.stream.StreamInput; 9 | import org.elasticsearch.common.io.stream.StreamOutput; 10 | import org.elasticsearch.common.io.stream.Writeable; 11 | import org.elasticsearch.core.Releasables; 12 | import org.elasticsearch.common.util.BytesRefHash; 13 | import org.elasticsearch.xcontent.ToXContentFragment; 14 | import org.elasticsearch.xcontent.XContentBuilder; 15 | import org.elasticsearch.search.aggregations.Aggregator; 16 | import org.elasticsearch.search.aggregations.AggregatorFactories; 17 | import org.elasticsearch.search.aggregations.BucketOrder; 18 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 19 | import org.elasticsearch.search.aggregations.InternalAggregation; 20 | import org.elasticsearch.search.aggregations.LeafBucketCollector; 21 | import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; 22 | import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; 23 | import org.elasticsearch.search.aggregations.support.ValuesSource; 24 | import org.elasticsearch.search.aggregations.support.AggregationContext; 25 | 26 | import java.io.IOException; 27 | import java.util.Arrays; 28 | import java.util.Comparator; 29 | import java.util.Iterator; 30 | import java.util.List; 31 | import java.util.Map; 32 | import java.util.Objects; 33 | 34 | 35 | public class DateHierarchyAggregator extends BucketsAggregator { 36 | 37 | public DateHierarchyAggregator(String name, 38 | AggregatorFactories factories, 39 | AggregationContext context, 40 | ValuesSource.Numeric valuesSource, 41 | BucketOrder order, 42 | long minDocCount, 43 | BucketCountThresholds bucketCountThresholds, 44 | List preparedRoundings, 45 | Aggregator parent, 46 | CardinalityUpperBound cardinalityUpperBound, 47 | Map metadata 48 | ) throws IOException { 49 | super(name, factories, context, parent, cardinalityUpperBound, metadata); 50 | this.valuesSource = valuesSource; 51 | this.preparedRoundings = preparedRoundings; 52 | this.minDocCount = minDocCount; 53 | bucketOrds = new BytesRefHash(1, context.bigArrays()); 54 | this.bucketCountThresholds = bucketCountThresholds; 55 | order.validate(this); 56 | this.order = order; 57 | this.partiallyBuiltBucketComparator = order == null ? null : order.partiallyBuiltBucketComparator(b -> b.bucketOrd, this); 58 | } 59 | 60 | public static class BucketCountThresholds implements Writeable, ToXContentFragment { 61 | private int requiredSize; 62 | private int shardSize; 63 | 64 | public BucketCountThresholds(int requiredSize, int shardSize) { 65 | this.requiredSize = requiredSize; 66 | this.shardSize = shardSize; 67 | } 68 | 69 | /** 70 | * Read from a stream. 
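 * Fields are read in the order they are written by writeTo: requiredSize first, then shardSize.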
71 | */ 72 | public BucketCountThresholds(StreamInput in) throws IOException { 73 | requiredSize = in.readInt(); 74 | shardSize = in.readInt(); 75 | } 76 | 77 | @Override 78 | public void writeTo(StreamOutput out) throws IOException { 79 | out.writeInt(requiredSize); 80 | out.writeInt(shardSize); 81 | } 82 | 83 | public BucketCountThresholds(DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds) { 84 | this(bucketCountThresholds.requiredSize, bucketCountThresholds.shardSize); 85 | } 86 | 87 | public void ensureValidity() { 88 | // shard_size cannot be smaller than size as we need to at least fetch size entries from every shards in order to return size 89 | if (shardSize < requiredSize) { 90 | setShardSize(requiredSize); 91 | } 92 | 93 | if (requiredSize <= 0 || shardSize <= 0) { 94 | throw new ElasticsearchException("parameters [required_size] and [shard_size] must be >0 in path-hierarchy aggregation."); 95 | } 96 | } 97 | 98 | public int getRequiredSize() { 99 | return requiredSize; 100 | } 101 | 102 | public void setRequiredSize(int requiredSize) { 103 | this.requiredSize = requiredSize; 104 | } 105 | 106 | public int getShardSize() { 107 | return shardSize; 108 | } 109 | 110 | public void setShardSize(int shardSize) { 111 | this.shardSize = shardSize; 112 | } 113 | 114 | @Override 115 | public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { 116 | builder.field(DateHierarchyAggregationBuilder.SIZE_FIELD.getPreferredName(), requiredSize); 117 | if (shardSize != -1) { 118 | builder.field(DateHierarchyAggregationBuilder.SHARD_SIZE_FIELD.getPreferredName(), shardSize); 119 | } 120 | return builder; 121 | } 122 | 123 | @Override 124 | public int hashCode() { 125 | return Objects.hash(requiredSize, shardSize); 126 | } 127 | 128 | @Override 129 | public boolean equals(Object obj) { 130 | if (obj == null) { 131 | return false; 132 | } 133 | if (getClass() != obj.getClass()) { 134 | return false; 135 | } 136 | DateHierarchyAggregator.BucketCountThresholds other = (DateHierarchyAggregator.BucketCountThresholds) obj; 137 | return Objects.equals(requiredSize, other.requiredSize) 138 | && Objects.equals(shardSize, other.shardSize); 139 | } 140 | } 141 | 142 | private final ValuesSource.Numeric valuesSource; 143 | private final BytesRefHash bucketOrds; 144 | private final BucketOrder order; 145 | private final long minDocCount; 146 | private final BucketCountThresholds bucketCountThresholds; 147 | private final List preparedRoundings; 148 | protected final Comparator partiallyBuiltBucketComparator; 149 | 150 | /** 151 | * The collector collects the docs, including or not some score (depending of the including of a Scorer) in the 152 | * collect() process. 153 | * 154 | * The LeafBucketCollector is a "Per-leaf bucket collector". It collects docs for the account of buckets. 
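 * For each date value of a document, the timestamp is rounded once per configured interval, from the
 * coarsest (e.g. years) down to the requested one, and the formatted results are accumulated into a
 * "/"-separated path such as "2012/01/05"; a bucket ordinal is created (or reused) for every prefix of
 * that path, so the document is counted at each level of the hierarchy.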
155 | */ 156 | @Override 157 | public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { 158 | if (valuesSource == null) { 159 | return LeafBucketCollector.NO_OP_COLLECTOR; 160 | } 161 | final SortedNumericDocValues values = valuesSource.longValues(ctx); 162 | 163 | return new LeafBucketCollectorBase(sub, values) { 164 | 165 | @Override 166 | public void collect(int doc, long bucket) throws IOException { 167 | assert bucket == 0; 168 | if (values.advanceExact(doc)) { 169 | final int valuesCount = values.docValueCount(); 170 | 171 | for (int i = 0; i < valuesCount; ++i) { 172 | long value = values.nextValue(); 173 | String path = ""; 174 | for (DateHierarchyAggregationBuilder.PreparedRounding preparedRounding: preparedRoundings) { 175 | long roundedValue = preparedRounding.prepared.round(value); 176 | path += preparedRounding.roundingInfo.format.format(roundedValue).toString(); 177 | long bucketOrd = bucketOrds.add(new BytesRef(path)); 178 | if (bucketOrd < 0) { // already seen 179 | bucketOrd = -1 - bucketOrd; 180 | collectExistingBucket(sub, doc, bucketOrd); 181 | } else { 182 | collectBucket(sub, doc, bucketOrd); 183 | } 184 | path += "/"; 185 | } 186 | } 187 | } 188 | } 189 | }; 190 | } 191 | 192 | @Override 193 | public InternalAggregation[] buildAggregations(long[] owningBucketOrdinals) throws IOException { 194 | 195 | InternalDateHierarchy.InternalBucket[][] topBucketsPerOrd = new InternalDateHierarchy.InternalBucket[owningBucketOrdinals.length][]; 196 | InternalDateHierarchy[] results = new InternalDateHierarchy[owningBucketOrdinals.length]; 197 | 198 | for (int ordIdx = 0; ordIdx < owningBucketOrdinals.length; ordIdx++) { 199 | assert owningBucketOrdinals[ordIdx] == 0; 200 | 201 | // build buckets and store them sorted 202 | final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); 203 | 204 | PathSortedTree pathSortedTree = new PathSortedTree<>(order.comparator(), size); 205 | 206 | InternalDateHierarchy.InternalBucket spare; 207 | for (int i = 0; i < bucketOrds.size(); i++) { 208 | spare = new InternalDateHierarchy.InternalBucket(0, null, null, null, 0, null); 209 | 210 | BytesRef term = new BytesRef(); 211 | bucketOrds.get(i, term); 212 | String[] paths = term.utf8ToString().split("/", -1); 213 | 214 | spare.paths = paths; 215 | spare.key = term; 216 | spare.level = paths.length - 1; 217 | spare.name = paths[spare.level]; 218 | spare.docCount = bucketDocCount(i); 219 | spare.bucketOrd = i; 220 | 221 | pathSortedTree.add(spare.paths, spare); 222 | } 223 | 224 | // Get the top buckets 225 | topBucketsPerOrd[ordIdx] = new InternalDateHierarchy.InternalBucket[size]; 226 | long otherHierarchyNodes = pathSortedTree.getFullSize(); 227 | Iterator iterator = pathSortedTree.consumer(); 228 | for (int i = 0; i < size; i++) { 229 | final InternalDateHierarchy.InternalBucket bucket = iterator.next(); 230 | topBucketsPerOrd[ordIdx][i] = bucket; 231 | otherHierarchyNodes -= 1; 232 | } 233 | 234 | results[ordIdx] = new InternalDateHierarchy(name, Arrays.asList(topBucketsPerOrd[ordIdx]), order, 235 | minDocCount, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), 236 | otherHierarchyNodes, metadata()); 237 | } 238 | 239 | // Build sub-aggregations for pruned buckets 240 | buildSubAggsForAllBuckets( 241 | topBucketsPerOrd, 242 | b -> b.bucketOrd, 243 | (b, aggregations) -> b.aggregations = aggregations 244 | ); 245 | 246 | return results; 247 | } 248 | 249 | @Override 250 | public 
InternalAggregation buildEmptyAggregation() { 251 | return new InternalDateHierarchy(name, null, order, minDocCount, bucketCountThresholds.getRequiredSize(), 252 | bucketCountThresholds.getShardSize(), 0, metadata()); 253 | } 254 | 255 | @Override 256 | protected void doClose() { 257 | Releasables.close(bucketOrds); 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/DateHierarchyAggregatorFactory.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.elasticsearch.search.aggregations.Aggregator; 4 | import org.elasticsearch.search.aggregations.AggregatorFactories; 5 | import org.elasticsearch.search.aggregations.AggregatorFactory; 6 | import org.elasticsearch.search.aggregations.BucketOrder; 7 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 8 | import org.elasticsearch.search.aggregations.InternalAggregation; 9 | import org.elasticsearch.search.aggregations.InternalOrder; 10 | import org.elasticsearch.search.aggregations.NonCollectingAggregator; 11 | import org.elasticsearch.search.aggregations.bucket.BucketUtils; 12 | import org.elasticsearch.search.aggregations.support.AggregationContext; 13 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; 14 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 15 | import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; 16 | import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; 17 | import org.elasticsearch.search.aggregations.support.ValuesSource; 18 | import org.elasticsearch.search.internal.SearchContext; 19 | 20 | import java.io.IOException; 21 | import java.util.ArrayList; 22 | import java.util.List; 23 | import java.util.Map; 24 | 25 | /** 26 | * The factory of aggregators. 
27 | * ValuesSourceAggregatorFactory extends {@link AggregatorFactory} 28 | */ 29 | class DateHierarchyAggregatorFactory extends ValuesSourceAggregatorFactory { 30 | 31 | private long minDocCount; 32 | private BucketOrder order; 33 | private List preparedRoundings; 34 | private final DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds; 35 | 36 | DateHierarchyAggregatorFactory(String name, 37 | ValuesSourceConfig config, 38 | BucketOrder order, 39 | List preparedRoundings, 40 | long minDocCount, 41 | DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds, 42 | AggregationContext context, 43 | AggregatorFactory parent, 44 | AggregatorFactories.Builder subFactoriesBuilder, 45 | Map metadata 46 | ) throws IOException { 47 | super(name, config, context, parent, subFactoriesBuilder, metadata); 48 | this.order = order; 49 | this.preparedRoundings = preparedRoundings; 50 | this.minDocCount = minDocCount; 51 | this.bucketCountThresholds = bucketCountThresholds; 52 | } 53 | 54 | public static void registerAggregators(ValuesSourceRegistry.Builder builder) { 55 | builder.register(DateHierarchyAggregationBuilder.REGISTRY_KEY, CoreValuesSourceType.DATE, (name, 56 | factories, 57 | order, 58 | roundingsInfo, 59 | minDocCount, 60 | bucketCountThresholds, 61 | valuesSourceConfig, 62 | aggregationContext, 63 | parent, 64 | cardinality, 65 | metadata) -> null, 66 | true); 67 | } 68 | 69 | @Override 70 | protected Aggregator createUnmapped(Aggregator parent, 71 | Map metadata) throws IOException { 72 | final InternalAggregation aggregation = new InternalDateHierarchy(name, new ArrayList<>(), order, minDocCount, 73 | bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), 0, metadata); 74 | return new NonCollectingAggregator(name, context, parent, factories, metadata) { 75 | { 76 | // even in the case of an unmapped aggregator, validate the 77 | // order 78 | order.validate(this); 79 | } 80 | 81 | @Override 82 | public InternalAggregation buildEmptyAggregation() { return aggregation; } 83 | }; 84 | } 85 | 86 | @Override 87 | protected Aggregator doCreateInternal(Aggregator parent, CardinalityUpperBound cardinality, Map metadata 88 | ) throws IOException { 89 | 90 | DateHierarchyAggregator.BucketCountThresholds bucketCountThresholds = new 91 | DateHierarchyAggregator.BucketCountThresholds(this.bucketCountThresholds); 92 | if (!InternalOrder.isKeyOrder(order) 93 | && bucketCountThresholds.getShardSize() == DateHierarchyAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) { 94 | // The user has not made a shardSize selection. 
Use default 95 | // heuristic to avoid any wrong-ranking caused by distributed 96 | // counting 97 | bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize())); 98 | } 99 | bucketCountThresholds.ensureValidity(); 100 | return new DateHierarchyAggregator( 101 | name, factories, context, (ValuesSource.Numeric) config.getValuesSource(), 102 | order, minDocCount, bucketCountThresholds, preparedRoundings, parent, cardinality, metadata); 103 | } 104 | } 105 | 106 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/InternalDateHierarchy.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.util.BytesRef; 4 | import org.elasticsearch.common.io.stream.StreamInput; 5 | import org.elasticsearch.common.io.stream.StreamOutput; 6 | import org.elasticsearch.xcontent.XContentBuilder; 7 | import org.elasticsearch.search.aggregations.Aggregations; 8 | import org.elasticsearch.search.aggregations.BucketOrder; 9 | import org.elasticsearch.search.aggregations.InternalAggregation; 10 | import org.elasticsearch.search.aggregations.InternalAggregations; 11 | import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation; 12 | import org.elasticsearch.search.aggregations.InternalOrder; 13 | import org.elasticsearch.search.aggregations.KeyComparable; 14 | import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation; 15 | 16 | import java.io.IOException; 17 | import java.util.ArrayList; 18 | import java.util.Iterator; 19 | import java.util.LinkedHashMap; 20 | import java.util.List; 21 | import java.util.Map; 22 | import java.util.Objects; 23 | 24 | /** 25 | * An internal implementation of {@link InternalMultiBucketAggregation} 26 | * which extends {@link org.elasticsearch.search.aggregations.Aggregation}. 27 | * Mainly, returns the builder and makes the reduce of buckets. 28 | */ 29 | public class InternalDateHierarchy extends InternalMultiBucketAggregation { 31 | 32 | /** 33 | * The bucket class of InternalDateHierarchy. 34 | * @see MultiBucketsAggregation.Bucket 35 | */ 36 | public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucket implements 37 | KeyComparable { 38 | 39 | BytesRef key; 40 | String name; 41 | long bucketOrd; 42 | protected String[] paths; 43 | protected long docCount; 44 | protected InternalAggregations aggregations; 45 | protected int level; 46 | 47 | public InternalBucket(long docCount, InternalAggregations aggregations, BytesRef key, String name, int level, String[] paths) { 48 | this.key = key; 49 | this.name = name; 50 | this.docCount = docCount; 51 | this.aggregations = aggregations; 52 | this.level = level; 53 | this.paths = paths; 54 | } 55 | 56 | /** 57 | * Read from a stream. 58 | */ 59 | public InternalBucket(StreamInput in) throws IOException { 60 | key = in.readBytesRef(); 61 | name = in.readString(); 62 | docCount = in.readLong(); 63 | aggregations = InternalAggregations.readFrom(in); 64 | level = in.readInt(); 65 | int pathsSize = in.readInt(); 66 | paths = new String[pathsSize]; 67 | for (int i=0; i < pathsSize; i++) { 68 | paths[i] = in.readString(); 69 | } 70 | } 71 | 72 | /** 73 | * Write to a stream. 
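
The wire format of a bucket is purely positional: the StreamInput constructor above and writeTo below must agree on the exact fields and order, with the paths array length-prefixed. The same contract, reduced to plain java.io streams for a string array (a sketch, not the Elasticsearch StreamInput/StreamOutput API):

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

// Sketch of the length-prefixed, order-sensitive encoding used for `paths`.
public class PathsWireSketch {
    static void write(DataOutputStream out, String[] paths) throws IOException {
        out.writeInt(paths.length);      // length prefix first...
        for (String path : paths) {
            out.writeUTF(path);          // ...then each element in order
        }
    }

    static String[] read(DataInputStream in) throws IOException {
        String[] paths = new String[in.readInt()];   // must mirror write() exactly
        for (int i = 0; i < paths.length; i++) {
            paths[i] = in.readUTF();
        }
        return paths;
    }

    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        write(new DataOutputStream(bytes), new String[] {"2021", "2021-03", "2021-03-14"});
        String[] roundTripped = read(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(Arrays.toString(roundTripped));   // [2021, 2021-03, 2021-03-14]
    }
}
```
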
74 | */ 75 | @Override 76 | public void writeTo(StreamOutput out) throws IOException { 77 | out.writeBytesRef(key); 78 | out.writeString(name); 79 | out.writeLong(docCount); 80 | aggregations.writeTo(out); 81 | out.writeInt(level); 82 | out.writeInt(paths.length); 83 | for (String path: paths) { 84 | out.writeString(path); 85 | } 86 | } 87 | 88 | @Override 89 | public Object getKey() { 90 | return key; 91 | } 92 | 93 | @Override 94 | public String getKeyAsString() { 95 | return key.utf8ToString(); 96 | } 97 | 98 | @Override 99 | public int compareKey(InternalBucket other) { 100 | return key.compareTo(other.key); 101 | } 102 | 103 | @Override 104 | public long getDocCount() { 105 | return docCount; 106 | } 107 | 108 | @Override 109 | public Aggregations getAggregations() { 110 | return aggregations; 111 | } 112 | 113 | @Override 114 | public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { 115 | builder.startObject(); 116 | builder.field(CommonFields.DOC_COUNT.getPreferredName(), docCount); 117 | aggregations.toXContentInternal(builder, params); 118 | builder.endObject(); 119 | return builder; 120 | } 121 | } 122 | 123 | 124 | private List buckets; 125 | private BucketOrder order; 126 | private final int requiredSize; 127 | private final int shardSize; 128 | private final long otherHierarchyNodes; 129 | private final long minDocCount; 130 | 131 | public InternalDateHierarchy( 132 | String name, 133 | List buckets, 134 | BucketOrder order, 135 | long minDocCount, 136 | int requiredSize, 137 | int shardSize, 138 | long otherHierarchyNodes, 139 | Map metadata 140 | ) { 141 | super(name, metadata); 142 | this.buckets = buckets; 143 | this.order = order; 144 | this.minDocCount = minDocCount; 145 | this.requiredSize = requiredSize; 146 | this.shardSize = shardSize; 147 | this.otherHierarchyNodes = otherHierarchyNodes; 148 | } 149 | 150 | /** 151 | * Read from a stream. 152 | */ 153 | public InternalDateHierarchy(StreamInput in) throws IOException { 154 | super(in); 155 | order = InternalOrder.Streams.readOrder(in); 156 | minDocCount = in.readVLong(); 157 | requiredSize = readSize(in); 158 | shardSize = readSize(in); 159 | otherHierarchyNodes = in.readVLong(); 160 | int bucketsSize = in.readInt(); 161 | this.buckets = new ArrayList<>(bucketsSize); 162 | for (int i=0; i buckets) { 198 | return new InternalDateHierarchy( 199 | this.name, buckets, order, minDocCount, requiredSize, shardSize, otherHierarchyNodes, 200 | this.metadata); 201 | } 202 | 203 | @Override 204 | public InternalBucket createBucket(InternalAggregations aggregations, InternalBucket prototype) { 205 | return new InternalBucket(prototype.docCount, aggregations, prototype.key, prototype.name, prototype.level, prototype.paths); 206 | } 207 | 208 | @Override 209 | public List getBuckets() { 210 | return buckets; 211 | } 212 | 213 | /** 214 | * Reduces the given aggregations to a single one and returns it. 
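
Conceptually the reduce phase groups the shard-level buckets by key and sums their doc counts before re-sorting and truncating to the requested size. Stripped of the Elasticsearch plumbing (sub-aggregation reduction, min_doc_count filtering and the PathSortedTree re-sort are left out), the grouping step amounts to this sketch, with plain maps standing in for the InternalBucket lists:

```java
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Sketch of the per-key merge performed during reduce(): same-key buckets
// coming from different shards are collapsed into one, summing doc counts.
public class ReduceSketch {
    public static void main(String[] args) {
        List<Map<String, Long>> shardBuckets = List.of(
                Map.of("2021", 7L, "2021/2021-03", 4L),
                Map.of("2021", 5L, "2021/2021-04", 2L));

        Map<String, Long> reduced = new LinkedHashMap<>();
        for (Map<String, Long> shard : shardBuckets) {
            shard.forEach((key, docCount) -> reduced.merge(key, docCount, Long::sum));
        }
        // doc counts after the merge: 2021 -> 12, 2021/2021-03 -> 4, 2021/2021-04 -> 2
        System.out.println(reduced);
    }
}
```
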
215 | */ 216 | @Override 217 | public InternalDateHierarchy reduce(List aggregations, ReduceContext reduceContext) { 218 | Map> buckets = null; 219 | long otherHierarchyNodes = 0; 220 | 221 | // extract buckets from aggregations 222 | for (InternalAggregation aggregation : aggregations) { 223 | InternalDateHierarchy dateHierarchy = (InternalDateHierarchy) aggregation; 224 | if (buckets == null) { 225 | buckets = new LinkedHashMap<>(); 226 | } 227 | 228 | otherHierarchyNodes += dateHierarchy.getSumOtherHierarchyNodes(); 229 | 230 | for (InternalBucket bucket : dateHierarchy.buckets) { 231 | List existingBuckets = buckets.get(bucket.key); 232 | if (existingBuckets == null) { 233 | existingBuckets = new ArrayList<>(aggregations.size()); 234 | buckets.put(bucket.key, existingBuckets); 235 | } 236 | existingBuckets.add(bucket); 237 | } 238 | } 239 | 240 | // reduce and sort buckets depending of ordering rules 241 | final int size = !reduceContext.isFinalReduce() ? buckets.size() : Math.min(requiredSize, buckets.size()); 242 | PathSortedTree ordered = new PathSortedTree<>(order.comparator(), size); 243 | for (List sameTermBuckets : buckets.values()) { 244 | 245 | final InternalBucket b = reduceBucket(sameTermBuckets, reduceContext); 246 | if (b.getDocCount() >= minDocCount || !reduceContext.isFinalReduce()) { 247 | reduceContext.consumeBucketsAndMaybeBreak(1); 248 | ordered.add(b.paths, b); 249 | } else { 250 | reduceContext.consumeBucketsAndMaybeBreak(-countInnerBucket(b)); 251 | } 252 | } 253 | 254 | long sum_other_hierarchy_nodes = ordered.getFullSize() - size + otherHierarchyNodes; 255 | return new InternalDateHierarchy(getName(), ordered.getAsList(), order, minDocCount, requiredSize, shardSize, 256 | sum_other_hierarchy_nodes, getMetadata()); 257 | } 258 | 259 | @Override 260 | protected InternalBucket reduceBucket(List buckets, ReduceContext context) { 261 | List aggregationsList = new ArrayList<>(buckets.size()); 262 | InternalBucket reduced = null; 263 | for (InternalBucket bucket : buckets) { 264 | if (reduced == null) { 265 | reduced = bucket; 266 | } else { 267 | reduced.docCount += bucket.docCount; 268 | } 269 | aggregationsList.add(bucket.aggregations); 270 | } 271 | reduced.aggregations = InternalAggregations.reduce(aggregationsList, context); 272 | return reduced; 273 | } 274 | 275 | @Override 276 | public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { 277 | Iterator bucketIterator = buckets.iterator(); 278 | builder.startArray(CommonFields.BUCKETS.getPreferredName()); 279 | InternalBucket prevBucket = null; 280 | InternalBucket currentBucket = null; 281 | while (bucketIterator.hasNext()) { 282 | currentBucket = bucketIterator.next(); 283 | 284 | if (prevBucket != null) { 285 | if (prevBucket.level == currentBucket.level) { 286 | builder.endObject(); 287 | } else if (prevBucket.level < currentBucket.level) { 288 | builder.startObject(name); 289 | builder.startArray(CommonFields.BUCKETS.getPreferredName()); 290 | } else { 291 | for (int i = currentBucket.level; i < prevBucket.level; i++) { 292 | builder.endObject(); 293 | builder.endArray(); 294 | builder.endObject(); 295 | } 296 | builder.endObject(); 297 | } 298 | } 299 | 300 | builder.startObject(); 301 | builder.field(CommonFields.KEY.getPreferredName(), currentBucket.name); 302 | builder.field(CommonFields.DOC_COUNT.getPreferredName(), currentBucket.docCount); 303 | currentBucket.getAggregations().toXContentInternal(builder, params); 304 | 305 | prevBucket = currentBucket; 306 | } 
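
The level bookkeeping in the loop above is what turns the flat, depth-first bucket list back into nested output: staying on the same level closes the previous sibling, going deeper opens a nested array, and coming back up closes one wrapper per level. A compact standalone rendering of the same walk (string concatenation instead of XContentBuilder; the sample buckets are invented):

```java
import java.util.List;

// Sketch of the prev/current level comparison used in doXContentBody().
public class NestedRenderSketch {
    record Bucket(int level, String key, long docCount) {}

    public static void main(String[] args) {
        List<Bucket> depthFirst = List.of(
                new Bucket(0, "fruits", 6),
                new Bucket(1, "apples", 4),
                new Bucket(1, "pears", 2),
                new Bucket(0, "vegetables", 3));

        StringBuilder out = new StringBuilder("[");
        Bucket prev = null;
        for (Bucket current : depthFirst) {
            if (prev != null) {
                if (prev.level() == current.level()) {
                    out.append("}, ");                                 // same depth: close the previous sibling
                } else if (prev.level() < current.level()) {
                    out.append(", \"children\": [");                   // going deeper: open a nested array
                } else {
                    out.append("}]".repeat(prev.level() - current.level()))
                       .append("}, ");                                 // coming back up: close one wrapper per level
                }
            }
            out.append("{\"").append(current.key()).append("\": ").append(current.docCount());
            prev = current;
        }
        if (prev != null) {
            out.append("}").append("]}".repeat(prev.level()));         // close whatever is still open
        }
        System.out.println(out.append("]"));
        // [{"fruits": 6, "children": [{"apples": 4}, {"pears": 2}]}, {"vegetables": 3}]
    }
}
```
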
307 | 308 | if (currentBucket != null) { 309 | for (int i=0; i < currentBucket.level; i++) { 310 | builder.endObject(); 311 | builder.endArray(); 312 | builder.endObject(); 313 | } 314 | builder.endObject(); 315 | } 316 | 317 | builder.endArray(); 318 | return builder; 319 | } 320 | 321 | @Override 322 | public int hashCode() { 323 | return Objects.hash(buckets, order, requiredSize, shardSize, otherHierarchyNodes, minDocCount); 324 | } 325 | 326 | @Override 327 | public boolean equals(Object obj) { 328 | InternalDateHierarchy that = (InternalDateHierarchy) obj; 329 | return Objects.equals(buckets, that.buckets) 330 | && Objects.equals(order, that.order) 331 | && Objects.equals(minDocCount, that.minDocCount) 332 | && Objects.equals(requiredSize, that.requiredSize) 333 | && Objects.equals(shardSize, that.shardSize) 334 | && Objects.equals(otherHierarchyNodes, that.otherHierarchyNodes); 335 | } 336 | } 337 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/InternalPathHierarchy.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.util.BytesRef; 4 | import org.elasticsearch.xcontent.ParseField; 5 | import org.elasticsearch.common.io.stream.StreamInput; 6 | import org.elasticsearch.common.io.stream.StreamOutput; 7 | import org.elasticsearch.xcontent.XContentBuilder; 8 | import org.elasticsearch.search.aggregations.Aggregation; 9 | import org.elasticsearch.search.aggregations.Aggregations; 10 | import org.elasticsearch.search.aggregations.BucketOrder; 11 | import org.elasticsearch.search.aggregations.InternalAggregation; 12 | import org.elasticsearch.search.aggregations.InternalAggregations; 13 | import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation; 14 | import org.elasticsearch.search.aggregations.InternalOrder; 15 | import org.elasticsearch.search.aggregations.KeyComparable; 16 | import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation; 17 | 18 | import java.io.IOException; 19 | import java.util.Arrays; 20 | import java.util.ArrayList; 21 | import java.util.Iterator; 22 | import java.util.List; 23 | import java.util.Map; 24 | import java.util.Objects; 25 | import java.util.TreeMap; 26 | 27 | /** 28 | * An internal implementation of {@link InternalMultiBucketAggregation} which extends {@link Aggregation}. 29 | * Mainly, returns the builder and makes the reduce of buckets. 30 | */ 31 | public class InternalPathHierarchy extends InternalMultiBucketAggregation { 33 | protected static final ParseField SUM_OF_OTHER_HIERARCHY_NODES = new ParseField("sum_other_hierarchy_nodes"); 34 | protected static final ParseField PATHS = new ParseField("path"); 35 | 36 | /** 37 | * The bucket class of InternalPathHierarchy. 
38 | * @see MultiBucketsAggregation.Bucket 39 | */ 40 | public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucket implements 41 | KeyComparable { 42 | 43 | BytesRef termBytes; 44 | long bucketOrd; 45 | protected String[] paths; 46 | protected long docCount; 47 | protected InternalAggregations aggregations; 48 | protected int level; 49 | protected int minDepth; 50 | protected String basename; 51 | 52 | public InternalBucket(long docCount, InternalAggregations aggregations, String basename, 53 | BytesRef term, int level, int minDepth, String[] paths) { 54 | termBytes = term; 55 | this.docCount = docCount; 56 | this.aggregations = aggregations; 57 | this.level = level; 58 | this.minDepth = minDepth; 59 | this.basename = basename; 60 | this.paths = paths; 61 | } 62 | 63 | /** 64 | * Read from a stream. 65 | */ 66 | public InternalBucket(StreamInput in) throws IOException { 67 | termBytes = in.readBytesRef(); 68 | docCount = in.readLong(); 69 | aggregations = InternalAggregations.readFrom(in); 70 | level = in.readInt(); 71 | minDepth = in.readInt(); 72 | basename = in.readString(); 73 | int pathsSize = in.readInt(); 74 | paths = new String[pathsSize]; 75 | for (int i=0; i < pathsSize; i++) { 76 | paths[i] = in.readString(); 77 | } 78 | } 79 | 80 | /** 81 | * Write to a stream. 82 | */ 83 | @Override 84 | public void writeTo(StreamOutput out) throws IOException { 85 | out.writeBytesRef(termBytes); 86 | out.writeLong(docCount); 87 | aggregations.writeTo(out); 88 | out.writeInt(level); 89 | out.writeInt(minDepth); 90 | out.writeString(basename); 91 | out.writeInt(paths.length); 92 | for (String path: paths) { 93 | out.writeString(path); 94 | } 95 | } 96 | 97 | @Override 98 | public String getKey() { 99 | return termBytes.utf8ToString(); 100 | } 101 | 102 | @Override 103 | public String getKeyAsString() { 104 | return termBytes.utf8ToString(); 105 | } 106 | 107 | @Override 108 | public int compareKey(InternalBucket other) { 109 | return termBytes.compareTo(other.termBytes); 110 | } 111 | 112 | @Override 113 | public long getDocCount() { 114 | return docCount; 115 | } 116 | 117 | @Override 118 | public Aggregations getAggregations() { 119 | return aggregations; 120 | } 121 | 122 | @Override 123 | public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { 124 | builder.startObject(); 125 | builder.field(CommonFields.DOC_COUNT.getPreferredName(), docCount); 126 | aggregations.toXContentInternal(builder, params); 127 | builder.endObject(); 128 | return builder; 129 | } 130 | } 131 | 132 | 133 | private List buckets; 134 | private BytesRef separator; 135 | private BucketOrder order; 136 | private final int requiredSize; 137 | private final int shardSize; 138 | private final long otherHierarchyNodes; 139 | private final long minDocCount; 140 | 141 | public InternalPathHierarchy( 142 | String name, 143 | List buckets, 144 | BucketOrder order, 145 | long minDocCount, 146 | int requiredSize, 147 | int shardSize, 148 | long otherHierarchyNodes, 149 | BytesRef separator, 150 | Map metadata 151 | ) { 152 | super(name, metadata); 153 | this.buckets = buckets; 154 | this.order = order; 155 | this.minDocCount = minDocCount; 156 | this.requiredSize = requiredSize; 157 | this.shardSize = shardSize; 158 | this.otherHierarchyNodes = otherHierarchyNodes; 159 | this.separator = separator; 160 | } 161 | 162 | /** 163 | * Read from a stream. 
164 | */ 165 | public InternalPathHierarchy(StreamInput in) throws IOException { 166 | super(in); 167 | order = InternalOrder.Streams.readOrder(in); 168 | minDocCount = in.readVLong(); 169 | requiredSize = readSize(in); 170 | shardSize = readSize(in); 171 | otherHierarchyNodes = in.readVLong(); 172 | separator = in.readBytesRef(); 173 | int bucketsSize = in.readInt(); 174 | this.buckets = new ArrayList<>(bucketsSize); 175 | for (int i=0; i buckets) { 212 | return new InternalPathHierarchy(this.name, buckets, order, minDocCount, requiredSize, shardSize, otherHierarchyNodes, 213 | this.separator, this.metadata); 214 | } 215 | 216 | @Override 217 | public InternalBucket createBucket(InternalAggregations aggregations, InternalBucket prototype) { 218 | return new InternalBucket(prototype.docCount, aggregations, prototype.basename, prototype.termBytes, 219 | prototype.level, prototype.minDepth, prototype.paths); 220 | } 221 | 222 | @Override 223 | public List getBuckets() { 224 | return buckets; 225 | } 226 | 227 | /** 228 | * Reduces the given aggregations to a single one and returns it. 229 | */ 230 | @Override 231 | public InternalPathHierarchy reduce(List aggregations, ReduceContext reduceContext) { 232 | Map> buckets = null; 233 | long otherHierarchyNodes = 0; 234 | 235 | // extract buckets from aggregations 236 | for (InternalAggregation aggregation : aggregations) { 237 | InternalPathHierarchy pathHierarchy = (InternalPathHierarchy) aggregation; 238 | if (buckets == null) { 239 | buckets = new TreeMap<>(); 240 | } 241 | 242 | otherHierarchyNodes += pathHierarchy.getSumOtherHierarchyNodes(); 243 | 244 | for (InternalBucket bucket : pathHierarchy.buckets) { 245 | List existingBuckets = buckets.get(bucket.termBytes); 246 | if (existingBuckets == null) { 247 | existingBuckets = new ArrayList<>(aggregations.size()); 248 | buckets.put(bucket.termBytes, existingBuckets); 249 | } 250 | existingBuckets.add(bucket); 251 | } 252 | } 253 | 254 | // reduce and sort buckets depending of ordering rules 255 | final int size = !reduceContext.isFinalReduce() ? 
buckets.size() : Math.min(requiredSize, buckets.size()); 256 | PathSortedTree ordered = new PathSortedTree<>(order.comparator(), size); 257 | for (List sameTermBuckets : buckets.values()) { 258 | final InternalBucket b = reduceBucket(sameTermBuckets, reduceContext); 259 | if (b.getDocCount() >= minDocCount || !reduceContext.isFinalReduce()) { 260 | reduceContext.consumeBucketsAndMaybeBreak(1); 261 | String [] pathsForTree; 262 | if (b.minDepth > 0) { 263 | pathsForTree = Arrays.copyOfRange(b.paths, b.minDepth, b.paths.length); 264 | } else { 265 | pathsForTree = b.paths; 266 | } 267 | ordered.add(pathsForTree, b); 268 | } else { 269 | reduceContext.consumeBucketsAndMaybeBreak(-countInnerBucket(b)); 270 | } 271 | } 272 | 273 | long sum_other_hierarchy_nodes = ordered.getFullSize() - size + otherHierarchyNodes; 274 | return new InternalPathHierarchy(getName(), ordered.getAsList(), order, minDocCount, requiredSize, shardSize, 275 | sum_other_hierarchy_nodes, separator, getMetadata()); 276 | } 277 | 278 | /** 279 | * Utility method of InternalPathHierarchy.doReduce() 280 | */ 281 | @Override 282 | protected InternalBucket reduceBucket(List buckets, ReduceContext context) { 283 | List aggregationsList = new ArrayList<>(buckets.size()); 284 | InternalBucket reduced = null; 285 | for (InternalBucket bucket : buckets) { 286 | if (reduced == null) { 287 | reduced = bucket; 288 | } else { 289 | reduced.docCount += bucket.docCount; 290 | } 291 | aggregationsList.add(bucket.aggregations); 292 | } 293 | reduced.aggregations = InternalAggregations.reduce(aggregationsList, context); 294 | return reduced; 295 | } 296 | 297 | @Override 298 | public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { 299 | // builder.field(SUM_OF_OTHER_HIERARCHY_NODES.getPreferredName(), otherHierarchyNodes); 300 | Iterator bucketIterator = buckets.iterator(); 301 | builder.startArray(CommonFields.BUCKETS.getPreferredName()); 302 | InternalBucket prevBucket = null; 303 | InternalBucket currentBucket = null; 304 | while (bucketIterator.hasNext()) { 305 | currentBucket = bucketIterator.next(); 306 | 307 | if (prevBucket != null) { 308 | if (prevBucket.level == currentBucket.level) { 309 | builder.endObject(); 310 | } else if (prevBucket.level < currentBucket.level) { 311 | builder.startObject(name); 312 | builder.startArray(CommonFields.BUCKETS.getPreferredName()); 313 | } else { 314 | for (int i = currentBucket.level; i < prevBucket.level; i++) { 315 | builder.endObject(); 316 | builder.endArray(); 317 | builder.endObject(); 318 | } 319 | builder.endObject(); 320 | } 321 | } 322 | 323 | builder.startObject(); 324 | builder.field(CommonFields.KEY.getPreferredName(), currentBucket.basename); 325 | builder.field(CommonFields.DOC_COUNT.getPreferredName(), currentBucket.docCount); 326 | builder.field(PATHS.getPreferredName(), Arrays.copyOf(currentBucket.paths, currentBucket.paths.length -1)); 327 | currentBucket.getAggregations().toXContentInternal(builder, params); 328 | 329 | prevBucket = currentBucket; 330 | } 331 | 332 | if (currentBucket != null) { 333 | for (int i=0; i < currentBucket.level; i++) { 334 | builder.endObject(); 335 | builder.endArray(); 336 | builder.endObject(); 337 | } 338 | builder.endObject(); 339 | } 340 | 341 | builder.endArray(); 342 | return builder; 343 | } 344 | 345 | @Override 346 | public int hashCode() { 347 | return Objects.hash(buckets, separator, order, requiredSize, shardSize, otherHierarchyNodes, minDocCount); 348 | } 349 | 350 | @Override 351 | public 
boolean equals(Object obj) { 352 | InternalPathHierarchy that = (InternalPathHierarchy) obj; 353 | return Objects.equals(buckets, that.buckets) 354 | && Objects.equals(separator, that.separator) 355 | && Objects.equals(order, that.order) 356 | && Objects.equals(minDocCount, that.minDocCount) 357 | && Objects.equals(requiredSize, that.requiredSize) 358 | && Objects.equals(shardSize, that.shardSize) 359 | && Objects.equals(otherHierarchyNodes, that.otherHierarchyNodes); 360 | } 361 | } 362 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/PathHierarchyAggregationBuilder.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.elasticsearch.Version; 4 | import org.elasticsearch.xcontent.ParseField; 5 | import org.elasticsearch.common.io.stream.StreamInput; 6 | import org.elasticsearch.common.io.stream.StreamOutput; 7 | import org.elasticsearch.xcontent.ObjectParser; 8 | import org.elasticsearch.xcontent.XContentBuilder; 9 | import org.elasticsearch.xcontent.XContentParser; 10 | import org.elasticsearch.search.aggregations.support.AggregationContext; 11 | import org.elasticsearch.search.aggregations.AggregationBuilder; 12 | import org.elasticsearch.search.aggregations.AggregatorFactories; 13 | import org.elasticsearch.search.aggregations.AggregatorFactory; 14 | import org.elasticsearch.search.aggregations.AggregatorFactories.Builder; 15 | import org.elasticsearch.search.aggregations.BucketOrder; 16 | import org.elasticsearch.search.aggregations.InternalOrder; 17 | import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; 18 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder; 19 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; 20 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 21 | import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; 22 | import org.elasticsearch.search.aggregations.support.ValuesSourceType; 23 | 24 | import java.io.IOException; 25 | import java.util.List; 26 | import java.util.Map; 27 | import java.util.Objects; 28 | 29 | 30 | /** 31 | * The builder of the aggregatorFactory. Also implements the parsing of the request. 
32 | */ 33 | public class PathHierarchyAggregationBuilder extends ValuesSourceAggregationBuilder { 34 | public static final String NAME = "path_hierarchy"; 35 | public static final ValuesSourceRegistry.RegistryKey REGISTRY_KEY = 36 | new ValuesSourceRegistry.RegistryKey<>(NAME, PathHierarchyAggregationSupplier.class); 37 | 38 | public static final ParseField SEPARATOR_FIELD = new ParseField("separator"); 39 | public static final ParseField MIN_DEPTH_FIELD = new ParseField("min_depth"); 40 | public static final ParseField MAX_DEPTH_FIELD = new ParseField("max_depth"); 41 | public static final ParseField KEEP_BLANK_PATH = new ParseField("keep_blank_path"); 42 | public static final ParseField DEPTH_FIELD = new ParseField("depth"); 43 | public static final ParseField ORDER_FIELD = new ParseField("order"); 44 | public static final ParseField SIZE_FIELD = new ParseField("size"); 45 | public static final ParseField SHARD_SIZE_FIELD = new ParseField("shard_size"); 46 | public static final ParseField MIN_DOC_COUNT_FIELD = new ParseField("min_doc_count"); 47 | 48 | public static final PathHierarchyAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS = new 49 | PathHierarchyAggregator.BucketCountThresholds(10, -1); 50 | public static final ObjectParser PARSER = 51 | ObjectParser.fromBuilder(NAME, PathHierarchyAggregationBuilder::new); 52 | static { 53 | ValuesSourceAggregationBuilder.declareFields(PARSER, true, true, false); 54 | 55 | PARSER.declareString(PathHierarchyAggregationBuilder::separator, SEPARATOR_FIELD); 56 | PARSER.declareInt(PathHierarchyAggregationBuilder::minDepth, MIN_DEPTH_FIELD); 57 | PARSER.declareInt(PathHierarchyAggregationBuilder::maxDepth, MAX_DEPTH_FIELD); 58 | PARSER.declareBoolean(PathHierarchyAggregationBuilder::keepBlankPath, KEEP_BLANK_PATH); 59 | PARSER.declareInt(PathHierarchyAggregationBuilder::depth, DEPTH_FIELD); 60 | PARSER.declareInt(PathHierarchyAggregationBuilder::size, SIZE_FIELD); 61 | PARSER.declareLong(PathHierarchyAggregationBuilder::minDocCount, MIN_DOC_COUNT_FIELD); 62 | PARSER.declareInt(PathHierarchyAggregationBuilder::shardSize, SHARD_SIZE_FIELD); 63 | PARSER.declareObjectArray(PathHierarchyAggregationBuilder::order, (p, c) -> InternalOrder.Parser.parseOrderParam(p), 64 | ORDER_FIELD); 65 | } 66 | 67 | public static AggregationBuilder parse(String aggregationName, XContentParser parser) throws IOException { 68 | return PARSER.parse(parser, new PathHierarchyAggregationBuilder(aggregationName), null); 69 | } 70 | 71 | public static void registerAggregators(ValuesSourceRegistry.Builder builder) { 72 | PathHierarchyAggregatorFactory.registerAggregators(builder); 73 | } 74 | 75 | private static final String DEFAULT_SEPARATOR = "/"; 76 | private static final int DEFAULT_MIN_DEPTH = 0; 77 | private static final int DEFAULT_MAX_DEPTH = 3; 78 | private static final boolean DEFAULT_KEEP_BLANK_PATH = false; 79 | private String separator = DEFAULT_SEPARATOR; 80 | private int minDepth = DEFAULT_MIN_DEPTH; 81 | private int maxDepth = DEFAULT_MAX_DEPTH; 82 | private boolean keepBlankPath = DEFAULT_KEEP_BLANK_PATH; 83 | private long minDocCount = 0; 84 | private int depth = -1; 85 | private BucketOrder order = BucketOrder.compound(BucketOrder.count(false)); // automatically adds tie-breaker key asc order 86 | private PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds = new PathHierarchyAggregator.BucketCountThresholds( 87 | DEFAULT_BUCKET_COUNT_THRESHOLDS); 88 | 89 | 90 | private PathHierarchyAggregationBuilder(String name) { 91 | 
super(name); 92 | } 93 | 94 | @Override 95 | protected boolean serializeTargetValueType(Version version) { 96 | return true; 97 | } 98 | 99 | /** 100 | * Read from a stream 101 | */ 102 | public PathHierarchyAggregationBuilder(StreamInput in) throws IOException { 103 | super(in); 104 | bucketCountThresholds = new PathHierarchyAggregator.BucketCountThresholds(in); 105 | separator = in.readString(); 106 | minDocCount = in.readVLong(); 107 | minDepth = in.readOptionalVInt(); 108 | maxDepth = in.readOptionalVInt(); 109 | keepBlankPath = in.readOptionalBoolean(); 110 | depth = in.readOptionalVInt(); 111 | order = InternalOrder.Streams.readOrder(in); 112 | } 113 | 114 | private PathHierarchyAggregationBuilder(PathHierarchyAggregationBuilder clone, Builder factoriesBuilder, 115 | Map metadata) { 116 | super(clone, factoriesBuilder, metadata); 117 | separator = clone.separator; 118 | minDepth = clone.minDepth; 119 | maxDepth = clone.maxDepth; 120 | keepBlankPath = clone.keepBlankPath; 121 | depth = clone.depth; 122 | order = clone.order; 123 | minDocCount = clone.minDocCount; 124 | this.bucketCountThresholds = new PathHierarchyAggregator.BucketCountThresholds(clone.bucketCountThresholds); 125 | } 126 | 127 | @Override 128 | protected AggregationBuilder shallowCopy(AggregatorFactories.Builder factoriesBuilder, Map metadata) { 129 | return new PathHierarchyAggregationBuilder(this, factoriesBuilder, metadata); 130 | } 131 | 132 | @Override 133 | protected ValuesSourceType defaultValueSourceType() { 134 | return CoreValuesSourceType.KEYWORD; 135 | } 136 | 137 | /** 138 | * Write to a stream 139 | */ 140 | @Override 141 | protected void innerWriteTo(StreamOutput out) throws IOException { 142 | bucketCountThresholds.writeTo(out); 143 | out.writeString(separator); 144 | out.writeVLong(minDocCount); 145 | out.writeOptionalVInt(minDepth); 146 | out.writeOptionalVInt(maxDepth); 147 | out.writeOptionalBoolean(keepBlankPath); 148 | out.writeOptionalVInt(depth); 149 | order.writeTo(out); 150 | } 151 | 152 | private PathHierarchyAggregationBuilder separator(String separator) { 153 | this.separator = separator; 154 | return this; 155 | } 156 | 157 | private PathHierarchyAggregationBuilder minDepth(int minDepth) { 158 | this.minDepth = minDepth; 159 | return this; 160 | } 161 | 162 | private PathHierarchyAggregationBuilder maxDepth(int maxDepth) { 163 | this.maxDepth = maxDepth; 164 | return this; 165 | } 166 | 167 | private PathHierarchyAggregationBuilder keepBlankPath(boolean keepBlankPath) { 168 | this.keepBlankPath = keepBlankPath; 169 | return this; 170 | } 171 | 172 | private PathHierarchyAggregationBuilder depth(int depth) { 173 | this.depth = depth; 174 | return this; 175 | } 176 | 177 | /** Set the order in which the buckets will be returned. It returns the builder so that calls 178 | * can be chained. A tie-breaker may be added to avoid non-deterministic ordering. 
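
Why the tie-breaker matters: without a secondary key, buckets with equal doc counts can legitimately come back in any order, which makes results unstable across shards and runs. A plain-Java illustration of composing the primary order with a key tie-breaker (simple records instead of BucketOrder; the sample data is made up):

```java
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

// Sketch: a compound comparator = primary order + key tie-breaker, which is the
// effect of wrapping a non-total order in a compound order with key ascending.
public class TieBreakerSketch {
    record Bucket(String key, long docCount) {}

    public static void main(String[] args) {
        List<Bucket> buckets = new ArrayList<>(List.of(
                new Bucket("pears", 2), new Bucket("apples", 2), new Bucket("bananas", 5)));

        Comparator<Bucket> byCountDesc = Comparator.comparingLong(Bucket::docCount).reversed();
        Comparator<Bucket> stable = byCountDesc.thenComparing(Bucket::key);   // tie-breaker on the key

        buckets.sort(stable);
        // bananas 5, apples 2, pears 2 -- the two count-2 buckets now have a deterministic order
        buckets.forEach(b -> System.out.println(b.key() + " " + b.docCount()));
    }
}
```
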
*/ 179 | private PathHierarchyAggregationBuilder order(BucketOrder order) { 180 | if (order == null) { 181 | throw new IllegalArgumentException("[order] must not be null: [" + name + "]"); 182 | } 183 | if(order instanceof InternalOrder.CompoundOrder || InternalOrder.isKeyOrder(order)) { 184 | this.order = order; // if order already contains a tie-breaker we are good to go 185 | } else { // otherwise add a tie-breaker by using a compound order 186 | this.order = BucketOrder.compound(order); 187 | } 188 | return this; 189 | } 190 | 191 | private PathHierarchyAggregationBuilder order(List orders) { 192 | if (orders == null) { 193 | throw new IllegalArgumentException("[orders] must not be null: [" + name + "]"); 194 | } 195 | // if the list only contains one order use that to avoid inconsistent xcontent 196 | order(orders.size() > 1 ? BucketOrder.compound(orders) : orders.get(0)); 197 | return this; 198 | } 199 | 200 | 201 | /** 202 | * Sets the size - indicating how many term buckets should be returned 203 | * (defaults to 10) 204 | */ 205 | public PathHierarchyAggregationBuilder size(int size) { 206 | if (size <= 0) { 207 | throw new IllegalArgumentException("[size] must be greater than 0. Found [" + size + "] in [" + name + "]"); 208 | } 209 | bucketCountThresholds.setRequiredSize(size); 210 | return this; 211 | } 212 | 213 | /** Set the minimum count of matching documents that buckets need to have 214 | * and return this builder so that calls can be chained. */ 215 | public PathHierarchyAggregationBuilder minDocCount(long minDocCount) { 216 | if (minDocCount < 0) { 217 | throw new IllegalArgumentException( 218 | "[minDocCount] must be greater than or equal to 0. Found [" + minDocCount + "] in [" + name + "]"); 219 | } 220 | this.minDocCount = minDocCount; 221 | return this; 222 | } 223 | 224 | /** 225 | * Returns the number of term buckets currently configured 226 | */ 227 | public int size() { 228 | return bucketCountThresholds.getRequiredSize(); 229 | } 230 | 231 | @Override 232 | public BucketCardinality bucketCardinality() { 233 | return BucketCardinality.MANY; 234 | } 235 | 236 | /** 237 | * Sets the shard_size - indicating the number of term buckets each shard 238 | * will return to the coordinating node (the node that coordinates the 239 | * search execution). The higher the shard size is, the more accurate the 240 | * results are. 241 | */ 242 | public PathHierarchyAggregationBuilder shardSize(int shardSize) { 243 | if (shardSize <= 0) { 244 | throw new IllegalArgumentException( 245 | "[shardSize] must be greater than 0. 
Found [" + shardSize + "] in [" + name + "]"); 246 | } 247 | bucketCountThresholds.setShardSize(shardSize); 248 | return this; 249 | } 250 | 251 | /** 252 | * Returns the number of term buckets per shard that are currently configured 253 | */ 254 | public int shardSize() { 255 | return bucketCountThresholds.getShardSize(); 256 | } 257 | 258 | @Override 259 | protected ValuesSourceAggregatorFactory innerBuild(AggregationContext context, 260 | ValuesSourceConfig config, 261 | AggregatorFactory parent, 262 | AggregatorFactories.Builder subFactoriesBuilder) throws IOException { 263 | 264 | 265 | if (minDepth > maxDepth) 266 | throw new IllegalArgumentException("[minDepth] (" + minDepth + ") must not be greater than [maxDepth] (" + 267 | maxDepth + ")"); 268 | 269 | if (depth >= 0) { 270 | if (minDepth > depth) 271 | throw new IllegalArgumentException("[minDepth] (" + minDepth + ") must not be greater than [depth] (" + 272 | depth + ")"); 273 | minDepth = depth; 274 | maxDepth = depth; 275 | } 276 | 277 | return new PathHierarchyAggregatorFactory( 278 | name, 279 | config, 280 | separator, 281 | minDepth, 282 | maxDepth, 283 | keepBlankPath, 284 | order, 285 | minDocCount, 286 | bucketCountThresholds, 287 | context, 288 | parent, 289 | subFactoriesBuilder, 290 | metadata); 291 | } 292 | 293 | @Override 294 | protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { 295 | builder.startObject(); 296 | 297 | if (order != null) { 298 | builder.field(ORDER_FIELD.getPreferredName()); 299 | order.toXContent(builder, params); 300 | } 301 | 302 | builder.field(MIN_DOC_COUNT_FIELD.getPreferredName(), minDocCount); 303 | 304 | if (!separator.equals(DEFAULT_SEPARATOR)) { 305 | builder.field(SEPARATOR_FIELD.getPreferredName(), separator); 306 | } 307 | 308 | if (minDepth != DEFAULT_MIN_DEPTH) { 309 | builder.field(MIN_DEPTH_FIELD.getPreferredName(), minDepth); 310 | } 311 | 312 | if (maxDepth != DEFAULT_MAX_DEPTH) { 313 | builder.field(MAX_DEPTH_FIELD.getPreferredName(), maxDepth); 314 | } 315 | 316 | if (depth != 0) { 317 | builder.field(DEPTH_FIELD.getPreferredName(), depth); 318 | } 319 | 320 | return builder.endObject(); 321 | } 322 | 323 | @Override 324 | public int hashCode() { 325 | return Objects.hash(super.hashCode(), separator, minDepth, maxDepth, depth, order, minDocCount, bucketCountThresholds); 326 | } 327 | 328 | @Override 329 | public boolean equals(Object obj) { 330 | if (this == obj) return true; 331 | if (obj == null || getClass() != obj.getClass()) return false; 332 | if (!super.equals(obj)) return false; 333 | PathHierarchyAggregationBuilder other = (PathHierarchyAggregationBuilder) obj; 334 | return Objects.equals(separator, other.separator) 335 | && Objects.equals(minDepth, other.minDepth) 336 | && Objects.equals(maxDepth, other.maxDepth) 337 | && Objects.equals(depth, other.depth) 338 | && Objects.equals(order, other.order) 339 | && Objects.equals(minDocCount, other.minDocCount) 340 | && Objects.equals(bucketCountThresholds, other.bucketCountThresholds); 341 | } 342 | 343 | @Override 344 | public String getType() { 345 | return NAME; 346 | } 347 | 348 | @Override 349 | protected ValuesSourceRegistry.RegistryKey getRegistryKey() { return REGISTRY_KEY; } 350 | } 351 | 352 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/PathHierarchyAggregationSupplier.java: -------------------------------------------------------------------------------- 
1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.util.BytesRef; 4 | import org.elasticsearch.search.aggregations.Aggregator; 5 | import org.elasticsearch.search.aggregations.AggregatorFactories; 6 | import org.elasticsearch.search.aggregations.BucketOrder; 7 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 8 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 9 | import org.elasticsearch.search.internal.SearchContext; 10 | 11 | import java.io.IOException; 12 | import java.util.Map; 13 | 14 | @FunctionalInterface 15 | public interface PathHierarchyAggregationSupplier { 16 | Aggregator build(String name, 17 | AggregatorFactories factories, 18 | BytesRef separator, 19 | int minDepth, 20 | int maxDepth, 21 | boolean keepBlankPath, 22 | BucketOrder order, 23 | long minDocCount, 24 | PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds, 25 | ValuesSourceConfig valuesSourceConfig, 26 | SearchContext aggregationContext, 27 | Aggregator parent, 28 | CardinalityUpperBound cardinality, 29 | Map metadata) throws IOException; 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/PathHierarchyAggregator.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.util.BytesRef; 5 | import org.apache.lucene.util.BytesRefBuilder; 6 | import org.elasticsearch.ElasticsearchException; 7 | import org.elasticsearch.common.io.stream.StreamInput; 8 | import org.elasticsearch.common.io.stream.StreamOutput; 9 | import org.elasticsearch.common.io.stream.Writeable; 10 | import org.elasticsearch.core.Releasables; 11 | import org.elasticsearch.common.util.BytesRefHash; 12 | import org.elasticsearch.xcontent.ToXContentFragment; 13 | import org.elasticsearch.xcontent.XContentBuilder; 14 | import org.elasticsearch.index.fielddata.SortedBinaryDocValues; 15 | import org.elasticsearch.search.aggregations.Aggregator; 16 | import org.elasticsearch.search.aggregations.AggregatorFactories; 17 | import org.elasticsearch.search.aggregations.BucketOrder; 18 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 19 | import org.elasticsearch.search.aggregations.InternalAggregation; 20 | import org.elasticsearch.search.aggregations.LeafBucketCollector; 21 | import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; 22 | import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; 23 | import org.elasticsearch.search.aggregations.support.ValuesSource; 24 | import org.elasticsearch.search.aggregations.support.AggregationContext; 25 | 26 | import java.io.IOException; 27 | import java.util.Arrays; 28 | import java.util.Comparator; 29 | import java.util.Iterator; 30 | import java.util.Map; 31 | import java.util.Objects; 32 | import java.util.regex.Pattern; 33 | 34 | public class PathHierarchyAggregator extends BucketsAggregator { 35 | 36 | public PathHierarchyAggregator(String name, 37 | AggregatorFactories factories, 38 | AggregationContext context, 39 | ValuesSource valuesSource, 40 | BucketOrder order, 41 | long minDocCount, 42 | BucketCountThresholds bucketCountThresholds, 43 | BytesRef separator, 44 | int minDepth, 45 | Aggregator parent, 46 | CardinalityUpperBound cardinality, 47 | Map metadata 
48 | ) throws IOException { 49 | super(name, factories, context, parent, cardinality, metadata); 50 | this.valuesSource = valuesSource; 51 | this.separator = separator; 52 | this.minDocCount = minDocCount; 53 | bucketOrds = new BytesRefHash(1, context.bigArrays()); 54 | order.validate(this); 55 | this.order = order; 56 | this.partiallyBuiltBucketComparator = order == null ? null : order.partiallyBuiltBucketComparator(b -> b.bucketOrd, this); 57 | this.bucketCountThresholds = bucketCountThresholds; 58 | this.minDepth = minDepth; 59 | } 60 | 61 | public static class BucketCountThresholds implements Writeable, ToXContentFragment { 62 | private int requiredSize; 63 | private int shardSize; 64 | 65 | public BucketCountThresholds(int requiredSize, int shardSize) { 66 | this.requiredSize = requiredSize; 67 | this.shardSize = shardSize; 68 | } 69 | 70 | /** 71 | * Read from a stream. 72 | */ 73 | public BucketCountThresholds(StreamInput in) throws IOException { 74 | requiredSize = in.readInt(); 75 | shardSize = in.readInt(); 76 | } 77 | 78 | @Override 79 | public void writeTo(StreamOutput out) throws IOException { 80 | out.writeInt(requiredSize); 81 | out.writeInt(shardSize); 82 | } 83 | 84 | public BucketCountThresholds(PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds) { 85 | this(bucketCountThresholds.requiredSize, bucketCountThresholds.shardSize); 86 | } 87 | 88 | public void ensureValidity() { 89 | // shard_size cannot be smaller than size as we need to at least fetch size entries from every shards in order to return size 90 | if (shardSize < requiredSize) { 91 | setShardSize(requiredSize); 92 | } 93 | 94 | if (requiredSize <= 0 || shardSize <= 0) { 95 | throw new ElasticsearchException("parameters [required_size] and [shard_size] must be >0 in path-hierarchy aggregation."); 96 | } 97 | } 98 | 99 | public int getRequiredSize() { 100 | return requiredSize; 101 | } 102 | 103 | public void setRequiredSize(int requiredSize) { 104 | this.requiredSize = requiredSize; 105 | } 106 | 107 | public int getShardSize() { 108 | return shardSize; 109 | } 110 | 111 | public void setShardSize(int shardSize) { 112 | this.shardSize = shardSize; 113 | } 114 | 115 | @Override 116 | public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { 117 | builder.field(PathHierarchyAggregationBuilder.SIZE_FIELD.getPreferredName(), requiredSize); 118 | if (shardSize != -1) { 119 | builder.field(PathHierarchyAggregationBuilder.SHARD_SIZE_FIELD.getPreferredName(), shardSize); 120 | } 121 | return builder; 122 | } 123 | 124 | @Override 125 | public int hashCode() { 126 | return Objects.hash(requiredSize, shardSize); 127 | } 128 | 129 | @Override 130 | public boolean equals(Object obj) { 131 | if (obj == null) { 132 | return false; 133 | } 134 | if (getClass() != obj.getClass()) { 135 | return false; 136 | } 137 | PathHierarchyAggregator.BucketCountThresholds other = (PathHierarchyAggregator.BucketCountThresholds) obj; 138 | return Objects.equals(requiredSize, other.requiredSize) 139 | && Objects.equals(shardSize, other.shardSize); 140 | } 141 | } 142 | 143 | 144 | private final ValuesSource valuesSource; 145 | private final BytesRefHash bucketOrds; 146 | private final BucketOrder order; 147 | private final long minDocCount; 148 | private final int minDepth; 149 | protected final Comparator partiallyBuiltBucketComparator; 150 | private final BucketCountThresholds bucketCountThresholds; 151 | private final BytesRef separator; 152 | 153 | /** 154 | * The collector collects the 
docs, including or not some score (depending of the including of a Scorer) in the 155 | * collect() process. 156 | * 157 | * The LeafBucketCollector is a "Per-leaf bucket collector". It collects docs for the account of buckets. 158 | */ 159 | @Override 160 | public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { 161 | if (valuesSource == null) { 162 | return LeafBucketCollector.NO_OP_COLLECTOR; 163 | } 164 | final SortedBinaryDocValues values = valuesSource.bytesValues(ctx); 165 | return new LeafBucketCollectorBase(sub, values) { 166 | final BytesRefBuilder previous = new BytesRefBuilder(); 167 | /** 168 | * Collect the given doc in the given bucket. 169 | * Called once for every document matching a query, with the unbased document number. 170 | */ 171 | @Override 172 | public void collect(int doc, long owningBucketOrdinal) throws IOException { 173 | assert owningBucketOrdinal == 0; 174 | if (values.advanceExact(doc)) { 175 | final int valuesCount = values.docValueCount(); 176 | previous.clear(); 177 | 178 | // SortedBinaryDocValues don't guarantee uniqueness so we need to take care of dups 179 | for (int i = 0; i < valuesCount; ++i) { 180 | final BytesRef bytesValue = values.nextValue(); 181 | if (i > 0 && previous.get().equals(bytesValue)) { 182 | continue; 183 | } 184 | long bucketOrdinal = bucketOrds.add(bytesValue); 185 | if (bucketOrdinal < 0) { // already seen 186 | bucketOrdinal = - 1 - bucketOrdinal; 187 | collectExistingBucket(sub, doc, bucketOrdinal); 188 | } else { 189 | collectBucket(sub, doc, bucketOrdinal); 190 | } 191 | previous.copyBytes(bytesValue); 192 | } 193 | } 194 | } 195 | }; 196 | } 197 | 198 | @Override 199 | public InternalAggregation[] buildAggregations(long[] owningBucketOrdinals) throws IOException { 200 | 201 | InternalPathHierarchy.InternalBucket[][] topBucketsPerOrd = new InternalPathHierarchy.InternalBucket[owningBucketOrdinals.length][]; 202 | InternalPathHierarchy[] results = new InternalPathHierarchy[owningBucketOrdinals.length]; 203 | 204 | for (int ordIdx = 0; ordIdx < owningBucketOrdinals.length; ordIdx++) { 205 | assert owningBucketOrdinals[ordIdx] == 0; 206 | 207 | final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); 208 | PathSortedTree pathSortedTree = 209 | new PathSortedTree<>(partiallyBuiltBucketComparator, size); 210 | 211 | InternalPathHierarchy.InternalBucket spare; 212 | for (int i = 0; i < bucketOrds.size(); i++) { 213 | spare = new InternalPathHierarchy.InternalBucket(0, null, null, new BytesRef(), 0, 0, null); 214 | BytesRef term = new BytesRef(); 215 | bucketOrds.get(i, term); 216 | 217 | String quotedPattern = Pattern.quote(separator.utf8ToString()); 218 | 219 | String[] paths = term.utf8ToString().split(quotedPattern, -1); 220 | 221 | String[] pathsForTree; 222 | 223 | if (minDepth > 0) { 224 | pathsForTree = Arrays.copyOfRange(paths, minDepth, paths.length); 225 | } else { 226 | pathsForTree = paths; 227 | } 228 | 229 | spare.termBytes = BytesRef.deepCopyOf(term); 230 | spare.level = pathsForTree.length - 1; 231 | spare.docCount = bucketDocCount(i); 232 | spare.basename = paths[paths.length - 1]; 233 | spare.minDepth = minDepth; 234 | spare.bucketOrd = i; 235 | spare.paths = paths; 236 | 237 | pathSortedTree.add(pathsForTree, spare); 238 | 239 | } 240 | // Get the top buckets 241 | topBucketsPerOrd[ordIdx] = new InternalPathHierarchy.InternalBucket[size]; 242 | long otherHierarchyNodes = pathSortedTree.getFullSize(); 243 | Iterator 
iterator = pathSortedTree.consumer(); 244 | for (int i = 0; i < size; i++) { 245 | final InternalPathHierarchy.InternalBucket bucket = iterator.next(); 246 | topBucketsPerOrd[ordIdx][i] = bucket; 247 | otherHierarchyNodes -= 1; 248 | } 249 | 250 | results[ordIdx] = new InternalPathHierarchy(name, Arrays.asList(topBucketsPerOrd[ordIdx]), order, 251 | minDocCount, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), 252 | otherHierarchyNodes, separator, metadata()); 253 | } 254 | 255 | // Build sub-aggregations for pruned buckets 256 | buildSubAggsForAllBuckets( 257 | topBucketsPerOrd, 258 | b -> b.bucketOrd, 259 | (b, aggregations) -> b.aggregations = aggregations 260 | ); 261 | 262 | return results; 263 | } 264 | 265 | @Override 266 | public InternalAggregation buildEmptyAggregation() { 267 | return new InternalPathHierarchy(name, null, order, minDocCount, bucketCountThresholds.getRequiredSize(), 268 | bucketCountThresholds.getShardSize(), 0, separator, metadata()); 269 | } 270 | 271 | @Override 272 | protected void doClose() { 273 | Releasables.close(bucketOrds); 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/PathHierarchyAggregatorFactory.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | import org.apache.lucene.index.LeafReaderContext; 4 | import org.apache.lucene.util.ArrayUtil; 5 | import org.apache.lucene.util.BytesRef; 6 | import org.apache.lucene.util.BytesRefBuilder; 7 | import org.apache.lucene.util.FutureArrays; 8 | import org.elasticsearch.index.fielddata.SortedBinaryDocValues; 9 | import org.elasticsearch.index.fielddata.SortingBinaryDocValues; 10 | import org.elasticsearch.search.aggregations.Aggregator; 11 | import org.elasticsearch.search.aggregations.AggregatorFactories; 12 | import org.elasticsearch.search.aggregations.AggregatorFactory; 13 | import org.elasticsearch.search.aggregations.BucketOrder; 14 | import org.elasticsearch.search.aggregations.CardinalityUpperBound; 15 | import org.elasticsearch.search.aggregations.InternalAggregation; 16 | import org.elasticsearch.search.aggregations.InternalOrder; 17 | import org.elasticsearch.search.aggregations.NonCollectingAggregator; 18 | import org.elasticsearch.search.aggregations.bucket.BucketUtils; 19 | import org.elasticsearch.search.aggregations.support.AggregationContext; 20 | import org.elasticsearch.search.aggregations.support.ValuesSource; 21 | import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; 22 | import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; 23 | import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; 24 | import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; 25 | 26 | import java.io.IOException; 27 | import java.util.ArrayList; 28 | import java.util.Map; 29 | 30 | /** 31 | * The factory of aggregators. 
32 | * ValuesSourceAggregatorFactory extends {@link AggregatorFactory} 33 | */ 34 | class PathHierarchyAggregatorFactory extends ValuesSourceAggregatorFactory { 35 | 36 | private BytesRef separator; 37 | private int minDepth; 38 | private int maxDepth; 39 | private BucketOrder order; 40 | private long minDocCount; 41 | private boolean keepBlankPath; 42 | private final PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds; 43 | 44 | PathHierarchyAggregatorFactory(String name, 45 | ValuesSourceConfig config, 46 | String separator, 47 | int minDepth, 48 | int maxDepth, 49 | boolean keepBlankPath, 50 | BucketOrder order, 51 | long minDocCount, 52 | PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds, 53 | AggregationContext context, 54 | AggregatorFactory parent, 55 | AggregatorFactories.Builder subFactoriesBuilder, 56 | Map metaData 57 | ) throws IOException { 58 | super(name, config, context, parent, subFactoriesBuilder, metaData); 59 | this.separator = new BytesRef(separator); 60 | this.minDepth = minDepth; 61 | this.maxDepth = maxDepth; 62 | this.keepBlankPath = keepBlankPath; 63 | this.order = order; 64 | this.minDocCount = minDocCount; 65 | this.bucketCountThresholds = bucketCountThresholds; 66 | } 67 | 68 | public static void registerAggregators(ValuesSourceRegistry.Builder builder) { 69 | builder.register(PathHierarchyAggregationBuilder.REGISTRY_KEY, CoreValuesSourceType.KEYWORD, (name, 70 | factories, 71 | separator, 72 | minDepth, 73 | maxDepth, 74 | keepBlankPath, 75 | order, 76 | minDocCount, 77 | bucketCountThresholds, 78 | valuesSourceConfig, 79 | aggregationContext, 80 | parent, 81 | cardinality, 82 | metadata) -> null, 83 | true); 84 | } 85 | 86 | @Override 87 | protected Aggregator createUnmapped(Aggregator parent, Map metadata) throws IOException { 88 | final InternalAggregation aggregation = new InternalPathHierarchy(name, new ArrayList<>(), order, minDocCount, 89 | bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), 0, separator, metadata); 90 | return new NonCollectingAggregator(name, context, parent, factories, metadata) { 91 | { 92 | // even in the case of an unmapped aggregator, validate the 93 | // order 94 | order.validate(this); 95 | } 96 | 97 | @Override 98 | public InternalAggregation buildEmptyAggregation() { 99 | return aggregation; 100 | } 101 | }; 102 | } 103 | 104 | @Override 105 | protected Aggregator doCreateInternal(Aggregator parent, CardinalityUpperBound cardinality, 106 | Map metadata) throws IOException { 107 | ValuesSource valuesSourceBytes = new HierarchyValuesSource(config.getValuesSource(), separator, minDepth, maxDepth, keepBlankPath); 108 | PathHierarchyAggregator.BucketCountThresholds bucketCountThresholds = new 109 | PathHierarchyAggregator.BucketCountThresholds(this.bucketCountThresholds); 110 | if (!InternalOrder.isKeyOrder(order) 111 | && bucketCountThresholds.getShardSize() == PathHierarchyAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) { 112 | // The user has not made a shardSize selection. 
Use default 113 | // heuristic to avoid any wrong-ranking caused by distributed 114 | // counting 115 | bucketCountThresholds.setShardSize(BucketUtils.suggestShardSideQueueSize(bucketCountThresholds.getRequiredSize())); 116 | } 117 | bucketCountThresholds.ensureValidity(); 118 | return new PathHierarchyAggregator( 119 | name, factories, context, 120 | valuesSourceBytes, order, minDocCount, bucketCountThresholds, separator, minDepth, 121 | parent, cardinality, metadata); 122 | } 123 | 124 | /** 125 | * A list of per-document binary values, sorted according to {@link BytesRef}. 126 | * There might be dups however. 127 | * @see ValuesSource 128 | */ 129 | private static class HierarchyValues extends SortingBinaryDocValues { 130 | 131 | /** valuesSource is a list of per-document binary values, sorted according to {@link BytesRef#compareTo(BytesRef)} 132 | * (warning, there might be dups however). 133 | */ 134 | private SortedBinaryDocValues valuesSource; 135 | private BytesRef separator; 136 | private int minDepth; 137 | private int maxDepth; 138 | private boolean keepBlankPath; 139 | 140 | private HierarchyValues(SortedBinaryDocValues valuesSource, BytesRef separator, int minDepth, int maxDepth, 141 | boolean keepBlankPath) { 142 | this.valuesSource = valuesSource; 143 | this.separator = separator; 144 | this.minDepth = minDepth; 145 | this.maxDepth = maxDepth; 146 | this.keepBlankPath = keepBlankPath; 147 | } 148 | 149 | /** 150 | * Handles iterations on doc values: 151 | * Advance the iterator to exactly target and return whether target has a value. 152 | * target must be greater than or equal to the current doc ID and must be a valid doc ID, ie. ≥ 0 and < maxDoc. 153 | * After this method returns, docID() returns target. 154 | */ 155 | @Override 156 | public boolean advanceExact(int docId) throws IOException { 157 | if (valuesSource.advanceExact(docId)) { 158 | count = 0; 159 | int t = 0; 160 | for (int i=0; i < valuesSource.docValueCount(); i++) { 161 | int depth = 0; 162 | BytesRef val = valuesSource.nextValue(); 163 | BytesRefBuilder cleanVal = new BytesRefBuilder(); 164 | int startNewValOffset = -1; 165 | 166 | for (int offset=0; offset < val.length; offset++) { 167 | // it is a separator 168 | if (val.length - offset >= separator.length && 169 | FutureArrays.equals( 170 | separator.bytes, separator.offset, separator.offset + separator.length, 171 | val.bytes, val.offset + offset, val.offset + offset + separator.length)) { 172 | // ignore separator at the beginning 173 | if (offset == 0) { 174 | offset += separator.length -1; 175 | continue; 176 | } 177 | 178 | // A new path needs to be add 179 | if (startNewValOffset != -1) { 180 | cleanVal.append(val.bytes, val.offset + startNewValOffset, offset - startNewValOffset); 181 | if (depth >= minDepth) { 182 | values[t++].copyBytes(cleanVal); 183 | } 184 | startNewValOffset = -1; 185 | cleanVal.append(separator); 186 | depth ++; 187 | // two separators following each other 188 | } else if (keepBlankPath) { 189 | count++; 190 | growExact(); 191 | values[t++].copyBytes(cleanVal); 192 | cleanVal.append(separator); 193 | depth ++; 194 | } 195 | 196 | if (maxDepth >= 0 && depth > maxDepth) { 197 | break; 198 | } 199 | offset += separator.length - 1; 200 | } else { 201 | if (startNewValOffset == -1) { 202 | startNewValOffset = offset; 203 | if (depth >= minDepth) { 204 | count++; 205 | growExact(); 206 | } 207 | } 208 | } 209 | } 210 | 211 | if (startNewValOffset != -1 && minDepth <= depth) { 212 | cleanVal.append(val.bytes, val.offset + 
startNewValOffset, val.length - startNewValOffset); 213 | values[t++].copyBytes(cleanVal); 214 | } 215 | 216 | } 217 | sort(); // sort values that are stored between offsets 0 and count of values 218 | return true; 219 | } else 220 | return false; 221 | } 222 | 223 | final void growExact() { 224 | if (values.length < count) { 225 | final int oldLen = values.length; 226 | values = ArrayUtil.growExact(values, count); 227 | for (int i = oldLen; i < count; ++i) { 228 | values[i] = new BytesRefBuilder(); 229 | } 230 | } 231 | } 232 | } 233 | 234 | /** 235 | * To get ValuesSource as sorted bytes. 236 | */ 237 | private static class HierarchyValuesSource extends ValuesSource.Bytes { 238 | private final ValuesSource values; 239 | private final BytesRef separator; 240 | private final int minDepth; 241 | private final int maxDepth; 242 | private final boolean twoSepAsOne; 243 | 244 | private HierarchyValuesSource(ValuesSource values, BytesRef separator, int minDepth, int maxDepth, boolean twoSepAsOne){ 245 | this.values = values; 246 | this.separator = separator; 247 | this.minDepth = minDepth; 248 | this.maxDepth = maxDepth; 249 | this.twoSepAsOne = twoSepAsOne; 250 | } 251 | 252 | @Override 253 | public SortedBinaryDocValues bytesValues(LeafReaderContext context) throws IOException { 254 | return new HierarchyValues(values.bytesValues(context), separator, minDepth, maxDepth, twoSepAsOne); 255 | } 256 | 257 | } 258 | } 259 | 260 | -------------------------------------------------------------------------------- /src/main/java/org/opendatasoft/elasticsearch/search/aggregations/bucket/PathSortedTree.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch.search.aggregations.bucket; 2 | 3 | 4 | import java.util.ArrayList; 5 | import java.util.Comparator; 6 | import java.util.Iterator; 7 | import java.util.List; 8 | import java.util.NoSuchElementException; 9 | import java.util.PriorityQueue; 10 | import java.util.Stack; 11 | 12 | public class PathSortedTree implements Iterable{ 13 | 14 | private Comparator comparator; 15 | private Node root; 16 | private int size = -1; 17 | private int fullSize = 0; 18 | 19 | public PathSortedTree(Comparator comparator) { 20 | root = new Node<>(comparator); 21 | this.comparator = comparator; 22 | } 23 | 24 | public PathSortedTree(Comparator comparator, int size) { 25 | this(comparator); 26 | this.size = size; 27 | } 28 | 29 | public int getFullSize() { 30 | return fullSize; 31 | } 32 | 33 | public void add(K[] path, T element) { 34 | /* Please note that paths in path must be descending-sorted by level. 
*/ 35 | Node currentNode = root; 36 | for (K k : path) { 37 | boolean newChild = true; 38 | for (Node child : currentNode.children) { 39 | if (child.key.equals(k)) { 40 | currentNode = child; 41 | newChild = false; 42 | break; 43 | } 44 | } 45 | if (newChild) { 46 | Node newNode = new Node<>(k, comparator, element, currentNode); 47 | currentNode.children.add(newNode); 48 | fullSize ++; 49 | break; 50 | } 51 | } 52 | } 53 | 54 | 55 | public List getAsList() { 56 | 57 | List result = new ArrayList<>(fullSize); 58 | 59 | Iterator iterator = consumer(); 60 | 61 | while (iterator.hasNext()){ 62 | result.add(iterator.next()); 63 | } 64 | return result; 65 | } 66 | 67 | public Iterator consumer() { 68 | return new PathSortedTreeConsumer(root, fullSize); 69 | } 70 | 71 | 72 | @Override 73 | public Iterator iterator() { 74 | return new PathSortedTreeIterator(root); 75 | } 76 | 77 | public static class Node { 78 | private K key; 79 | private T data; 80 | private Node parent; 81 | 82 | private PriorityQueue> children; 83 | 84 | Node() { 85 | this.children = new PriorityQueue<>(); 86 | } 87 | 88 | public Node(Comparator comparator) { 89 | this.children = new PriorityQueue<>(getComparator(comparator)); 90 | } 91 | 92 | Comparator> getComparator(Comparator comparator) { 93 | return (n1, n2) -> comparator.compare(n1.data, n2.data); 94 | } 95 | 96 | 97 | public Node(K key, Comparator comparator, T data, Node parent) { 98 | this.key = key; 99 | this.data = data; 100 | this.children = new PriorityQueue<>(getComparator(comparator)); 101 | this.parent = parent; 102 | } 103 | } 104 | 105 | private class PathSortedTreeIterator implements Iterator { 106 | 107 | private Stack>> iterators; 108 | Iterator> current; 109 | 110 | PathSortedTreeIterator(Node root) { 111 | current = root.children.iterator(); 112 | iterators = new Stack<>(); 113 | } 114 | 115 | @Override 116 | public boolean hasNext() { 117 | return current.hasNext(); 118 | } 119 | 120 | @Override 121 | public T next() { 122 | 123 | Node nextNode = current.next(); 124 | 125 | if (! nextNode.children.isEmpty()) { 126 | iterators.push(current); 127 | current = nextNode.children.iterator(); 128 | } else if (! current.hasNext()){ 129 | while (! 
iterators.empty()) { 130 | current = iterators.pop(); 131 | if (current.hasNext()) { 132 | break; 133 | } 134 | } 135 | } 136 | 137 | return nextNode.data; 138 | 139 | } 140 | } 141 | 142 | private class PathSortedTreeConsumer implements Iterator { 143 | 144 | Node cursor; 145 | 146 | int currentSize = 0; 147 | int iteratorFullSize; 148 | 149 | PathSortedTreeConsumer(Node root, int fullSize) { 150 | iteratorFullSize = fullSize; 151 | cursor = root; 152 | } 153 | 154 | @Override 155 | public boolean hasNext() { 156 | if (size >=0 && currentSize >= size) { 157 | return false; 158 | } 159 | if (cursor.children.size() > 0) { 160 | return true; 161 | } 162 | 163 | return currentSize < iteratorFullSize; 164 | } 165 | 166 | @Override 167 | public T next() { 168 | 169 | Node nextNode = null; 170 | while (nextNode == null) { 171 | nextNode = cursor.children.poll(); 172 | if (nextNode == null) { 173 | if (cursor.parent == null) { 174 | break; 175 | } 176 | cursor = cursor.parent; 177 | } 178 | } 179 | if (nextNode == null) throw new NoSuchElementException(); 180 | currentSize ++; 181 | fullSize --; 182 | cursor = nextNode; 183 | return nextNode.data; 184 | 185 | } 186 | } 187 | 188 | } 189 | -------------------------------------------------------------------------------- /src/test/java/org/opendatasoft/elasticsearch/PathHierarchyTests.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch; 2 | 3 | import org.elasticsearch.xcontent.XContentParser; 4 | import org.elasticsearch.xcontent.json.JsonXContent; 5 | import org.elasticsearch.search.aggregations.AggregationBuilder; 6 | import org.elasticsearch.test.ESTestCase; 7 | import org.opendatasoft.elasticsearch.search.aggregations.bucket.PathHierarchyAggregationBuilder; 8 | 9 | public class PathHierarchyTests extends ESTestCase { 10 | public void testParser() throws Exception { 11 | // can create the factory with utf8 separator 12 | String separator = "夢"; 13 | XContentParser stParser = createParser(JsonXContent.jsonXContent, 14 | "{\"field\":\"path\", \"separator\": \"" + separator + "\"}"); 15 | XContentParser.Token token = stParser.nextToken(); 16 | assertSame(XContentParser.Token.START_OBJECT, token); 17 | assertNotNull(PathHierarchyAggregationBuilder.parse("path_hierarchy", stParser)); 18 | 19 | // can create the factory with an array of orders 20 | String orders = "[{\"_key\": \"asc\"}, {\"_count\": \"desc\"}]"; 21 | stParser = createParser(JsonXContent.jsonXContent, 22 | "{\"field\":\"path\", \"order\": " + orders + "}"); 23 | assertNotNull(PathHierarchyAggregationBuilder.parse("path_hierarchy", stParser)); 24 | stParser = createParser(JsonXContent.jsonXContent, 25 | "{\"field\":\"path\", \"separator\":\"/\", \"order\": " + orders + ", \"min_depth\": 0, \"max_depth\": 3}"); 26 | AggregationBuilder builder = PathHierarchyAggregationBuilder.parse("path_hierarchy", stParser); 27 | assertNotNull(builder); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/yamlRestTest/java/org/opendatasoft/elasticsearch/RestApiYamlIT.java: -------------------------------------------------------------------------------- 1 | package org.opendatasoft.elasticsearch; 2 | 3 | import com.carrotsearch.randomizedtesting.annotations.Name; 4 | import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; 5 | import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; 6 | import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; 
7 | 8 | /* 9 | * Generic loader for yaml integration tests 10 | */ 11 | 12 | public class RestApiYamlIT extends ESClientYamlSuiteTestCase { 13 | public RestApiYamlIT (@Name("yaml") ClientYamlTestCandidate testCandidate) { 14 | super(testCandidate); 15 | } 16 | 17 | @ParametersFactory 18 | public static Iterable parameters() throws Exception { 19 | return ESClientYamlSuiteTestCase.createParameters(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/10_basic.yml: -------------------------------------------------------------------------------- 1 | "PathHierarchy Aggregation plugin installed": 2 | - do: 3 | cluster.state: {} 4 | 5 | - set: {master_node: master} 6 | 7 | - do: 8 | nodes.info: {} 9 | 10 | - match: {nodes.$master.plugins.0.name: pathhierarchy-aggregation} 11 | -------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/20_path_hierarchy.yml: -------------------------------------------------------------------------------- 1 | setup: 2 | - do: 3 | indices.create: 4 | index: filesystem 5 | body: 6 | settings: 7 | number_of_shards: 1 8 | number_of_replicas: 0 9 | mappings: 10 | properties: 11 | path: 12 | type: keyword 13 | views: 14 | type: integer 15 | 16 | - do: 17 | cluster.health: 18 | wait_for_status: green 19 | 20 | --- 21 | "Test with filesystem arborescence": 22 | - do: 23 | index: 24 | index: filesystem 25 | id: 1 26 | body: { "path": "/My documents/Spreadsheets/Budget_2013.xls", "views": 10 } 27 | 28 | - do: 29 | index: 30 | index: filesystem 31 | id: 2 32 | body: { "path": "/My documents/Spreadsheets/Budget_2014.xls", "views": 7 } 33 | 34 | - do: 35 | index: 36 | index: filesystem 37 | id: 3 38 | body: { "path": "/My documents/Test.txt", "views": 1 } 39 | 40 | - do: 41 | index: 42 | index: filesystem 43 | id: 4 44 | body: { "path": "/My documents/Spreadsheets//Budget_2014.xls", "views": 12 } 45 | 46 | - do: 47 | indices.refresh: {} 48 | 49 | 50 | # basic test 51 | - do: 52 | search: 53 | rest_total_hits_as_int: true 54 | body: { 55 | "size" : 0, 56 | "aggs" : { 57 | "tree" : { 58 | "path_hierarchy" : { 59 | "field" : "path", 60 | "separator": "/", 61 | "order": [ {"_count": "desc"}, {"_key": "asc"}], 62 | "min_depth": 0, 63 | "max_depth": 3 64 | }, 65 | "aggs": { 66 | "total_views": { 67 | "sum": { 68 | "field": "views" 69 | } 70 | } 71 | } 72 | } 73 | } 74 | } 75 | 76 | - match: { hits.total: 4 } 77 | 78 | - match: { aggregations.tree.buckets.0.key: "My documents" } 79 | - match: { aggregations.tree.buckets.0.doc_count: 4 } 80 | - match: { aggregations.tree.buckets.0.total_views.value: 30 } 81 | 82 | - match: { aggregations.tree.buckets.0.tree.buckets.0.key: "Spreadsheets" } 83 | - match: { aggregations.tree.buckets.0.tree.buckets.0.doc_count: 3 } 84 | - match: { aggregations.tree.buckets.0.tree.buckets.0.total_views.value: 29 } 85 | 86 | - match: { aggregations.tree.buckets.0.tree.buckets.1.key: "Test.txt" } 87 | - match: { aggregations.tree.buckets.0.tree.buckets.1.doc_count: 1 } 88 | - match: { aggregations.tree.buckets.0.tree.buckets.1.total_views.value: 1 } 89 | 90 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.key: "Budget_2014.xls" } 91 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.doc_count: 2 } 92 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.total_views.value: 19 } 93 | 94 | - match: { 
aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.key: "Budget_2013.xls" } 95 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.doc_count: 1 } 96 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.total_views.value: 10 } 97 | 98 | 99 | # test size 100 | - do: 101 | search: 102 | rest_total_hits_as_int: true 103 | body: { 104 | "size" : 0, 105 | "aggs" : { 106 | "tree" : { 107 | "path_hierarchy" : { 108 | "field" : "path", 109 | "size": 2 110 | } 111 | } 112 | } 113 | } 114 | 115 | - match: { hits.total: 4 } 116 | 117 | - match: { aggregations.tree.buckets.0.key: "My documents" } 118 | - match: { aggregations.tree.buckets.0.doc_count: 4 } 119 | 120 | - match: { aggregations.tree.buckets.0.tree.buckets.0.key: "Spreadsheets" } 121 | - match: { aggregations.tree.buckets.0.tree.buckets.0.doc_count: 3 } 122 | 123 | 124 | # test depth 125 | - do: 126 | search: 127 | rest_total_hits_as_int: true 128 | body: { 129 | "size" : 0, 130 | "aggs" : { 131 | "tree" : { 132 | "path_hierarchy" : { 133 | "field" : "path", 134 | "separator": "/", 135 | "order": [{"_count": "desc"}, {"_key": "asc"}], 136 | "depth": 2 137 | }, 138 | "aggs": { 139 | "total_views": { 140 | "sum": { 141 | "field": "views" 142 | } 143 | } 144 | } 145 | } 146 | } 147 | } 148 | 149 | - match: { hits.total: 4 } 150 | 151 | - match: { aggregations.tree.buckets.0.key: "Budget_2014.xls" } 152 | - match: { aggregations.tree.buckets.0.doc_count: 2 } 153 | - match: { aggregations.tree.buckets.0.total_views.value: 19 } 154 | 155 | - match: { aggregations.tree.buckets.1.key: "Budget_2013.xls" } 156 | - match: { aggregations.tree.buckets.1.doc_count: 1 } 157 | - match: { aggregations.tree.buckets.1.total_views.value: 10 } 158 | 159 | 160 | # test keep_blank_path 161 | - do: 162 | search: 163 | rest_total_hits_as_int: true 164 | body: { 165 | "size" : 0, 166 | "aggs" : { 167 | "tree" : { 168 | "path_hierarchy" : { 169 | "field" : "path", 170 | "separator": "/", 171 | "order": [{"_count": "desc"}, {"_key": "asc"}], 172 | "keep_blank_path": "true" 173 | }, 174 | "aggs": { 175 | "total_views": { 176 | "sum": { 177 | "field": "views" 178 | } 179 | } 180 | } 181 | } 182 | } 183 | } 184 | 185 | - match: { hits.total: 4 } 186 | 187 | - match: { aggregations.tree.buckets.0.key: "My documents" } 188 | - match: { aggregations.tree.buckets.0.doc_count: 4 } 189 | - match: { aggregations.tree.buckets.0.total_views.value: 30 } 190 | 191 | - match: { aggregations.tree.buckets.0.tree.buckets.0.key: "Spreadsheets" } 192 | - match: { aggregations.tree.buckets.0.tree.buckets.0.doc_count: 3 } 193 | - match: { aggregations.tree.buckets.0.tree.buckets.0.total_views.value: 29 } 194 | 195 | - match: { aggregations.tree.buckets.0.tree.buckets.1.key: "Test.txt" } 196 | - match: { aggregations.tree.buckets.0.tree.buckets.1.doc_count: 1 } 197 | - match: { aggregations.tree.buckets.0.tree.buckets.1.total_views.value: 1 } 198 | 199 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.key: "" } 200 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.doc_count: 1 } 201 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.total_views.value: 12 } 202 | 203 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.key: "Budget_2013.xls" } 204 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.doc_count: 1 } 205 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.1.total_views.value: 10 } 206 | 207 | - match: { 
aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.2.key: "Budget_2014.xls" } 208 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.2.doc_count: 1 } 209 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.2.total_views.value: 7 } 210 | 211 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.tree.buckets.0.key: "Budget_2014.xls" } 212 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.tree.buckets.0.doc_count: 1 } 213 | - match: { aggregations.tree.buckets.0.tree.buckets.0.tree.buckets.0.tree.buckets.0.total_views.value: 12 } 214 | 215 | # test multi characters separator 216 | - do: 217 | search: 218 | rest_total_hits_as_int: true 219 | body: { 220 | "size" : 0, 221 | "aggs" : { 222 | "tree" : { 223 | "path_hierarchy" : { 224 | "field" : "path", 225 | "separator": "doc", 226 | "order": [{"_key": "asc"}], 227 | }, 228 | "aggs": { 229 | "total_views": { 230 | "sum": { 231 | "field": "views" 232 | } 233 | } 234 | } 235 | } 236 | } 237 | } 238 | 239 | - match: { hits.total: 4 } 240 | 241 | - match: { aggregations.tree.buckets.0.key: "/My " } 242 | - match: { aggregations.tree.buckets.0.doc_count: 4 } 243 | - match: { aggregations.tree.buckets.0.total_views.value: 30 } 244 | 245 | - match: { aggregations.tree.buckets.0.tree.buckets.0.key: "uments/Spreadsheets//Budget_2014.xls" } 246 | - match: { aggregations.tree.buckets.0.tree.buckets.0.doc_count: 1 } 247 | - match: { aggregations.tree.buckets.0.tree.buckets.0.total_views.value: 12 } 248 | 249 | - match: { aggregations.tree.buckets.0.tree.buckets.1.key: "uments/Spreadsheets/Budget_2013.xls" } 250 | - match: { aggregations.tree.buckets.0.tree.buckets.1.doc_count: 1 } 251 | - match: { aggregations.tree.buckets.0.tree.buckets.1.total_views.value: 10 } 252 | 253 | - match: { aggregations.tree.buckets.0.tree.buckets.2.key: "uments/Spreadsheets/Budget_2014.xls" } 254 | - match: { aggregations.tree.buckets.0.tree.buckets.2.doc_count: 1 } 255 | - match: { aggregations.tree.buckets.0.tree.buckets.2.total_views.value: 7 } 256 | 257 | - match: { aggregations.tree.buckets.0.tree.buckets.3.key: "uments/Test.txt" } 258 | - match: { aggregations.tree.buckets.0.tree.buckets.3.doc_count: 1 } 259 | - match: { aggregations.tree.buckets.0.tree.buckets.3.total_views.value: 1 } 260 | 261 | ## test sum_other_hierarchy_nodes 262 | # - do: 263 | # search: 264 | # body: { 265 | # "size" : 0, 266 | # "aggs" : { 267 | # "tree" : { 268 | # "path_hierarchy" : { 269 | # "field" : "path", 270 | # "size": 1 271 | # } 272 | # } 273 | # } 274 | # } 275 | # 276 | # - match: { hits.total: 4 } 277 | # 278 | # - match: { aggregations.tree.sum_other_hierarchy_nodes: 4 } 279 | -------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/30_date_hierarchy.yml: -------------------------------------------------------------------------------- 1 | setup: 2 | - do: 3 | indices.create: 4 | index: calendar 5 | body: 6 | settings: 7 | number_of_shards: 2 8 | number_of_replicas: 0 9 | mappings: 10 | properties: 11 | date: 12 | type: date 13 | 14 | - do: 15 | cluster.health: 16 | wait_for_status: green 17 | 18 | --- 19 | "Test with date hierarchy": 20 | - do: 21 | index: 22 | index: calendar 23 | id: 1 24 | body: { "date": "2012-01-10T02:47:28" } 25 | 26 | - do: 27 | index: 28 | index: calendar 29 | id: 2 30 | body: { "date": "2011-01-05T01:43:35" } 31 | 32 | - do: 33 | index: 34 | index: calendar 35 | id: 3 36 | body: { "date": 
"2012-05-01T12:24:19" } 37 | 38 | - do: 39 | indices.refresh: {} 40 | 41 | 42 | # test years interval 43 | - do: 44 | search: 45 | rest_total_hits_as_int: true 46 | body: { 47 | "size" : 0, 48 | "aggs" : { 49 | "tree" : { 50 | "date_hierarchy" : { 51 | "field" : "date", 52 | "interval": "years", 53 | "order": [{"_key": "asc"}], 54 | } 55 | } 56 | } 57 | } 58 | 59 | - match: { hits.total: 3 } 60 | 61 | - match: { aggregations.tree.buckets.0.key: "2011" } 62 | - match: { aggregations.tree.buckets.0.doc_count: 1 } 63 | 64 | - match: { aggregations.tree.buckets.1.key: "2012" } 65 | - match: { aggregations.tree.buckets.1.doc_count: 2 } 66 | 67 | 68 | # test months interval 69 | - do: 70 | search: 71 | rest_total_hits_as_int: true 72 | body: { 73 | "size": 0, 74 | "aggs": { 75 | "tree": { 76 | "date_hierarchy": { 77 | "field": "date", 78 | "interval": "months", 79 | "order": [{"_key": "asc"}], 80 | } 81 | } 82 | } 83 | } 84 | 85 | - match: { hits.total: 3 } 86 | 87 | - match: { aggregations.tree.buckets.0.key: "2011" } 88 | - match: { aggregations.tree.buckets.0.doc_count: 1 } 89 | 90 | - match: { aggregations.tree.buckets.1.key: "2012" } 91 | - match: { aggregations.tree.buckets.1.doc_count: 2 } 92 | 93 | - match: { aggregations.tree.buckets.0.tree.buckets.0.key: "01" } 94 | - match: { aggregations.tree.buckets.0.tree.buckets.0.doc_count: 1 } 95 | 96 | - match: { aggregations.tree.buckets.1.tree.buckets.0.key: "01" } 97 | - match: { aggregations.tree.buckets.1.tree.buckets.0.doc_count: 1 } 98 | 99 | - match: { aggregations.tree.buckets.1.tree.buckets.1.key: "05" } 100 | - match: { aggregations.tree.buckets.1.tree.buckets.1.doc_count: 1 } 101 | -------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/40_max_buckets_path_hierarchy.yml: -------------------------------------------------------------------------------- 1 | setup: 2 | - do: 3 | indices.create: 4 | index: filesystem 5 | body: 6 | settings: 7 | number_of_shards: 1 8 | number_of_replicas: 0 9 | mappings: 10 | properties: 11 | path: 12 | type: keyword 13 | views: 14 | type: integer 15 | 16 | - do: 17 | cluster.put_settings: 18 | body: 19 | transient: 20 | search.max_buckets: "3" 21 | flat_settings: true 22 | 23 | - do: 24 | cluster.health: 25 | wait_for_status: green 26 | 27 | --- 28 | teardown: 29 | 30 | - do: 31 | cluster.put_settings: 32 | body: 33 | transient: 34 | search.max_buckets: null 35 | 36 | --- 37 | "Test with filesystem arborescence": 38 | - do: 39 | index: 40 | index: filesystem 41 | id: 1 42 | body: { "path": "/Spreadsheets/Budget_2013.xls", "views": 10 } 43 | 44 | - do: 45 | index: 46 | index: filesystem 47 | id: 2 48 | body: { "path": "/Spreadsheets/Budget_2014.xls", "views": 7 } 49 | 50 | - do: 51 | index: 52 | index: filesystem 53 | id: 3 54 | body: { "path": "/My documents/Test.txt", "views": 1 } 55 | 56 | - do: 57 | index: 58 | index: filesystem 59 | id: 4 60 | body: { "path": "/Spreadsheets/Budget_2014.xls", "views": 12 } 61 | 62 | - do: 63 | indices.refresh: {} 64 | 65 | 66 | # Search with limited size 67 | - do: 68 | search: 69 | rest_total_hits_as_int: true 70 | body: { 71 | "size" : 0, 72 | "aggs" : { 73 | "tree" : { 74 | "path_hierarchy" : { 75 | "field" : "path", 76 | "separator": "/", 77 | "size": "3", 78 | "shard_size": "3" 79 | }, 80 | "aggs": { 81 | "total_views": { 82 | "sum": { 83 | "field": "views" 84 | } 85 | } 86 | } 87 | } 88 | } 89 | } 90 | 91 | - match: { hits.total: 4 } 92 | 
-------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/50_max_buckets_date_hierarchy.yml: -------------------------------------------------------------------------------- 1 | setup: 2 | - do: 3 | indices.create: 4 | index: calendar 5 | body: 6 | settings: 7 | number_of_shards: 1 8 | number_of_replicas: 0 9 | mappings: 10 | properties: 11 | date: 12 | type: date 13 | 14 | - do: 15 | cluster.put_settings: 16 | body: 17 | transient: 18 | search.max_buckets: "3" 19 | flat_settings: true 20 | 21 | - do: 22 | cluster.health: 23 | wait_for_status: green 24 | 25 | --- 26 | teardown: 27 | 28 | - do: 29 | cluster.put_settings: 30 | body: 31 | transient: 32 | search.max_buckets: null 33 | 34 | --- 35 | "Test with date hierarchy": 36 | - do: 37 | index: 38 | index: calendar 39 | id: 1 40 | body: { "date": "2012-01-10T02:47:28" } 41 | 42 | - do: 43 | index: 44 | index: calendar 45 | id: 2 46 | body: { "date": "2011-01-05T01:43:35" } 47 | 48 | - do: 49 | index: 50 | index: calendar 51 | id: 3 52 | body: { "date": "2012-05-01T12:24:19" } 53 | 54 | - do: 55 | index: 56 | index: calendar 57 | id: 4 58 | body: { "date": "2020-05-01T12:24:19" } 59 | 60 | - do: 61 | indices.refresh: {} 62 | 63 | 64 | # Search with limited size 65 | - do: 66 | search: 67 | rest_total_hits_as_int: true 68 | body: { 69 | "size": 0, 70 | "aggs": { 71 | "tree": { 72 | "date_hierarchy": { 73 | "field": "date", 74 | "interval": "months", 75 | "order": [{"_key": "asc"}], 76 | "size" : 3, 77 | "shard_size": 3 78 | } 79 | } 80 | } 81 | } 82 | 83 | - match: { hits.total: 4 } 84 | -------------------------------------------------------------------------------- /src/yamlRestTest/resources/rest-api-spec/test/PathHierarchy/60_path_hierarchy_multi_buckets.yml: -------------------------------------------------------------------------------- 1 | setup: 2 | - do: 3 | indices.create: 4 | index: filesystem 5 | body: 6 | settings: 7 | number_of_shards: 1 8 | number_of_replicas: 0 9 | mappings: 10 | properties: 11 | path1: 12 | type: keyword 13 | path2: 14 | type: keyword 15 | 16 | - do: 17 | cluster.health: 18 | wait_for_status: green 19 | 20 | --- 21 | "Test with filesystem arborescence": 22 | - do: 23 | index: 24 | index: filesystem 25 | id: 1 26 | body: { "path1": "/My documents/Spreadsheets/Budget_2013.xls", "path2": "/My documents/Spreadsheets/Budget_2014.xls" } 27 | 28 | - do: 29 | index: 30 | index: filesystem 31 | id: 2 32 | body: { "path1": "/My documents/Spreadsheets/Budget_2014.xls", "path2": "/My documents/Spreadsheets/Budget_2013.xls" } 33 | 34 | - do: 35 | indices.refresh: {} 36 | 37 | 38 | # basic test 39 | - do: 40 | search: 41 | rest_total_hits_as_int: true 42 | body: { 43 | "size" : 0, 44 | "aggs": { 45 | "_path1_agg": { 46 | "path_hierarchy": { 47 | "field": "path1", 48 | "order": { 49 | "_key": "asc" 50 | }, 51 | "shard_size": 100, 52 | "size": 20000, 53 | "min_doc_count": 0 54 | } 55 | }, 56 | "_path2_agg": { 57 | "path_hierarchy": { 58 | "field": "path2", 59 | "order": { 60 | "_key": "asc" 61 | }, 62 | "shard_size": 100, 63 | "size": 20000, 64 | "min_doc_count": 0 65 | } 66 | } 67 | } 68 | } 69 | 70 | - match: { aggregations._path1_agg.buckets.0.key: "My documents" } 71 | 72 | - match: { aggregations._path2_agg.buckets.0.key: "My documents" } 73 | --------------------------------------------------------------------------------
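As an aside for readers of the listing above, here is a minimal, standalone sketch of how `PathSortedTree` turns flat per-path buckets back into a sorted, size-capped hierarchy, in the spirit of what `PathHierarchyAggregator#buildAggregations` does. The generic signature `PathSortedTree<String, Bucket>` is inferred from the aggregator's call sites (the type arguments are not fully visible in the listing), and the `Bucket` record and sample paths are purely illustrative.

```
import java.util.Comparator;
import java.util.Iterator;

import org.opendatasoft.elasticsearch.search.aggregations.bucket.PathSortedTree;

public class PathSortedTreeExample {

    // Hypothetical stand-in for InternalPathHierarchy.InternalBucket.
    record Bucket(String name, long docCount) {}

    public static void main(String[] args) {
        // Highest doc count first: PriorityQueue#poll returns the element that
        // sorts first under the comparator, so siblings come back count-descending.
        Comparator<Bucket> byCountDesc = Comparator.comparingLong(Bucket::docCount).reversed();

        // Keep at most 3 buckets overall, like a shard_size of 3.
        PathSortedTree<String, Bucket> tree = new PathSortedTree<>(byCountDesc, 3);

        // add() expects the path segments from the root down, and a parent has to be
        // inserted before its children (the aggregator gets this for free because
        // every prefix of a path is emitted as its own value and values are sorted).
        tree.add(new String[] {"My documents"}, new Bucket("My documents", 4));
        tree.add(new String[] {"My documents", "Spreadsheets"}, new Bucket("Spreadsheets", 3));
        tree.add(new String[] {"My documents", "Test.txt"}, new Bucket("Test.txt", 1));
        tree.add(new String[] {"My documents", "Spreadsheets", "Budget.xls"}, new Bucket("Budget.xls", 2));

        // consumer() walks depth-first, best sibling first, and stops at the size cap:
        // prints "My documents", "Spreadsheets", "Budget.xls"; "Test.txt" is cut off.
        Iterator<Bucket> it = tree.consumer();
        while (it.hasNext()) {
            System.out.println(it.next().name());
        }
    }
}
```

This also hints at where `sum_other_hierarchy_nodes` comes from: `getFullSize()` counts every node ever added, and whatever the consumer does not emit within the cap is reported as "other", which is what the commented-out `sum_other_hierarchy_nodes` check in 20_path_hierarchy.yml is getting at.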