├── .github ├── dependabot.yml └── workflows │ └── main.yml ├── .gitignore ├── LICENSE ├── README.md ├── basics ├── force-merge.md ├── pom.xml └── src │ └── test │ └── java │ └── io │ └── mincong │ └── elasticsearch │ ├── CommonUnitClassTest.java │ ├── CompletableFutureRestClientIT.java │ ├── CompletableFutureTransportClientTest.java │ ├── GetAsyncTest.java │ ├── GetTest.java │ ├── GreenClusterIT.java │ ├── HttpIndexIT.java │ ├── IndexTest.java │ ├── MultiGetTest.java │ ├── UpdateTest.java │ ├── YelloClusterIT.java │ ├── index │ ├── IndexNotFoundExceptionTest.java │ └── MapperParsingExceptionTest.java │ └── util │ └── BlogJavaTimeTimeValueTest.java ├── cheatsheets ├── README.md └── curl ├── cluster ├── README.md ├── cluster-settings.md ├── cluster-state.md ├── cluster-stats.md ├── disk.md ├── pom.xml └── src │ └── test │ ├── java │ └── io │ │ └── mincong │ │ └── elasticsearch │ │ ├── ClusterNodesInfoIT.java │ │ ├── ClusterSettingsLegacyClientTest.java │ │ ├── ClusterSettingsRestClientIT.java │ │ ├── ClusterStateTest.java │ │ ├── ClusterStatsLegacyClientTest.java │ │ └── ClusterStatsRestClientIT.java │ └── resources │ └── docker-compose.yml ├── demo-dvf ├── README.md ├── export-kibana-dashboard.sh ├── import-kibana-dashboard.sh ├── pom.xml ├── src │ ├── main │ │ ├── java │ │ │ └── io │ │ │ │ └── mincong │ │ │ │ └── dvf │ │ │ │ ├── demo │ │ │ │ ├── ReadPathAggregationDemo.java │ │ │ │ └── WritePathDemo.java │ │ │ │ ├── model │ │ │ │ ├── Location.java │ │ │ │ ├── Transaction.java │ │ │ │ └── TransactionRow.java │ │ │ │ └── service │ │ │ │ ├── EsWriter.java │ │ │ │ ├── Jackson.java │ │ │ │ ├── TransactionBulkEsWriter.java │ │ │ │ ├── TransactionCsvReader.java │ │ │ │ ├── TransactionEsAggregator.java │ │ │ │ └── TransactionSimpleEsWriter.java │ │ └── resources │ │ │ ├── aggregations │ │ │ ├── request.paris.lot-type-stats-and-percentiles.json │ │ │ ├── request.paris.price-stats-and-percentiles-per-postal-code.json │ │ │ └── request.paris.price-stats-overview.json │ │ │ 
├── config │ │ │ └── custom.elasticsearch.yml │ │ │ └── log4j2.xml │ └── test │ │ ├── java │ │ └── io │ │ │ └── mincong │ │ │ └── dvf │ │ │ ├── model │ │ │ └── TestModels.java │ │ │ └── service │ │ │ ├── JacksonTest.java │ │ │ ├── MainIT.java │ │ │ ├── TransactionBulkEsWriterIT.java │ │ │ ├── TransactionCsvReaderTest.java │ │ │ ├── TransactionEsAggregatorIT.java │ │ │ ├── TransactionEsWriterAbstractIT.java │ │ │ └── TransactionSimpleEsWriterIT.java │ │ └── resources │ │ └── dvf-samples.csv ├── start-elasticsearch.sh └── start-kibana.sh ├── mapping ├── README.md ├── pom.xml └── src │ └── test │ └── java │ └── io │ └── mincong │ └── elasticsearch │ ├── DynamicMappingTest.java │ ├── ExplicitMappingTest.java │ ├── IndexStatsTest.java │ ├── IndexTemplateTest.java │ ├── StackOverflow60500157IT.java │ └── StackOverflow60667649IT.java ├── notebooks ├── README.md └── partial-update.md ├── ops └── gc.md ├── pom.xml ├── scripts ├── start-elasticsearch.sh └── upgrade-es-version.sh ├── search ├── README.md ├── pom.xml └── src │ └── test │ └── java │ └── io │ └── mincong │ └── elasticsearch │ ├── BlogDisqus4852306721Test.java │ ├── SearchScrollTest.java │ └── SearchTest.java └── snapshot ├── README.md ├── pom.xml ├── repository.md └── src └── test └── java └── io └── mincong └── elasticsearch ├── ConcurrentSnapshotDeletionTest.java ├── SnapshotStateDemoTest.java └── SnapshotTest.java /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: maven 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | time: "04:00" 8 | open-pull-requests-limit: 10 9 | ignore: 10 | - dependency-name: org.elasticsearch.test:framework 11 | versions: 12 | - 7.10.2 13 | - 7.11.0 14 | - 7.11.1 15 | - 7.11.2 16 | - 7.12.0 17 | - dependency-name: org.elasticsearch.client:elasticsearch-rest-high-level-client 18 | versions: 19 | - 7.10.2 20 | - 7.11.0 21 | - 7.11.1 22 | - 7.11.2 23 | - 7.12.0 24 | - 
dependency-name: net.javacrumbs.json-unit:json-unit-assertj 25 | versions: 26 | - 2.22.1 27 | - 2.24.0 28 | - dependency-name: com.fasterxml.jackson.datatype:jackson-datatype-jsr310 29 | versions: 30 | - 2.12.1 31 | - dependency-name: com.fasterxml.jackson.datatype:jackson-datatype-jdk8 32 | versions: 33 | - 2.12.1 34 | - dependency-name: com.fasterxml.jackson.module:jackson-module-parameter-names 35 | versions: 36 | - 2.12.1 37 | - dependency-name: com.fasterxml.jackson.dataformat:jackson-dataformat-csv 38 | versions: 39 | - 2.12.1 40 | - dependency-name: com.fasterxml.jackson.core:jackson-databind 41 | versions: 42 | - 2.12.1 43 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # 2 | # https://help.github.com/en/actions/reference/workflow-syntax-for-github-actions 3 | # https://help.github.com/en/actions/language-and-framework-guides/building-and-testing-java-with-maven 4 | # 5 | name: Actions 6 | 7 | on: [push] 8 | 9 | jobs: 10 | Java-11: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - uses: actions/setup-java@v1 15 | with: 16 | java-version: 11 17 | - uses: actions/cache@v1 18 | with: 19 | path: ~/.m2 20 | key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} 21 | restore-keys: ${{ runner.os }}-m2 22 | - run: mvn verify 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ----- Maven ----- 2 | target/ 3 | pom.xml.tag 4 | pom.xml.releaseBackup 5 | pom.xml.versionsBackup 6 | pom.xml.next 7 | release.properties 8 | dependency-reduced-pom.xml 9 | buildNumber.properties 10 | .mvn/timing.properties 11 | 12 | # Avoid ignoring Maven wrapper jar file (.jar files are usually ignored) 13 | !/.mvn/wrapper/maven-wrapper.jar 14 | 15 | # ----- Vim ----- 16 | *.swp 17 
| 18 | # ----- Custom ----- 19 | *.kibana 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Mincong Huang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning Elasticsearch [![Build Status][actions-img]][actions] 2 | 3 | Elasticsearch is a distributed, RESTful search and analytics engine capable of 4 | addressing a growing number of use cases. As the heart of the Elastic Stack, 5 | it centrally stores your data so you can discover the expected and uncover the 6 | unexpected. 
7 | 8 | ## Quickstart 9 | 10 | The fastest way to test any basic Elasticsearch feature is to start a Docker image with the desired Elasticsearch version: 11 | 12 | 13 | 14 | ```sh 15 | docker run \ 16 | --rm \ 17 | -e discovery.type=single-node \ 18 | -p 9200:9200 \ 19 | docker.elastic.co/elasticsearch/elasticsearch:7.12.0 20 | ``` 21 | 22 | 23 | 24 | ## Articles 25 | 26 | Articles written using code from this repository and other related repositories: 27 | 28 | - [Testing Elasticsearch With Docker And Java High Level REST Client](https://mincong.io/2020/04/05/testing-elasticsearch-with-docker-and-java-client/) 29 | - [Testing Elasticsearch with ESSingleNodeTestCase](https://mincong.io/2019/11/24/essinglenodetestcase/) 30 | - [Elasticsearch: cat nodes API](https://mincong.io/2020/03/07/elasticsearch-cat-nodes-api/) 31 | - [Elasticsearch: Scroll API in Java](https://mincong.io/2020/01/19/elasticsearch-scroll-api/) 32 | - [Indexing New Data in Elasticsearch](https://mincong.io/2019/12/02/indexing-new-data-in-elasticsearch/) 33 | - [Common Index Exceptions](https://mincong.io/2020/09/13/es-index-exceptions/) 34 | - [Wrap Elasticsearch Response Into CompletableFuture](https://mincong.io/2020/07/26/es-client-completablefuture/) 35 | - [Discovery in Elasticsearch](https://mincong.io/2020/08/22/discovery-in-elasticsearch/) 36 | - [GC in Elasticsearch](https://mincong.io/2020/08/30/gc-in-elasticsearch/) 37 | - [18 Allocation Deciders in Elasticsearch](https://mincong.io/2020/09/27/shard-allocation/) 38 | - [Using Java Time in Different Frameworks](https://mincong.io/2020/10/25/java-time/) 39 | - [DVF: Indexing New Documents](https://mincong.io/2020/12/16/dvf-indexing/) 40 | - [DVF: Indexing Optimization](https://mincong.io/2020/12/17/dvf-indexing-optimization/) 41 | - [DVF: Storage Optimization](https://mincong.io/2020/12/25/dvf-storage-optimization/) 42 | - [DVF: Snapshot And Restore](https://mincong.io/2021/01/10/dvf-snapshot-and-restore/) 43 | - [DVF: 
Aggregations](https://mincong.io/2021/04/12/dvf-aggregations/) 44 | - [DVF: Real Estate Analysis For Île-de-France in 2020](https://mincong.io/2021/04/16/dvf-real-estate-analysis-idf-2020/) 45 | - [Disk Watermarks In Elasticsearch](https://mincong.io/2021/04/10/disk-watermarks-in-elasticsearch/) 46 | - [Elasticsearch: Generate Configuration With Python Jinja 2](https://mincong.io/2021/04/11/elasticsearch-generate-configuration-with-python-jinja2/) 47 | - [Fix Corrupted Index in Elasticsearch](https://mincong.io/en/elasticsearch-corrupted-index/) / [修复 Elasticsearch 中损坏的索引](https://mincong.io/cn/elasticsearch-corrupted-index/) 48 | - [【翻译】Netflix 资产管理平台 (AMP) 中的 Elasticsearch 索引策略](https://mincong.io/cn/elasticsearch-indexing-strategy-in-asset-management-platform-amp/) 49 | - [How to prevent data loss in Elasticsearch?](https://mincong.io/en/prevent-data-loss-in-elasticsearch/) / [如何防止 Elasticsearch 中的数据丢失?](https://mincong.io/cn/prevent-data-loss-in-elasticsearch/) 50 | - [Elasticsearch Settings](https://mincong.io/en/elasticsearch-settings/) / [一文了解 Elasticsearch 设置](https://mincong.io/cn/elasticsearch-settings/) 51 | - [Internal Structure Of Snapshot Repository](https://mincong.io/en/elasticsearch-snapshot-repository-structure/) / [Elasticsearch 快照仓库的内部结构](https://mincong.io/cn/elasticsearch-snapshot-repository-structure/) 52 | - [Elasticsearch Snapshot Plugins](https://mincong.io/en/elasticsearch-snapshot-plugins/) / [Elasticsearch 的快照插件](https://mincong.io/cn/elasticsearch-snapshot-plugins/) 53 | - [Slow Query Logs In Elasticsearch](https://mincong.io/en/slow-logs-in-elasticsearch/) 54 | - [The Decision System For Shard Allocation in Elasticsearch](https://mincong.io/en/shard-allocation-deciders/) 55 | - [Internal Structure Of Elasticsearch Java High-Level REST Client](https://mincong.io/en/elasticsearch-hlrc/) 56 | 57 | ## Resources 58 | 59 | Not related to this repository, but interesting resources to read about Elasticsearch. 
60 | 61 | Elasticsearch documentation: 62 | 63 | - [Java High Level REST Client](https://www.elastic.co/guide/en/elasticsearch/client/java-rest/current/java-rest-high.html) 64 | - [Java Testing Framework](https://www.elastic.co/guide/en/elasticsearch/reference/current/testing-framework.html) 65 | - [REST APIs](https://www.elastic.co/guide/en/elasticsearch/reference/current/rest-apis.html) 66 | 67 | Certifications: 68 | 69 | - [Elastic Certified Engineer](https://www.elastic.co/training/elastic-certified-engineer-exam) 70 | - ^ Article [ElasticSearch Engineer Exam v7.13 (2021)](https://kapuablog.wordpress.com/2021/10/15/elasticsearch-engineer-exam-v7-13-2021/) and GitHub project [Elastic Certified Engineer exam notes](https://github.com/mohclips/Elastic-Certified-Engineer-Exam-Notes) by [Nicholas Cross](https://github.com/mohclips) 71 | 72 | Books: 73 | 74 | - Radu Gheorghe, Matthew Lee Hinman, Roy Russo, "Elasticsearch in Action", Manning, 2016. 75 | - Clinton Gormley and Zachary Tong, "Elasticsearch: The Definitive Guide", O'Reilly Media, 2014 - 2015.
76 | 77 | 78 | GitHub: 79 | 80 | - Awesome Search
81 | https://github.com/frutik/awesome-search 82 | - 互联网 Java 工程师进阶知识完全扫盲 / Elasticsearch
83 | https://github.com/doocs/advanced-java 84 | 85 | Blogs: 86 | 87 | - Code 972, A blog about BigData, Cloud and Search technologies by Itamar Syn-Hershko 88 | - 铭毅天下, Elasticsearch Expert, 89 | - 少强, Alicloud, 90 | - 木洛, Alicloud, 91 | 92 | Articles: 93 | 94 | - Michael McCandless, "Visualizing Lucene's segment merges", 2011.
95 | 96 | - Nico Tonozzi and Dumitru Daniliuc, "Reducing search indexing latency to one second", _Twitter_, 2020.
97 | 98 | - Prabin Meitei M, "Garbage Collection in Elasticsearch and the G1GC", _Medium_, 2018.
99 | 100 | 101 | Forums: 102 | 103 | - Elastic 中文社区 https://elasticsearch.cn/ 104 | 105 | ## Development 106 | 107 | Upgrade Elasticsearch version, e.g 7.8.0 -> 7.10.0: 108 | 109 | ```sh 110 | > scripts/upgrade-es-version.sh 7.8.0 7.10.0 111 | ✅ pom.xml 112 | ✅ cluster/src/test/resources/docker-compose.yml 113 | ✅ README.md 114 | Finished. 115 | ``` 116 | 117 | [actions]: https://github.com/mincong-h/learning-elasticsearch/actions 118 | [actions-img]: https://github.com/mincong-h/learning-elasticsearch/workflows/Actions/badge.svg 119 | -------------------------------------------------------------------------------- /basics/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 4.0.0 7 | 8 | io.mincong 9 | es-demo-parent 10 | 1.0-SNAPSHOT 11 | 12 | 13 | es-demo-test-framework 14 | Elasticsearch Demos - Basics 15 | 16 | 17 | 18 | org.apache.logging.log4j 19 | log4j-core 20 | test 21 | 22 | 23 | org.elasticsearch.client 24 | elasticsearch-rest-high-level-client 25 | test 26 | 27 | 28 | org.elasticsearch.test 29 | framework 30 | test 31 | 32 | 33 | com.fasterxml.jackson.core 34 | jackson-databind 35 | test 36 | 37 | 38 | net.javacrumbs.json-unit 39 | json-unit-assertj 40 | test 41 | 42 | 43 | 44 | 45 | 46 | 47 | io.fabric8 48 | docker-maven-plugin 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/CommonUnitClassTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.elasticsearch.common.unit.ByteSizeUnit; 5 | import org.elasticsearch.common.unit.ByteSizeValue; 6 | import org.junit.Test; 7 | 8 | public class CommonUnitClassTest { 9 | 10 | @Test 11 | public void byteSizeValue() { 12 | var value = new ByteSizeValue(1, ByteSizeUnit.MB); 13 | 14 | // convert to bytes 15 | 
Assertions.assertThat(value.getBytes()).isEqualTo(1024 * 1024); 16 | 17 | // convert to KB 18 | Assertions.assertThat(value.getKb()).isEqualTo(1024); 19 | Assertions.assertThat(value.getKbFrac()).isEqualTo(1024.0); // preserve fraction 20 | 21 | // comparable 22 | var a = new ByteSizeValue(1, ByteSizeUnit.MB); 23 | var b = new ByteSizeValue(2, ByteSizeUnit.MB); 24 | Assertions.assertThat(a.compareTo(b)).isNegative(); 25 | Assertions.assertThat(b.compareTo(a)).isPositive(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/CompletableFutureRestClientIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.io.IOException; 4 | import java.util.concurrent.CompletableFuture; 5 | import java.util.concurrent.CompletionException; 6 | import java.util.concurrent.Executors; 7 | import org.apache.http.HttpHost; 8 | import org.assertj.core.api.Assertions; 9 | import org.elasticsearch.action.ActionListener; 10 | import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest; 11 | import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; 12 | import org.elasticsearch.client.RequestOptions; 13 | import org.elasticsearch.client.RestClient; 14 | import org.elasticsearch.client.RestHighLevelClient; 15 | import org.elasticsearch.test.rest.ESRestTestCase; 16 | import org.junit.*; 17 | 18 | /** 19 | * Wraps the response of Java High Level REST Client {@link RestHighLevelClient} into {@link 20 | * CompletableFuture}. 
21 | * 22 | * @author Mincong Huang 23 | * @blog https://mincong.io/2020/07/26/es-client-completablefuture/ 24 | */ 25 | public class CompletableFutureRestClientIT extends ESRestTestCase { 26 | 27 | @BeforeClass 28 | public static void setUpBeforeClass() { 29 | System.setProperty("tests.rest.cluster", "localhost:19200"); 30 | } 31 | 32 | @AfterClass 33 | public static void tearDownAfterClass() { 34 | System.clearProperty("tests.rest.cluster"); 35 | } 36 | 37 | private RestHighLevelClient restClient; 38 | 39 | @Before 40 | @Override 41 | public void setUp() throws Exception { 42 | super.setUp(); 43 | 44 | var builder = RestClient.builder(new HttpHost("localhost", 19200, "http")); 45 | restClient = new RestHighLevelClient(builder); 46 | } 47 | 48 | @After 49 | public void tearDown() throws Exception { 50 | restClient.close(); 51 | super.tearDown(); 52 | } 53 | 54 | @Test 55 | public void viaActionListenerWrap() { 56 | // demo:start 57 | var cf = new CompletableFuture(); 58 | restClient 59 | .cluster() 60 | .healthAsync( 61 | new ClusterHealthRequest(), 62 | RequestOptions.DEFAULT, 63 | ActionListener.wrap(cf::complete, cf::completeExceptionally)); 64 | // demo:end 65 | 66 | var response = cf.join(); 67 | Assertions.assertThat(response.getNumberOfNodes()).isEqualTo(1); 68 | } 69 | 70 | @Test 71 | public void viaActionListener() { 72 | // demo:start 73 | var cf = new CompletableFuture(); 74 | restClient 75 | .cluster() 76 | .healthAsync( 77 | new ClusterHealthRequest(), 78 | RequestOptions.DEFAULT, 79 | new ActionListener<>() { 80 | 81 | @Override 82 | public void onResponse(ClusterHealthResponse response) { 83 | cf.complete(response); 84 | } 85 | 86 | @Override 87 | public void onFailure(Exception e) { 88 | cf.completeExceptionally(e); 89 | } 90 | }); 91 | // demo:end 92 | 93 | var response = cf.join(); 94 | Assertions.assertThat(response.getNumberOfNodes()).isEqualTo(1); 95 | } 96 | 97 | @Test 98 | public void supplyAsyncWithExecutor() { 99 | var executor = 
Executors.newSingleThreadExecutor(); 100 | try { 101 | // demo:start 102 | var cf = 103 | CompletableFuture.supplyAsync( 104 | () -> { 105 | try { 106 | return restClient 107 | .cluster() 108 | .health(new ClusterHealthRequest(), RequestOptions.DEFAULT); 109 | } catch (IOException e) { 110 | throw new CompletionException(e); 111 | } 112 | }, 113 | executor); 114 | // demo:end 115 | 116 | var response = cf.join(); 117 | Assertions.assertThat(response.getNumberOfNodes()).isEqualTo(1); 118 | } finally { 119 | executor.shutdownNow(); 120 | } 121 | } 122 | 123 | /** 124 | * 125 | * 126 | *
127 |    * SEVERE: 1 thread leaked from SUITE scope at io.mincong.elasticsearch.RestClientCompletableFutureIT:
128 |    *    1) Thread[id=32, name=ForkJoinPool.commonPool-worker-3, state=WAITING, group=TGRP-RestClientCompletableFutureIT]
129 |    *         at java.base@14/jdk.internal.misc.Unsafe.park(Native Method)
130 |    *         at java.base@14/java.util.concurrent.locks.LockSupport.park(LockSupport.java:211)
131 |    *         at java.base@14/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1633)
132 |    *         at java.base@14/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:177)
133 |    * 
134 | */ 135 | @Test 136 | @Ignore("thread leaked") 137 | public void supplyAsyncWithoutExecutor() { 138 | // demo:start 139 | var cf = 140 | CompletableFuture.supplyAsync( 141 | () -> { 142 | try { 143 | return restClient 144 | .cluster() 145 | .health(new ClusterHealthRequest(), RequestOptions.DEFAULT); 146 | } catch (IOException e) { 147 | throw new CompletionException(e); 148 | } 149 | }); 150 | // demo:end 151 | 152 | var response = cf.join(); 153 | Assertions.assertThat(response.getNumberOfNodes()).isEqualTo(1); 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/CompletableFutureTransportClientTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.time.Duration; 4 | import java.util.concurrent.CompletableFuture; 5 | import java.util.concurrent.Executors; 6 | import java.util.concurrent.TimeUnit; 7 | import org.assertj.core.api.Assertions; 8 | import org.elasticsearch.action.ActionListener; 9 | import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; 10 | import org.elasticsearch.common.unit.TimeValue; 11 | import org.elasticsearch.test.ESSingleNodeTestCase; 12 | import org.junit.Ignore; 13 | import org.junit.Test; 14 | 15 | /** 16 | * Wraps the response of legacy client {@link org.elasticsearch.client.Client} into {@link 17 | * CompletableFuture}. 
18 | * 19 | * @author Mincong Huang 20 | * @blog https://mincong.io/2020/07/26/es-client-completablefuture/ 21 | */ 22 | public class CompletableFutureTransportClientTest extends ESSingleNodeTestCase { 23 | 24 | @Test 25 | public void viaActionListenerWrap() { 26 | var client = client(); 27 | 28 | // demo:start 29 | var cf = new CompletableFuture(); 30 | client 31 | .admin() 32 | .cluster() 33 | .prepareState() 34 | .execute(ActionListener.wrap(cf::complete, cf::completeExceptionally)); 35 | var stateFuture = cf.thenApply(ClusterStateResponse::getState); 36 | // demo:end 37 | 38 | var clusterState = stateFuture.join(); 39 | Assertions.assertThat(clusterState.getNodes().getSize()).isEqualTo(1); 40 | } 41 | 42 | @Test 43 | public void viaActionListener() { 44 | var client = client(); 45 | 46 | // demo:start 47 | var cf = new CompletableFuture(); 48 | client 49 | .admin() 50 | .cluster() 51 | .prepareState() 52 | .execute( 53 | new ActionListener<>() { 54 | 55 | @Override 56 | public void onResponse(ClusterStateResponse response) { 57 | cf.complete(response); 58 | } 59 | 60 | @Override 61 | public void onFailure(Exception e) { 62 | cf.completeExceptionally(e); 63 | } 64 | }); 65 | // demo:end 66 | 67 | var response = cf.join(); 68 | Assertions.assertThat(response.getState().getNodes().getSize()).isEqualTo(1); 69 | } 70 | 71 | @Test 72 | public void timeoutGet() throws Exception { 73 | var client = client(); 74 | var cf = new CompletableFuture(); 75 | client 76 | .admin() 77 | .cluster() 78 | .prepareState() 79 | .execute(ActionListener.wrap(cf::complete, cf::completeExceptionally)); 80 | 81 | // demo:start 82 | var response = cf.get(3000, TimeUnit.MILLISECONDS); 83 | // demo:end 84 | 85 | Assertions.assertThat(response.getState().getNodes().getSize()).isEqualTo(1); 86 | } 87 | 88 | @Test 89 | public void timeoutWithNumber() { 90 | var client = client(); 91 | var cf = new CompletableFuture(); 92 | client 93 | .admin() 94 | .cluster() 95 | .prepareState() 96 | 
.execute(ActionListener.wrap(cf::complete, cf::completeExceptionally)); 97 | 98 | // demo:start 99 | var responseFuture = cf.orTimeout(3000, TimeUnit.MILLISECONDS); 100 | // demo:end 101 | 102 | Assertions.assertThat(responseFuture.join().getState().getNodes().getSize()).isEqualTo(1); 103 | } 104 | 105 | @Test 106 | public void timeoutWithDuration() { 107 | var client = client(); 108 | var cf = new CompletableFuture(); 109 | client 110 | .admin() 111 | .cluster() 112 | .prepareState() 113 | .execute(ActionListener.wrap(cf::complete, cf::completeExceptionally)); 114 | 115 | // demo:start 116 | var timeout = Duration.ofSeconds(3); 117 | var responseFuture = cf.orTimeout(timeout.toMillis(), TimeUnit.MILLISECONDS); 118 | // demo:end 119 | 120 | Assertions.assertThat(responseFuture.join().getState().getNodes().getSize()).isEqualTo(1); 121 | } 122 | 123 | @Test 124 | public void timeoutWithTimeValue() { 125 | var client = client(); 126 | var cf = new CompletableFuture(); 127 | client 128 | .admin() 129 | .cluster() 130 | .prepareState() 131 | .execute(ActionListener.wrap(cf::complete, cf::completeExceptionally)); 132 | 133 | // demo:start 134 | var timeout = TimeValue.timeValueSeconds(3); 135 | var responseFuture = cf.orTimeout(timeout.millis(), TimeUnit.MILLISECONDS); 136 | // demo:end 137 | 138 | Assertions.assertThat(responseFuture.join().getState().getNodes().getSize()).isEqualTo(1); 139 | } 140 | 141 | @Test 142 | public void supplyAsyncWithExecutor() { 143 | var client = client(); 144 | var executor = Executors.newSingleThreadExecutor(); 145 | try { 146 | // demo:start 147 | var cf = 148 | CompletableFuture.supplyAsync( 149 | () -> client.admin().cluster().prepareState().get(), executor); 150 | // demo:end 151 | 152 | var response = cf.join(); 153 | Assertions.assertThat(response.getState().getNodes().getSize()).isEqualTo(1); 154 | } finally { 155 | executor.shutdownNow(); 156 | } 157 | } 158 | 159 | /** 160 | * 161 | * 162 | *
163 |    * SEVERE: 1 thread leaked from SUITE scope at io.mincong.elasticsearch.TransportClientCompletableFutureTest:
164 |    *    1) Thread[id=28, name=ForkJoinPool.commonPool-worker-3, state=WAITING, group=TGRP-TransportClientCompletableFutureTest]
165 |    *         at java.base@14/jdk.internal.misc.Unsafe.park(Native Method)
166 |    *         at java.base@14/java.util.concurrent.locks.LockSupport.park(LockSupport.java:211)
167 |    *         at java.base@14/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1633)
168 |    *         at java.base@14/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:177)
169 |    * 
170 | */ 171 | @Test 172 | @Ignore("thread leaked") 173 | public void supplyAsyncWithoutExecutor() { 174 | var client = client(); 175 | // demo:start 176 | var cf = CompletableFuture.supplyAsync(() -> client.admin().cluster().prepareState().get()); 177 | // demo:end 178 | 179 | var response = cf.join(); 180 | Assertions.assertThat(response.getState().getNodes().getSize()).isEqualTo(1); 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/GetAsyncTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import java.util.concurrent.CompletableFuture; 6 | import org.elasticsearch.action.ActionListener; 7 | import org.elasticsearch.action.bulk.BulkItemResponse; 8 | import org.elasticsearch.action.bulk.BulkResponse; 9 | import org.elasticsearch.action.get.GetResponse; 10 | import org.elasticsearch.action.index.IndexRequest; 11 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 12 | import org.elasticsearch.rest.RestStatus; 13 | import org.elasticsearch.test.ESSingleNodeTestCase; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | public class GetAsyncTest extends ESSingleNodeTestCase { 18 | 19 | @Override 20 | @Before 21 | public void setUp() throws Exception { 22 | super.setUp(); 23 | 24 | Map sansa = new HashMap<>(); 25 | sansa.put("firstName", "Sansa"); 26 | sansa.put("lastName", "Stark"); 27 | 28 | Map arya = new HashMap<>(); 29 | arya.put("firstName", "Arya"); 30 | arya.put("lastName", "Stark"); 31 | 32 | BulkResponse response = 33 | client() 34 | .prepareBulk() 35 | .add(new IndexRequest().index("users").id("sansa").source(sansa)) 36 | .add(new IndexRequest().index("users").id("arya").source(arya)) 37 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 38 | .execute() 39 | .actionGet(); 40 | 41 | 
assertEquals(RestStatus.OK, response.status()); 42 | for (BulkItemResponse r : response.getItems()) { 43 | assertEquals(RestStatus.CREATED, r.status()); 44 | } 45 | } 46 | 47 | @Test 48 | public void getRequest_completableFuture() { 49 | var cf = new CompletableFuture(); 50 | client() 51 | .prepareGet() 52 | .setIndex("users") 53 | .setId("sansa") 54 | .execute( 55 | new ActionListener<>() { 56 | 57 | @Override 58 | public void onResponse(GetResponse response) { 59 | cf.complete(response); 60 | } 61 | 62 | @Override 63 | public void onFailure(Exception e) { 64 | cf.completeExceptionally(e); 65 | } 66 | }); 67 | 68 | var response = cf.join(); 69 | 70 | assertEquals("users", response.getIndex()); 71 | assertEquals("sansa", response.getId()); 72 | 73 | Map source = response.getSourceAsMap(); 74 | assertEquals("Sansa", source.get("firstName")); 75 | assertEquals("Stark", source.get("lastName")); 76 | } 77 | 78 | @Test 79 | public void getRequest_customListener() { 80 | var listener = new EsListener(); 81 | client().prepareGet().setIndex("users").setId("sansa").execute(listener); 82 | 83 | var response = listener.getCompletableFuture().join(); 84 | 85 | assertEquals("users", response.getIndex()); 86 | assertEquals("sansa", response.getId()); 87 | 88 | Map source = response.getSourceAsMap(); 89 | assertEquals("Sansa", source.get("firstName")); 90 | assertEquals("Stark", source.get("lastName")); 91 | } 92 | 93 | static class EsListener implements ActionListener { 94 | 95 | private final CompletableFuture completableFuture; 96 | 97 | public EsListener() { 98 | this.completableFuture = new CompletableFuture<>(); 99 | } 100 | 101 | @Override 102 | public void onResponse(T response) { 103 | completableFuture.complete(response); 104 | } 105 | 106 | @Override 107 | public void onFailure(Exception e) { 108 | completableFuture.completeExceptionally(e); 109 | } 110 | 111 | public CompletableFuture getCompletableFuture() { 112 | return completableFuture; 113 | } 114 | } 115 | } 116 
| -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/GetTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import org.elasticsearch.action.bulk.BulkItemResponse; 6 | import org.elasticsearch.action.bulk.BulkResponse; 7 | import org.elasticsearch.action.get.GetResponse; 8 | import org.elasticsearch.action.index.IndexRequest; 9 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 10 | import org.elasticsearch.rest.RestStatus; 11 | import org.elasticsearch.test.ESSingleNodeTestCase; 12 | import org.junit.Before; 13 | import org.junit.Test; 14 | 15 | /** 16 | * Tests "Get API". 17 | * 18 | * @author Mincong Huang 19 | * @see Get 21 | * API | Java REST Client | Elastic 22 | */ 23 | public class GetTest extends ESSingleNodeTestCase { 24 | 25 | @Override 26 | @Before 27 | public void setUp() throws Exception { 28 | super.setUp(); 29 | 30 | Map sansa = new HashMap<>(); 31 | sansa.put("firstName", "Sansa"); 32 | sansa.put("lastName", "Stark"); 33 | 34 | Map arya = new HashMap<>(); 35 | arya.put("firstName", "Arya"); 36 | arya.put("lastName", "Stark"); 37 | 38 | BulkResponse response = 39 | client() 40 | .prepareBulk() 41 | .add(new IndexRequest().index("users").id("sansa").source(sansa)) 42 | .add(new IndexRequest().index("users").id("arya").source(arya)) 43 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 44 | .execute() 45 | .actionGet(); 46 | 47 | assertEquals(RestStatus.OK, response.status()); 48 | for (BulkItemResponse r : response.getItems()) { 49 | assertEquals(RestStatus.CREATED, r.status()); 50 | } 51 | } 52 | 53 | @Test 54 | public void getRequest() { 55 | GetResponse response = 56 | client() // 57 | .prepareGet() 58 | .setIndex("users") 59 | .setId("sansa") 60 | .execute() 61 | .actionGet(); 62 | 63 | assertEquals("users", 
response.getIndex()); 64 | assertEquals("sansa", response.getId()); 65 | 66 | Map source = response.getSourceAsMap(); 67 | assertEquals("Sansa", source.get("firstName")); 68 | assertEquals("Stark", source.get("lastName")); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/GreenClusterIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.Map; 4 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 5 | import org.elasticsearch.cluster.health.ClusterHealthStatus; 6 | import org.elasticsearch.test.ESIntegTestCase; 7 | import org.elasticsearch.test.ESIntegTestCase.ClusterScope; 8 | import org.junit.Test; 9 | 10 | /** 11 | * Elasticsearch cluster is {@link ClusterHealthStatus#GREEN} when all shards are allocated (both 12 | * primary and replicas). Here, we have two data nodes: one for primary shard and one for replica 13 | * shard. Since the condition is satisfied, the cluster health status is GREEN. 
14 | * 15 | * @author Mincong Huang 16 | * @see Cluster 18 | * Health API | Elasticsearch Reference 19 | */ 20 | @ClusterScope(numDataNodes = 2) 21 | public class GreenClusterIT extends ESIntegTestCase { 22 | 23 | @Override 24 | public int minimumNumberOfReplicas() { 25 | return 1; 26 | } 27 | 28 | @Override 29 | public int maximumNumberOfReplicas() { 30 | return 1; 31 | } 32 | 33 | @Test 34 | public void yellowClusterWhenMissingReplicas() { 35 | client() 36 | .prepareIndex() 37 | .setIndex("users") 38 | .setSource(Map.of("foo", "bar")) 39 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 40 | .execute() 41 | .actionGet(); 42 | ClusterHealthStatus status = ensureGreen("users"); 43 | assertEquals(ClusterHealthStatus.GREEN, status); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/HttpIndexIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import static java.nio.charset.StandardCharsets.UTF_8; 4 | 5 | import java.io.OutputStream; 6 | import java.io.OutputStreamWriter; 7 | import java.io.Writer; 8 | import java.math.BigDecimal; 9 | import java.net.HttpURLConnection; 10 | import java.net.URL; 11 | import java.util.Scanner; 12 | import net.javacrumbs.jsonunit.assertj.JsonAssertions; 13 | import org.apache.http.HttpHost; 14 | import org.assertj.core.api.Assertions; 15 | import org.elasticsearch.action.index.IndexRequest; 16 | import org.elasticsearch.client.RequestOptions; 17 | import org.elasticsearch.client.RestClient; 18 | import org.elasticsearch.client.RestHighLevelClient; 19 | import org.elasticsearch.common.xcontent.XContentType; 20 | import org.elasticsearch.rest.RestStatus; 21 | import org.junit.Assert; 22 | import org.junit.Test; 23 | 24 | public class HttpIndexIT { 25 | 26 | @Test 27 | public void index() throws Exception { 28 | // HTTP Request 29 | URL url = new 
URL("http://localhost:19200/users/_doc/sansa?pretty"); 30 | HttpURLConnection conn = (HttpURLConnection) url.openConnection(); 31 | conn.setDoOutput(true); 32 | conn.setRequestMethod("PUT"); 33 | conn.setRequestProperty("Content-Type", "application/json"); 34 | try (OutputStream os = conn.getOutputStream(); 35 | Writer writer = new OutputStreamWriter(os, UTF_8)) { 36 | // language=json 37 | writer.write("{ \"name\": \"Sansa Stark\" }"); 38 | } 39 | 40 | // HTTP Response 41 | try { 42 | conn.connect(); 43 | int statusCode = conn.getResponseCode(); 44 | Assertions.assertThat(statusCode).isEqualTo(201); 45 | Scanner s = new Scanner(conn.getInputStream()).useDelimiter("\\A"); 46 | String content = s.hasNext() ? s.next() : ""; 47 | System.out.println("PUT " + statusCode + " " + url); 48 | System.out.println(content); 49 | JsonAssertions.assertThatJson(content) 50 | .isObject() 51 | .containsEntry("_index", "users") 52 | .containsEntry("_type", "_doc") 53 | .containsEntry("_id", "sansa") 54 | /* 55 | * The first time a document is uploaded, the version is set to 1. 56 | * Then, the following modifications will result to version 57 | * incrementation: 2, 3, 4, ... 
58 | */ 59 | .containsKey("_version") 60 | .containsEntry("result", "created") 61 | .containsKey("_seq_no") 62 | .containsEntry("_primary_term", BigDecimal.valueOf(1)); 63 | JsonAssertions.assertThatJson(content) 64 | .node("_shards") 65 | .isObject() 66 | .containsEntry("total", BigDecimal.valueOf(2)) 67 | .containsEntry("successful", BigDecimal.valueOf(1)) 68 | .containsEntry("failed", BigDecimal.valueOf(0)); 69 | } finally { 70 | conn.disconnect(); 71 | } 72 | } 73 | 74 | @Test 75 | public void itShouldIndexWithRestClient() throws Exception { 76 | var builder = RestClient.builder(new HttpHost("localhost", 19200, "http")); 77 | var idxRequest = 78 | new IndexRequest("my_index").source("{\"msg\":\"Hello world!\"}", XContentType.JSON); 79 | try (var client = new RestHighLevelClient(builder)) { 80 | var idxResponse = client.index(idxRequest, RequestOptions.DEFAULT); 81 | Assert.assertEquals("my_index", idxResponse.getIndex()); 82 | Assert.assertEquals(RestStatus.CREATED, idxResponse.status()); 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/IndexTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import org.elasticsearch.action.bulk.BulkItemResponse; 6 | import org.elasticsearch.action.bulk.BulkResponse; 7 | import org.elasticsearch.action.index.IndexRequest; 8 | import org.elasticsearch.action.index.IndexResponse; 9 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 10 | import org.elasticsearch.action.support.replication.ReplicationResponse.ShardInfo; 11 | import org.elasticsearch.common.xcontent.XContentType; 12 | import org.elasticsearch.rest.RestStatus; 13 | import org.elasticsearch.test.ESSingleNodeTestCase; 14 | import org.junit.Test; 15 | 16 | /** 17 | * Tests "Index API". 
18 | * 19 | * @author Mincong Huang 20 | * @see Index 22 | * API | Java REST Client | Elastic 23 | */ 24 | public class IndexTest extends ESSingleNodeTestCase { 25 | 26 | @Test 27 | public void itShouldIndexWithoutDocumentId() { 28 | IndexRequest idxRequest = 29 | new IndexRequest("msg").source("{\"msg\":\"Hello world!\"}", XContentType.JSON); 30 | IndexResponse idxResponse = client().index(idxRequest).actionGet(); 31 | assertEquals("msg", idxResponse.getIndex()); 32 | assertEquals(RestStatus.CREATED, idxResponse.status()); 33 | logger.info("docId={}", idxRequest.id()); 34 | } 35 | 36 | @Test 37 | public void itShouldIndexWithBuilder() { 38 | IndexResponse idxResponse = 39 | client() 40 | .prepareIndex() 41 | .setIndex("my_index") 42 | .setSource("{\"msg\":\"Hello world!\"}", XContentType.JSON) 43 | .execute() 44 | .actionGet(); 45 | assertEquals("my_index", idxResponse.getIndex()); 46 | assertEquals(RestStatus.CREATED, idxResponse.status()); 47 | } 48 | 49 | @Test 50 | public void itShouldIndexWithBulkRequest() { 51 | Map sansa = new HashMap<>(); 52 | sansa.put("firstName", "Sansa"); 53 | sansa.put("lastName", "Stark"); 54 | 55 | Map arya = new HashMap<>(); 56 | arya.put("firstName", "Arya"); 57 | arya.put("lastName", "Stark"); 58 | 59 | BulkResponse response = 60 | client() 61 | .prepareBulk() 62 | .add(new IndexRequest().index("users").id("sansa").source(sansa)) 63 | .add(new IndexRequest().index("users").id("arya").source(arya)) 64 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 65 | .execute() 66 | .actionGet(); 67 | 68 | assertEquals(RestStatus.OK, response.status()); 69 | for (BulkItemResponse r : response.getItems()) { 70 | assertEquals(RestStatus.CREATED, r.status()); 71 | } 72 | } 73 | 74 | @Test 75 | public void indexing() { 76 | HashMap source = new HashMap<>(); 77 | source.put("firstName", "Sansa"); 78 | source.put("lastName", "Stark"); 79 | 80 | IndexRequest idxRequest = 81 | new IndexRequest() // 82 | .index("users") 83 | .id("sansa") 84 | 
.source(source); 85 | 86 | IndexResponse idxResponse = client().index(idxRequest).actionGet(); 87 | assertEquals("users", idxResponse.getIndex()); 88 | assertEquals(RestStatus.CREATED, idxResponse.status()); 89 | assertEquals("sansa", idxResponse.getId()); 90 | assertEquals(1L, idxResponse.getPrimaryTerm()); 91 | assertEquals(0L, idxResponse.getSeqNo()); 92 | assertEquals(1L, idxResponse.getVersion()); 93 | 94 | ShardInfo shardInfo = idxResponse.getShardInfo(); 95 | assertEquals(0, shardInfo.getFailed()); 96 | assertEquals(1, shardInfo.getSuccessful()); 97 | assertEquals(1, shardInfo.getTotal()); 98 | } 99 | 100 | @Test 101 | public void itShouldIndexContentTypeJson() { 102 | IndexRequest idxRequest = 103 | new IndexRequest("my_index").source("{\"msg\":\"Hello world!\"}", XContentType.JSON); 104 | IndexResponse idxResponse = client().index(idxRequest).actionGet(); 105 | assertEquals("my_index", idxResponse.getIndex()); 106 | assertEquals(RestStatus.CREATED, idxResponse.status()); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/MultiGetTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import org.elasticsearch.action.bulk.BulkItemResponse; 6 | import org.elasticsearch.action.bulk.BulkResponse; 7 | import org.elasticsearch.action.get.MultiGetItemResponse; 8 | import org.elasticsearch.action.get.MultiGetRequest; 9 | import org.elasticsearch.action.get.MultiGetResponse; 10 | import org.elasticsearch.action.index.IndexRequest; 11 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 12 | import org.elasticsearch.rest.RestStatus; 13 | import org.elasticsearch.test.ESSingleNodeTestCase; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | /** 18 | * Tests "Multi-Get API". 
19 | * 20 | * @author Mincong Huang 21 | * @see Multi-Get 23 | * API | Java REST Client | Elastic 24 | */ 25 | public class MultiGetTest extends ESSingleNodeTestCase { 26 | 27 | @Override 28 | @Before 29 | public void setUp() throws Exception { 30 | super.setUp(); 31 | 32 | Map sansa = new HashMap<>(); 33 | sansa.put("firstName", "Sansa"); 34 | sansa.put("lastName", "Stark"); 35 | 36 | Map arya = new HashMap<>(); 37 | arya.put("firstName", "Arya"); 38 | arya.put("lastName", "Stark"); 39 | 40 | BulkResponse response = 41 | client() // 42 | .prepareBulk() 43 | .add(new IndexRequest().index("users").id("sansa").source(sansa)) 44 | .add(new IndexRequest().index("users").id("arya").source(arya)) 45 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 46 | .execute() 47 | .actionGet(); 48 | 49 | assertEquals(RestStatus.OK, response.status()); 50 | for (BulkItemResponse r : response.getItems()) { 51 | assertEquals(RestStatus.CREATED, r.status()); 52 | } 53 | } 54 | 55 | @Test 56 | public void multiGetRequest() { 57 | MultiGetRequest request = 58 | new MultiGetRequest() // 59 | .add("users", "sansa") 60 | .add("users", "arya"); 61 | MultiGetResponse response = client().multiGet(request).actionGet(); 62 | MultiGetItemResponse[] responses = response.getResponses(); 63 | 64 | assertEquals("users", responses[0].getIndex()); 65 | assertEquals("sansa", responses[0].getId()); 66 | Map source0 = responses[0].getResponse().getSourceAsMap(); 67 | assertEquals("Sansa", source0.get("firstName")); 68 | assertEquals("Stark", source0.get("lastName")); 69 | 70 | assertEquals("users", responses[1].getIndex()); 71 | assertEquals("arya", responses[1].getId()); 72 | Map source1 = responses[1].getResponse().getSourceAsMap(); 73 | assertEquals("Arya", source1.get("firstName")); 74 | assertEquals("Stark", source1.get("lastName")); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/UpdateTest.java: 
-------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 4 | import org.elasticsearch.common.xcontent.XContentType; 5 | import org.elasticsearch.rest.RestStatus; 6 | import org.elasticsearch.test.ESSingleNodeTestCase; 7 | import org.junit.Before; 8 | import org.junit.Test; 9 | 10 | /** 11 | * Tests "Update API". 12 | * 13 | * @author Mincong Huang 14 | */ 15 | public class UpdateTest extends ESSingleNodeTestCase { 16 | 17 | @Override 18 | @Before 19 | public void setUp() throws Exception { 20 | super.setUp(); 21 | 22 | var response = 23 | client() 24 | .prepareIndex() 25 | .setIndex("users") 26 | .setId("1") 27 | .setSource("{\"name\":\"My Name\"}", XContentType.JSON) 28 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 29 | .execute() 30 | .actionGet(); 31 | assertEquals(RestStatus.CREATED, response.status()); 32 | assertEquals(1L, response.getVersion()); 33 | } 34 | 35 | @Test 36 | public void updateExistingDocument() { 37 | // Given an existing user "1" 38 | 39 | // When updating it 40 | var response = 41 | client() // 42 | .prepareUpdate() 43 | .setIndex("users") 44 | .setId("1") 45 | .setDoc("{\"age\":28}", XContentType.JSON) 46 | .setFetchSource(true) // fetch source for testing purpose 47 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 48 | .execute() 49 | .actionGet(); 50 | 51 | // Then the update is successful 52 | assertEquals(RestStatus.OK, response.status()); 53 | assertEquals("users", response.getIndex()); 54 | assertEquals("1", response.getId()); 55 | 56 | // And the source is merged: both old and new keys exist 57 | var source = response.getGetResult().getSource(); 58 | assertEquals("My Name", source.get("name")); 59 | assertEquals(28, source.get("age")); 60 | 61 | // And the version is incremented 62 | assertEquals(2L, response.getVersion()); 63 | } 64 | 65 | @Test 66 | public void updateNonexistentDocument() { 67 | // 
Given a nonexistent user "2" 68 | 69 | // When updating it 70 | var response = 71 | client() // 72 | .prepareUpdate() 73 | .setIndex("users") 74 | .setId("2") 75 | .setDoc("{\"age\":28}", XContentType.JSON) 76 | .setDocAsUpsert(true) // use update-or-insert option to avoid exception 77 | .setFetchSource(true) // fetch source for testing purpose 78 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 79 | .execute() 80 | .actionGet(); 81 | 82 | // Then the update is successful 83 | assertEquals(RestStatus.CREATED, response.status()); 84 | assertEquals("users", response.getIndex()); 85 | assertEquals("2", response.getId()); 86 | 87 | // And the source contains the original source 88 | var source = response.getGetResult().getSource(); 89 | assertEquals(28, source.get("age")); 90 | 91 | // And this is the first version 92 | assertEquals(1L, response.getVersion()); 93 | } 94 | 95 | @Test 96 | public void overwriteExistingField() { 97 | // Given an existing user "1" 98 | 99 | // When updating it 100 | var response = 101 | client() // 102 | .prepareUpdate() 103 | .setIndex("users") 104 | .setId("1") 105 | .setDoc("{\"name\":\"My New Name\",\"age\":28}", XContentType.JSON) 106 | .setDocAsUpsert(true) // use update-or-insert option to avoid exception 107 | .setFetchSource(true) // fetch source for testing purpose 108 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 109 | .execute() 110 | .actionGet(); 111 | 112 | // Then the update is successful 113 | assertEquals(RestStatus.OK, response.status()); 114 | assertEquals("users", response.getIndex()); 115 | assertEquals("1", response.getId()); 116 | 117 | // And the source is updated: existing field is overwritten and new field is added 118 | var source = response.getGetResult().getSource(); 119 | assertEquals(28, source.get("age")); 120 | assertEquals("My New Name", source.get("name")); 121 | 122 | // And the version is incremented 123 | assertEquals(2L, response.getVersion()); 124 | } 125 | 126 | @Test 127 | public void 
dropExistingValue() { 128 | // Given an existing user "1" 129 | 130 | // When update it with name equal to null 131 | var response = 132 | client() // 133 | .prepareUpdate() 134 | .setIndex("users") 135 | .setId("1") 136 | .setDoc("{\"name\":null}", XContentType.JSON) 137 | .setFetchSource(true) // fetch source for testing purpose 138 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 139 | .execute() 140 | .actionGet(); 141 | 142 | // Then the update is successful and the existing value is overwritten 143 | var source = response.getGetResult().getSource(); 144 | assertTrue(source.containsKey("name")); 145 | assertNull(source.get("name")); 146 | 147 | // And the version is incremented 148 | assertEquals(2L, response.getVersion()); 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/YelloClusterIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.Map; 4 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 5 | import org.elasticsearch.cluster.health.ClusterHealthStatus; 6 | import org.elasticsearch.test.ESIntegTestCase; 7 | import org.elasticsearch.test.ESIntegTestCase.ClusterScope; 8 | import org.junit.Test; 9 | 10 | /** 11 | * Elasticsearch cluster is {@link ClusterHealthStatus#YELLOW} when the primary shard is allocated 12 | * but replicas are not. Here, we have only one data node but we need at least two data nodes: one 13 | * for primary shard and one for replica shard. Since the condition is not satisfied, the cluster 14 | * health status is YELLOW. 
15 | * 16 | * @author Mincong Huang 17 | * @see Cluster 19 | * Health API | Elasticsearch Reference 20 | */ 21 | @ClusterScope(numDataNodes = 1) 22 | public class YelloClusterIT extends ESIntegTestCase { 23 | 24 | @Override 25 | public int minimumNumberOfReplicas() { 26 | return 1; 27 | } 28 | 29 | @Override 30 | public int maximumNumberOfReplicas() { 31 | return 1; 32 | } 33 | 34 | @Test 35 | public void yellowClusterWhenMissingReplicas() { 36 | client() 37 | .prepareIndex() 38 | .setIndex("users") 39 | .setSource(Map.of("foo", "bar")) 40 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 41 | .execute() 42 | .actionGet(); 43 | ClusterHealthStatus status = ensureYellowAndNoInitializingShards("users"); 44 | assertEquals(ClusterHealthStatus.YELLOW, status); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/index/IndexNotFoundExceptionTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch.index; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.elasticsearch.action.index.IndexRequest; 5 | import org.elasticsearch.common.settings.Settings; 6 | import org.elasticsearch.common.xcontent.XContentType; 7 | import org.elasticsearch.index.IndexNotFoundException; 8 | import org.elasticsearch.test.ESSingleNodeTestCase; 9 | import org.junit.Test; 10 | 11 | /** 12 | * @blog https://mincong.io/2020/09/13/es-index-exceptions/ 13 | * @author Mincong Huang 14 | */ 15 | public class IndexNotFoundExceptionTest extends ESSingleNodeTestCase { 16 | 17 | @Override 18 | protected Settings nodeSettings() { 19 | return Settings.builder() 20 | /* 21 | * Disable auto index creation 22 | * 23 | * See https://www.elastic.co/guide/en/elasticsearch/reference/7.9/docs-index_.html#index-creation 24 | */ 25 | .put("action.auto_create_index", false) 26 | .build(); 27 | } 28 | 29 | @Test 30 | public void 
testAutoCreationDisabled() { 31 | var request = new IndexRequest("my_index").source("{\"msg\": \"hello\"}", XContentType.JSON); 32 | 33 | Assertions.assertThatThrownBy(() -> client().index(request).actionGet()) 34 | .isInstanceOf(IndexNotFoundException.class) 35 | .hasMessageContaining("no such index [my_index]"); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/index/MapperParsingExceptionTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch.index; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.elasticsearch.action.index.IndexRequest; 5 | import org.elasticsearch.common.xcontent.XContentType; 6 | import org.elasticsearch.index.mapper.MapperParsingException; 7 | import org.elasticsearch.rest.RestStatus; 8 | import org.elasticsearch.test.ESSingleNodeTestCase; 9 | import org.junit.Test; 10 | 11 | /** 12 | * @blog https://mincong.io/2020/09/13/es-index-exceptions/ 13 | * @author Mincong Huang 14 | */ 15 | public class MapperParsingExceptionTest extends ESSingleNodeTestCase { 16 | 17 | @Test 18 | public void testMetadataField() { 19 | var request = new IndexRequest("my_index").source("{\"_id\": \"123\"}", XContentType.JSON); 20 | Assertions.assertThatThrownBy(() -> client().index(request).actionGet()) 21 | .isInstanceOf(MapperParsingException.class); 22 | } 23 | 24 | @Test 25 | public void testWrongMapping() { 26 | var request1 = 27 | new IndexRequest("my_index2") 28 | .source("{\"updated\": \"2020-09-12T21:12:00\"}", XContentType.JSON); 29 | var response1 = client().index(request1).actionGet(); 30 | Assertions.assertThat(response1.status()).isEqualTo(RestStatus.CREATED); 31 | 32 | var request2 = 33 | new IndexRequest("my_index2") 34 | .source( 35 | "{\"updated\": {\"date\": \"2020-09-12\", \"time\": \"21:12:00\"}}", 36 | XContentType.JSON); 37 | /* 38 | * failed to parse field 
[updated] of type [date] in document with id 39 | * 'vZLKg3QBwzbK8KxrfutG'. Preview of field's value: 40 | * '{date=2020-09-12, time=21:12:00}' 41 | */ 42 | Assertions.assertThatThrownBy(() -> client().index(request2).actionGet()) 43 | .isInstanceOf(MapperParsingException.class) 44 | .hasMessageStartingWith( 45 | "failed to parse field [updated] of type [date] in document with id") 46 | .hasMessageEndingWith("Preview of field's value: '{date=2020-09-12, time=21:12:00}'"); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /basics/src/test/java/io/mincong/elasticsearch/util/BlogJavaTimeTimeValueTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.time.Duration; 6 | import org.elasticsearch.common.settings.Settings; 7 | import org.elasticsearch.common.unit.TimeValue; 8 | import org.junit.Test; 9 | 10 | /** 11 | * @author Mincong Huang 12 | * @blog https://mincong.io/2020/10/25/java-time/ 13 | */ 14 | public class BlogJavaTimeTimeValueTest { 15 | 16 | @Test 17 | public void toDuration() { 18 | TimeValue timeValue = TimeValue.timeValueMinutes(5); 19 | Duration duration = Duration.ofMillis(timeValue.millis()); 20 | assertThat(duration).isEqualTo(Duration.ofMinutes(5)); 21 | } 22 | 23 | @Test 24 | public void toTimeValue() { 25 | Duration duration = Duration.ofMinutes(5); 26 | TimeValue timeValue = TimeValue.timeValueMillis(duration.toMillis()); 27 | assertThat(timeValue).isEqualTo(TimeValue.timeValueMinutes(5)); 28 | } 29 | 30 | @Test 31 | public void fromSettings() { 32 | Settings settings = Settings.builder().put("timeout", "5m").build(); 33 | TimeValue timeout = settings.getAsTime("timeout", TimeValue.ZERO); 34 | assertThat(timeout).isEqualTo(TimeValue.timeValueMinutes(5)); 35 | } 36 | } 37 | 
-------------------------------------------------------------------------------- /cheatsheets/README.md: -------------------------------------------------------------------------------- 1 | # Cheatsheets 2 | 3 | Elasticsearch-related cheatsheets to be used with [`cheat`](https://github.com/cheat/cheat). 4 | 5 | ## Install 6 | 7 | You need to install `cheat` and add this directory as cheatsheets. 8 | 9 | MacOS: 10 | 11 | ``` 12 | brew install cheat 13 | ``` 14 | 15 | Use `cheat` and let it guide you for the initialization: 16 | 17 | ``` 18 | $ cheat 19 | A config file was not found. Would you like to create one now? [Y/n]: Y 20 | Would you like to download the community cheatsheets? [Y/n]: Y 21 | Cloning into '/Users/mincong.huang/.config/cheat/cheatsheets/community'... 22 | remote: Enumerating objects: 823, done. 23 | remote: Counting objects: 100% (119/119), done. 24 | remote: Compressing objects: 100% (89/89), done. 25 | remote: Total 823 (delta 47), reused 71 (delta 30), pack-reused 704 26 | Receiving objects: 100% (823/823), 231.26 KiB | 19.27 MiB/s, done. 27 | Resolving deltas: 100% (310/310), done. 28 | Created config file: /Users/mincong.huang/.config/cheat/conf.yml 29 | Please read this file for advanced configuration information. 
30 | ``` 31 | 32 | Add this path to your `cheat` configuration (`~/.config/cheat/conf.yml`): 33 | 34 | ```diff 35 | cheatpaths: 36 | - name: community # a name for the cheatpath 37 | path: ~/documents/cheat/community # the path's location on the filesystem 38 | tags: [ community ] # these tags will be applied to all sheets on the path 39 | readonly: true # if true, `cheat` will not create new cheatsheets here 40 | 41 | + - name: elasticsearch 42 | + path: /path/to/learning-elasticsearch/cheatsheets 43 | + tags: [ elasticsearch, es ] 44 | + readonly: true 45 | 46 | - name: personal 47 | path: ~/documents/cheat/personal # this is a separate directory and repository than above 48 | tags: [ personal ] 49 | readonly: false # new sheets may be written here 50 | ``` 51 | 52 | ## Usage 53 | 54 | ```sh 55 | # Show all the curl commands related to Elasticsearch 56 | cheat -t elasticsearch curl 57 | cheat -t es curl 58 | 59 | # Show curl commands related to module "snapshot" 60 | cheat -t elasticsearch:snapshot curl 61 | cheat -t es:snapshot curl 62 | ``` 63 | -------------------------------------------------------------------------------- /cheatsheets/curl: -------------------------------------------------------------------------------- 1 | --- 2 | syntax: shell 3 | tags: ["elasticsearch:snapshot", "es:snapshot"] 4 | --- 5 | # 6 | # Snapshot and restore APIs 7 | # https://www.elastic.co/guide/en/elasticsearch/reference/7.x/snapshot-restore-apis.html 8 | # 9 | 10 | ##### Snapshot repository management APIs ##### 11 | 12 | # List all snapshot repositories 13 | curl localhost:9200/_snapshot/ 14 | 15 | # Get snapshot repository 16 | curl localhost:9200/_snapshot/<repository> 17 | 18 | # Create or update snapshot repository 19 | curl -X PUT localhost:9200/_snapshot/<repository> 20 | 21 | # Verify snapshot repository 22 | curl -X POST localhost:9200/_snapshot/<repository>/_verify 23 | 24 | # Delete snapshot repository 25 | curl -X DELETE localhost:9200/_snapshot/<repository> 26 | 27 | # Clean up snapshot repository 28 | curl -X
POST localhost:9200/_snapshot/<repository>/_cleanup 29 | 30 | ##### Snapshot management APIs ##### 31 | 32 | # Create snapshot 33 | curl -X PUT localhost:9200/_snapshot/<repository>/<snapshot> 34 | 35 | # Clone snapshot 36 | curl -X PUT localhost:9200/_snapshot/<repository>/<source_snapshot>/_clone/<target_snapshot> 37 | 38 | # Get snapshot 39 | curl localhost:9200/_snapshot/<repository>/<snapshot> 40 | 41 | # Get snapshot status 42 | curl localhost:9200/_snapshot/<repository>/<snapshot>/_status 43 | 44 | # Restore snapshot 45 | curl -X POST localhost:9200/_snapshot/<repository>/<snapshot>/_restore 46 | 47 | # Delete snapshot 48 | curl -X DELETE localhost:9200/_snapshot/<repository>/<snapshot> 49 | -------------------------------------------------------------------------------- /cluster/README.md: -------------------------------------------------------------------------------- 1 | # Cluster 2 | 3 | ## Nodes Information 4 | 5 | 6 | 7 | ``` 8 | GET /_cat/nodes 9 | ``` 10 | 11 | ## Get Cluster Settings 12 | 13 | ### API 14 | 15 | 16 | 17 | ``` 18 | GET /_cluster/settings 19 | ``` 20 | 21 | ### Java Legacy Client 22 | 23 | ```java 24 | var clusterStateResponse = client.admin().cluster().prepareState().get(); 25 | var metadata = clusterStateResponse.getState().metadata(); 26 | 27 | metadata.transientSettings(); 28 | metadata.persistentSettings(); 29 | metadata.settings(); 30 | ``` 31 | 32 | ### Java High Level Rest Client 33 | 34 | 35 | 36 | ```java 37 | var request = new ClusterGetSettingsRequest(); 38 | var response = client.cluster().getSettings(request, RequestOptions.DEFAULT); 39 | 40 | response.getDefaultSettings(); 41 | response.getPersistentSettings(); 42 | response.getTransientSettings(); 43 | ``` 44 | 45 | ## Update Cluster Settings 46 | 47 | ### API 48 | 49 | 50 | 51 | ``` 52 | PUT /_cluster/settings 53 | ``` 54 | 55 | ## Articles 56 | 57 | - Alexander Reelsen, "Does the cluster state size impact performance?", _Elastic_, 2013.
58 | 59 | 60 | ## References 61 | 62 | - Elastic, "Install Elasticsearch with Docker", _Elastic_, 2020.
63 | 64 | - Elastic, "Nodes Info API", _Elastic_, 2020.
65 | 66 | - Elastic, "Cat Nodes API", _Elastic_, 2020.
67 | 68 | -------------------------------------------------------------------------------- /cluster/cluster-settings.md: -------------------------------------------------------------------------------- 1 | # Cluster Settings 2 | 3 | Settings can be persistent, meaning they apply across restarts, or transient, 4 | where they don’t survive a full cluster restart. 5 | 6 | ## Get Settings 7 | 8 | ### API 9 | 10 | 11 | 12 | ``` 13 | GET /_cluster/settings 14 | ``` 15 | 16 | ### Java Legacy Client 17 | 18 | ```java 19 | var clusterStateResponse = client.admin().cluster().prepareState().get(); 20 | var metadata = clusterStateResponse.getState().metadata(); 21 | 22 | metadata.transientSettings(); 23 | metadata.persistentSettings(); 24 | metadata.settings(); 25 | ``` 26 | 27 | ### Java High Level Rest Client 28 | 29 | 30 | 31 | ```java 32 | var request = new ClusterGetSettingsRequest(); 33 | var response = client.cluster().getSettings(request, RequestOptions.DEFAULT); 34 | 35 | response.getDefaultSettings(); 36 | response.getPersistentSettings(); 37 | response.getTransientSettings(); 38 | ``` 39 | 40 | ## Update Settings 41 | 42 | 43 | 44 | ``` 45 | PUT /_cluster/settings 46 | ``` 47 | 48 | ## Persistent Settings 49 | 50 | Persistent settings are stored on each master-eligible node in the global 51 | cluster state file, which can be found in the Elasticsearch data directory: 52 | `data/CLUSTER_NAME/nodes/N/_state`, where `CLUSTER_NAME` is the name of the 53 | cluster and `N` is the node number (0 if this is the only node on this 54 | machine). 55 | 56 | Besides persistent settings, this file may contain other global metadata such as 57 | index templates. By default the global cluster state file is stored in the 58 | binary SMILE format. 
For debugging purposes, if you want to see what's actually 59 | stored in this file, you can change the format of this file to JSON by adding 60 | the following line to the `elasticsearch.yml` file: 61 | 62 | format: json 63 | 64 | Every time cluster state changes, all master-eligible nodes store the new 65 | version of the file, so during cluster restart the node that starts first and 66 | elects itself as a master will have the newest version of the cluster state. 67 | 68 | ## References 69 | 70 | - imotov, "Where does ElasticSearch store persistent settings?", _Stack Overflow_, 2014. 71 | 72 | - Elastic, "Cluster update settings API", _Elastic_, 2020. 73 | 74 | -------------------------------------------------------------------------------- /cluster/cluster-state.md: -------------------------------------------------------------------------------- 1 | # Cluster State 2 | 3 | ## Get Cluster State 4 | 5 | ### API 6 | 7 | 8 | 9 | ``` 10 | GET /_cluster/state 11 | ``` 12 | ``` 13 | GET /_cluster/state/_all 14 | ``` 15 | 16 | ### Legacy Client 17 | 18 | ```java 19 | var response = client 20 | .admin() 21 | .cluster() 22 | .prepareState() 23 | // set options here ... 
24 | .get(); 25 | ``` 26 | 27 | ## Components 28 | 29 | The components of cluster state: 30 | 31 | ``` 32 | blocks 33 | cluster_name 34 | compressed_size_in_bytes 35 | metadata 36 | cluster_uuid 37 | templates 38 | indices 39 | index-graveyard 40 | repositories 41 | nodes 42 | restore 43 | routing_nodes 44 | routing_table 45 | snapshot_deletions 46 | snapshots 47 | state_uuid 48 | version 49 | ``` 50 | 51 | Explanation: 52 | 53 | Name | Description 54 | :--- | :--- 55 | blocks | Description, retry-ability, permission (read/write) of the index and its metadata 56 | cluster\_name | Name of the cluster 57 | compressed\_size\_in\_bytes | Maybe compressed size of cluster state in bytes 58 | metadata | Metadata 59 | nodes | Nodes of the cluster: name, id, IP address, attributes 60 | restore | Restore operations on snapshots. 61 | routing\_nodes | Unassigned shards (snapshot restore), routing nodes 62 | routing\_table | Routing indices with shards info 63 | snapshot\_deletions | In-progress snapshot deletions 64 | snapshots | In-progress snapshot creations 65 | state\_uuid | UUID of the cluster state 66 | version | Version of the cluster state 67 | 68 | Inside metadata section: 69 | 70 | Name | Description 71 | :--- | :--- 72 | cluster\_uuid | UUID of the cluster 73 | templates | Security index templates, machine-learning metadata/state/config, logstash index template etc 74 | indices | Index state, settings, mappings, alias, primary terms, allocations, etc 75 | index-graveyard | Tombstones of recently deleted indices 76 | repositories | Snapshot repositories with type and settings 77 | 78 | ## Reduce Network I/O 79 | 80 | Clear all the options before performing the actual call using `clear()` to 81 | avoid heavy network I/O. Useful when the cluster state is large: 82 | 83 | ```java 84 | var clusterState = client 85 | .admin() 86 | .cluster() 87 | .prepareState() 88 | .clear() 89 | /* 90 | * Define your options explicitly here as: 91 | * 92 | * setXxx(...) 
93 | */ 94 | .setCustoms(true) 95 | .get() 96 | .getState(); 97 | ``` 98 | -------------------------------------------------------------------------------- /cluster/cluster-stats.md: -------------------------------------------------------------------------------- 1 | # Cluster Stats 2 | 3 | ## Get Cluster Stats 4 | 5 | https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-stats.html 6 | 7 | The Cluster Stats API allows you to retrieve statistics from a cluster-wide perspective. The API returns 8 | basic index metrics (shard numbers, store size, memory usage) and information about the current 9 | nodes that form the cluster (number, roles, os, jvm versions, memory usage, cpu and installed 10 | plugins). 11 | 12 | See section "Response Body" of the link above to see the JSON structure and available metrics and information. 13 | 14 | ### API 15 | 16 | ``` 17 | GET /_cluster/stats 18 | ``` 19 | 20 | ``` 21 | GET /_cluster/stats/nodes/ 22 | ``` 23 | 24 | ## Java Legacy Client 25 | 26 | ```java 27 | var clusterStatsResponse = client.admin().cluster().prepareClusterStats().get(); 28 | 29 | // query value of `nodes.fs.available` 30 | var availableByteSize = clusterStatsResponse.getNodesStats().getFs().getAvailable(); 31 | ``` 32 | 33 | ## Java REST Client 34 | 35 | You cannot use Java High-Level REST Client to retrieve this information. You need to send a request 36 | using the Java Low-Level REST Client: 37 | 38 | ```java 39 | var request = new Request("GET", "/_nodes/_all/stats/fs"); 40 | var response = restClient.getLowLevelClient().performRequest(request); 41 | var body = EntityUtils.toString(response.getEntity()); 42 | /* 43 | * { 44 | * "_nodes": { ... }, 45 | * "cluster_name": "docker-cluster", 46 | * "nodes": { 47 | * "fs": { 48 | * "timestamp": 1596277078797, 49 | * "total": { 50 | * "total_in_bytes": 15679725568, 51 | * "free_in_bytes": 7031689216, 52 | * "available_in_bytes": 6215008256 53 | * }, 54 | * ... 
55 | * } 56 | */ 57 | // Then, parse the response body (JSON) in your preferred way 58 | ``` 59 | 60 | ## References 61 | 62 | - Korhan Herguner, "How to high level rest client request request nodes stats URGENT!", _Elastic Discuss_, 2019.
63 | https://discuss.elastic.co/t/how-to-high-level-rest-client-request-request-nodes-stats-urgent/170324 64 | -------------------------------------------------------------------------------- /cluster/disk.md: -------------------------------------------------------------------------------- 1 | # Disk 2 | 3 | ## Disk Performance 4 | 5 | Using `fio` — Flexible I/O Tester (https://github.com/axboe/fio) is an efficient way to test the disk performance. 6 | You can find some samples here: 7 | 8 | * [Sample FIO Commands for Block Volume Performance Tests on Linux-based Instances](https://docs.oracle.com/en-us/iaas/Content/Block/References/samplefiocommandslinux.htm) 9 | * [How to use Fio (Flexible I/O Tester) to Measure Disk Performance in Linux](https://dotlayer.com/how-to-use-fio-to-measure-disk-performance-in-linux/) 10 | * [How to Measure Disk Performance using Fio in Linux](https://linoxide.com/measure-disk-performance-fio/) 11 | 12 | Cannot understand the options specified in the samples above? You can check the command line options and job file parameters here: . 13 | 14 | ## References 15 | 16 | - Stanislav Příhoda, "Elasticsearch Disk and Data Storage Optimizations with Benchmarks", _coralogix.com_, 2021.
17 | 18 | - Elasticsearch, "How to design your Elasticsearch data storage architecture for scale", _elastic.co_, 2021.
19 | 20 | -------------------------------------------------------------------------------- /cluster/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | es-demo-parent 7 | io.mincong 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-demo-cluster 13 | Elasticsearch Demos - Cluster 14 | 15 | 16 | 17 | org.elasticsearch.test 18 | framework 19 | test 20 | 21 | 22 | org.elasticsearch.client 23 | elasticsearch-rest-high-level-client 24 | test 25 | 26 | 27 | com.fasterxml.jackson.core 28 | jackson-databind 29 | test 30 | 31 | 32 | org.apache.logging.log4j 33 | log4j-core 34 | test 35 | 36 | 37 | org.assertj 38 | assertj-core 39 | test 40 | 41 | 42 | 43 | 44 | 45 | 46 | io.fabric8 47 | docker-maven-plugin 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /cluster/src/test/java/io/mincong/elasticsearch/ClusterNodesInfoIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.elasticsearch.cluster.node.DiscoveryNode; 5 | import org.elasticsearch.test.ESIntegTestCase; 6 | import org.elasticsearch.test.ESIntegTestCase.ClusterScope; 7 | import org.elasticsearch.test.ESIntegTestCase.Scope; 8 | import org.junit.Test; 9 | 10 | /** 11 | * Tests "Nodes Info API". 
12 | * 13 | * @author Mincong Huang 14 | * @see Nodes 16 | * Info API | Elastic 17 | */ 18 | @ClusterScope(minNumDataNodes = 3, maxNumDataNodes = 3, scope = Scope.TEST) 19 | public class ClusterNodesInfoIT extends ESIntegTestCase { 20 | 21 | @Test 22 | public void getRequest() { 23 | var nodesInfoResponse = client().admin().cluster().prepareNodesInfo().all().get(); 24 | 25 | var dataNodeCount = 0; 26 | 27 | for (var nodeInfo : nodesInfoResponse.getNodes()) { 28 | if (DiscoveryNode.isDataNode(nodeInfo.getSettings())) { 29 | dataNodeCount++; 30 | } 31 | } 32 | 33 | Assertions.assertThat(dataNodeCount).isEqualTo(3); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /cluster/src/test/java/io/mincong/elasticsearch/ClusterSettingsLegacyClientTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.elasticsearch.test.ESSingleNodeTestCase; 5 | import org.junit.Test; 6 | 7 | /** 8 | * Cluster get settings API 9 | * 10 | * @see ClusterSettingsRestClientIT 11 | */ 12 | public class ClusterSettingsLegacyClientTest extends ESSingleNodeTestCase { 13 | 14 | @Test 15 | public void getSettings() { 16 | var clusterStateResponse = client().admin().cluster().prepareState().get(); 17 | var metaData = clusterStateResponse.getState().metadata(); 18 | 19 | Assertions.assertThat(metaData.transientSettings().isEmpty()).isTrue(); 20 | Assertions.assertThat(metaData.persistentSettings().isEmpty()).isTrue(); 21 | Assertions.assertThat(metaData.settings().isEmpty()).isTrue(); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /cluster/src/test/java/io/mincong/elasticsearch/ClusterSettingsRestClientIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.io.IOException; 4 | 
import org.apache.http.HttpHost; 5 | import org.assertj.core.api.Assertions; 6 | import org.elasticsearch.action.admin.cluster.settings.ClusterGetSettingsRequest; 7 | import org.elasticsearch.client.RequestOptions; 8 | import org.elasticsearch.client.RestClient; 9 | import org.elasticsearch.client.RestHighLevelClient; 10 | import org.elasticsearch.client.indices.CreateIndexRequest; 11 | import org.elasticsearch.test.rest.ESRestTestCase; 12 | import org.junit.*; 13 | 14 | /** 15 | * Cluster get settings API 16 | * 17 | * @see ClusterSettingsLegacyClientTest 18 | * @see 20 | * Cluster get settings API 21 | */ 22 | public class ClusterSettingsRestClientIT extends ESRestTestCase { 23 | 24 | @BeforeClass 25 | public static void setUpBeforeClass() { 26 | System.setProperty("tests.rest.cluster", "localhost:19200"); 27 | } 28 | 29 | @AfterClass 30 | public static void tearDownAfterClass() { 31 | System.clearProperty("tests.rest.cluster"); 32 | } 33 | 34 | private RestHighLevelClient restClient; 35 | 36 | @Before 37 | @Override 38 | public void setUp() throws Exception { 39 | super.setUp(); 40 | 41 | var builder = RestClient.builder(new HttpHost("localhost", 19200, "http")); 42 | restClient = new RestHighLevelClient(builder); 43 | 44 | var createRequest = new CreateIndexRequest("my_index"); 45 | var response = restClient.indices().create(createRequest, RequestOptions.DEFAULT); 46 | Assertions.assertThat(response.isAcknowledged()).isTrue(); 47 | } 48 | 49 | @After 50 | public void tearDown() throws Exception { 51 | restClient.close(); 52 | super.tearDown(); 53 | } 54 | 55 | @Test 56 | public void getSettings() throws IOException { 57 | var request = new ClusterGetSettingsRequest(); 58 | 59 | // optional flags 60 | request.includeDefaults(true); 61 | request.local(true); 62 | 63 | var response = restClient.cluster().getSettings(request, RequestOptions.DEFAULT); 64 | 65 | // default settings 66 | Assertions.assertThat(response.getDefaultSettings().isEmpty()).isFalse(); 67 | 68 
| // persistent settings 69 | Assertions.assertThat(response.getPersistentSettings().isEmpty()).isTrue(); 70 | 71 | // transient settings 72 | Assertions.assertThat(response.getTransientSettings().isEmpty()).isTrue(); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /cluster/src/test/java/io/mincong/elasticsearch/ClusterStateTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.elasticsearch.cluster.block.ClusterBlocks; 5 | import org.elasticsearch.test.ESSingleNodeTestCase; 6 | import org.junit.Test; 7 | 8 | public class ClusterStateTest extends ESSingleNodeTestCase { 9 | 10 | @Test 11 | public void getMinimumClusterState() { 12 | var clusterState = 13 | client() // 14 | .admin() 15 | .cluster() 16 | .prepareState() 17 | .clear() 18 | /* 19 | * Define your options explicitly here as: 20 | * 21 | * setXxx(...) 
22 | */ 23 | .setCustoms(true) 24 | .get() 25 | .getState(); 26 | 27 | Assertions.assertThat(clusterState.blocks()).isEqualTo(ClusterBlocks.EMPTY_CLUSTER_BLOCK); 28 | Assertions.assertThat(clusterState.metadata().indices()).isEmpty(); 29 | Assertions.assertThat(clusterState.routingTable().allShards()).isEmpty(); 30 | Assertions.assertThat(clusterState.nodes()).isEmpty(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /cluster/src/test/java/io/mincong/elasticsearch/ClusterStatsLegacyClientTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.elasticsearch.test.ESSingleNodeTestCase; 5 | import org.junit.Test; 6 | 7 | public class ClusterStatsLegacyClientTest extends ESSingleNodeTestCase { 8 | 9 | @Test 10 | public void getStats() { 11 | var client = client(); 12 | 13 | // demo:start 14 | var clusterStatsResponse = client.admin().cluster().prepareClusterStats().get(); 15 | // query value of `nodes.fs.available` 16 | var availableByteSize = clusterStatsResponse.getNodesStats().getFs().getAvailable(); 17 | // demo:end 18 | 19 | Assertions.assertThat(availableByteSize.getBytes()).isPositive(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /cluster/src/test/java/io/mincong/elasticsearch/ClusterStatsRestClientIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import com.fasterxml.jackson.databind.node.ObjectNode; 5 | import java.io.IOException; 6 | import org.apache.http.HttpHost; 7 | import org.apache.http.util.EntityUtils; 8 | import org.assertj.core.api.Assertions; 9 | import org.elasticsearch.client.Request; 10 | import org.elasticsearch.client.RestClient; 11 | import 
org.elasticsearch.client.RestHighLevelClient; 12 | import org.elasticsearch.test.rest.ESRestTestCase; 13 | import org.junit.*; 14 | 15 | public class ClusterStatsRestClientIT extends ESRestTestCase { 16 | 17 | @BeforeClass 18 | public static void setUpBeforeClass() { 19 | System.setProperty("tests.rest.cluster", "localhost:19200"); 20 | } 21 | 22 | @AfterClass 23 | public static void tearDownAfterClass() { 24 | System.clearProperty("tests.rest.cluster"); 25 | } 26 | 27 | private RestHighLevelClient restClient; 28 | 29 | @Before 30 | @Override 31 | public void setUp() throws Exception { 32 | super.setUp(); 33 | 34 | var builder = RestClient.builder(new HttpHost("localhost", 19200, "http")); 35 | restClient = new RestHighLevelClient(builder); 36 | } 37 | 38 | @After 39 | public void tearDown() throws Exception { 40 | restClient.close(); 41 | super.tearDown(); 42 | } 43 | 44 | @Test 45 | public void getFsStats() throws IOException { 46 | // demo:start 47 | var request = new Request("GET", "/_nodes/_all/stats/fs"); 48 | var response = restClient.getLowLevelClient().performRequest(request); 49 | var body = EntityUtils.toString(response.getEntity()); 50 | /* 51 | * { 52 | * "_nodes": { ... }, 53 | * "cluster_name": "docker-cluster", 54 | * "nodes": { 55 | * ... 56 | * "fs": { 57 | * "timestamp": 1596277078797, 58 | * "total": { 59 | * "total_in_bytes": 15679725568, 60 | * "free_in_bytes": 7031689216, 61 | * "available_in_bytes": 6215008256 62 | * }, 63 | * ... 
64 | * } 65 | */ 66 | System.out.println(body); 67 | // demo:end 68 | 69 | var node = new ObjectMapper().readValue(body, ObjectNode.class); 70 | var firstNodeMetrics = node.get("nodes").fields().next().getValue(); 71 | var bytes = firstNodeMetrics.get("fs").get("total").get("available_in_bytes").asLong(); 72 | 73 | Assertions.assertThat(bytes).isPositive(); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /cluster/src/test/resources/docker-compose.yml: -------------------------------------------------------------------------------- 1 | # 2 | # Install Elasticsearch With Docker 3 | # https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html 4 | # 5 | version: '2.2' 6 | services: 7 | es01: 8 | image: docker.elastic.co/elasticsearch/elasticsearch:7.12.0 # CURRENT_ES_VERSION 9 | container_name: es01 10 | environment: 11 | - node.name=es01 12 | - cluster.name=es-docker-cluster 13 | - discovery.seed_hosts=es02,es03 14 | - cluster.initial_master_nodes=es01,es02,es03 15 | - bootstrap.memory_lock=true 16 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 17 | ulimits: 18 | memlock: 19 | soft: -1 20 | hard: -1 21 | volumes: 22 | - data01:/usr/share/elasticsearch/data 23 | ports: 24 | - 9200:9200 25 | networks: 26 | - elastic 27 | es02: 28 | image: docker.elastic.co/elasticsearch/elasticsearch:7.12.0 # CURRENT_ES_VERSION 29 | container_name: es02 30 | environment: 31 | - node.name=es02 32 | - cluster.name=es-docker-cluster 33 | - discovery.seed_hosts=es01,es03 34 | - cluster.initial_master_nodes=es01,es02,es03 35 | - bootstrap.memory_lock=true 36 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 37 | ulimits: 38 | memlock: 39 | soft: -1 40 | hard: -1 41 | volumes: 42 | - data02:/usr/share/elasticsearch/data 43 | networks: 44 | - elastic 45 | es03: 46 | image: docker.elastic.co/elasticsearch/elasticsearch:7.12.0 # CURRENT_ES_VERSION 47 | container_name: es03 48 | environment: 49 | - node.name=es03 50 | - cluster.name=es-docker-cluster 
51 | - discovery.seed_hosts=es01,es02 52 | - cluster.initial_master_nodes=es01,es02,es03 53 | - bootstrap.memory_lock=true 54 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 55 | ulimits: 56 | memlock: 57 | soft: -1 58 | hard: -1 59 | volumes: 60 | - data03:/usr/share/elasticsearch/data 61 | networks: 62 | - elastic 63 | 64 | volumes: 65 | data01: 66 | driver: local 67 | data02: 68 | driver: local 69 | data03: 70 | driver: local 71 | 72 | networks: 73 | elastic: 74 | driver: bridge 75 | -------------------------------------------------------------------------------- /demo-dvf/README.md: -------------------------------------------------------------------------------- 1 | # DVF 2 | 3 | ## Articles 4 | 5 | 1. [Indexing New Documents](https://mincong.io/2020/12/16/dvf-indexing/) -- Indexing new documents into Elasticsearch using the French government's open data "Demande de valeurs foncières (DVF)". 6 | 2. [Indexing Optimization](https://mincong.io/2020/12/17/dvf-indexing-optimization/) -- Optimize the indexing process using bulk index requests and multi-threading. 7 | 3. [Storage Optimization](https://mincong.io/2020/12/25/dvf-storage-optimization/) -- How to optimize storage of a given index by 40% using force-merge. 8 | 4. [Snapshot And Restore](https://mincong.io/2021/01/10/dvf-snapshot-and-restore/) -- How to create a snapshot for index "transactions" of DVF and restore it to an Elasticsearch cluster. 9 | 5. [Aggregations](https://mincong.io/2021/04/12/dvf-aggregations/) -- How to write and execute metric and bucket aggregations in Elasticsearch for data analytics. Also, how to execute aggregations that contain sub-aggregations. 10 | 6. [Real Estate Analysis For Île-de-France in 2020](https://mincong.io/2021/04/16/dvf-real-estate-analysis-idf-2020/) -- This article studies the real estate market of Île-de-France in 2020 by exploring and visualizing the dataset DVF using Kibana. 
We will discuss the global landscape, the impact of COVID-19, the situation in different departments, and more. 11 | 12 | ## Frontend 13 | 14 | ### elasticsearch-head 15 | 16 | Use . 17 | 18 | ### Kibana 19 | 20 | Run commands: 21 | 22 | ``` 23 | ./start-elasticsearch.sh 24 | ./start-kibana.sh 25 | ``` 26 | 27 | Then visit Kibana: http://localhost:5601 28 | -------------------------------------------------------------------------------- /demo-dvf/export-kibana-dashboard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Description: 4 | # 5 | # Export Kibana dashboard via the experimental Kibana Export Dashboard API. 6 | # 7 | # This script assumes that the Kibana instance is running on localhost:5601 8 | # 9 | # Usage: 10 | # 11 | # export-kibana-dashboard.sh 12 | # 13 | # Example: 14 | # 15 | # export-kibana-dashboard.sh 49e18890-9cf5-11eb-b207-efcdf249253b 16 | # 17 | dashboard_id="$1" 18 | 19 | if [[ -z $dashboard_id ]] 20 | then 21 | echo "Cannot export dashboard because the dashboard ID is missing. Usage:" 22 | echo 23 | echo " export-kibana-dashboard.sh " 24 | echo 25 | exit 1 26 | fi 27 | 28 | # How can I get the source directory of a Bash script from within the script itself? 29 | # https://stackoverflow.com/questions/59895/ 30 | current_dir="$(cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)" 31 | backup_path="${current_dir}/.kibana/dashboard.${dashboard_id}.current.json" 32 | backup_rotate_path="${current_dir}/.kibana/dashboard.${dashboard_id}.$(date +'%s').json" 33 | 34 | if [[ -f $backup_path ]] 35 | then 36 | echo "The target file already exists (path: ${backup_path})" 37 | echo "Rotating it to ${backup_rotate_path}" 38 | mv "$backup_path" "$backup_rotate_path" 39 | fi 40 | 41 | response=$(curl "localhost:5601/api/kibana/dashboards/export?dashboard=${dashboard_id}") 42 | 43 | if [[ $? 
!= "0" ]] 44 | then 45 | echo "Export failed with response:" 46 | echo -e "$response" | jq 47 | exit 1 48 | fi 49 | 50 | echo -e "$response" | jq > "$backup_path" 51 | 52 | echo "Done. Dashboard ${dashboard_id} exported to:" 53 | echo "${backup_path}" 54 | -------------------------------------------------------------------------------- /demo-dvf/import-kibana-dashboard.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Description: 4 | # 5 | # Import Kibana dashboard via the experimental Kibana Import Dashboard API. 6 | # 7 | # This script assumes that the Kibana instance is running on localhost:5601 8 | # 9 | # Usage: 10 | # 11 | # import-kibana-dashboard.sh 12 | # 13 | # Example: 14 | # 15 | # import-kibana-dashboard.sh 49e18890-9cf5-11eb-b207-efcdf249253b 16 | # 17 | dashboard_id="$1" 18 | 19 | if [[ -z $dashboard_id ]] 20 | then 21 | echo "Cannot import dashboard because the dashboard ID is missing. Usage:" 22 | echo 23 | echo " import-kibana-dashboard.sh " 24 | echo 25 | exit 1 26 | fi 27 | 28 | # How can I get the source directory of a Bash script from within the script itself? 29 | # https://stackoverflow.com/questions/59895/ 30 | current_dir="$(cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)" 31 | backup_path="${current_dir}/.kibana/dashboard.${dashboard_id}.current.json" 32 | 33 | data=$(cat "$backup_path") 34 | response=$(curl -X POST "localhost:5601/api/kibana/dashboards/import" \ 35 | -H "Content-Type: application/json" \ 36 | -H "kbn-xsrf: reporting" \ 37 | -d "$data") 38 | 39 | if [[ $? != "0" ]] 40 | then 41 | echo "Import failed with response:" 42 | echo -e "$response" | jq 43 | exit 1 44 | fi 45 | 46 | echo -e "$response" | jq 47 | 48 | echo "Done. Dashboard ${dashboard_id} imported to Kibana." 
49 | -------------------------------------------------------------------------------- /demo-dvf/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | es-demo-parent 7 | io.mincong 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-demo-dvf 13 | Elasticsearch Demos - Project DVF 14 | 15 | 16 | 17 | org.immutables 18 | value 19 | 20 | 21 | com.fasterxml.jackson.core 22 | jackson-databind 23 | 24 | 25 | com.fasterxml.jackson.dataformat 26 | jackson-dataformat-csv 27 | 28 | 29 | com.fasterxml.jackson.module 30 | jackson-module-parameter-names 31 | 32 | 33 | com.fasterxml.jackson.datatype 34 | jackson-datatype-jdk8 35 | 36 | 37 | com.fasterxml.jackson.datatype 38 | jackson-datatype-jsr310 39 | 40 | 41 | org.elasticsearch.client 42 | elasticsearch-rest-high-level-client 43 | 44 | 45 | 46 | org.assertj 47 | assertj-core 48 | test 49 | 50 | 51 | org.apache.logging.log4j 52 | log4j-core 53 | 54 | 55 | org.elasticsearch.test 56 | framework 57 | test 58 | 59 | 60 | 61 | 62 | 63 | 64 | io.fabric8 65 | docker-maven-plugin 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /demo-dvf/src/main/java/io/mincong/dvf/demo/ReadPathAggregationDemo.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.demo; 2 | 3 | import io.mincong.dvf.service.TransactionEsAggregator; 4 | import java.io.IOException; 5 | import java.util.Map; 6 | import java.util.stream.Collectors; 7 | import org.apache.http.HttpHost; 8 | import org.apache.logging.log4j.LogManager; 9 | import org.apache.logging.log4j.Logger; 10 | import org.elasticsearch.client.RestClient; 11 | import org.elasticsearch.client.RestHighLevelClient; 12 | 13 | public class ReadPathAggregationDemo { 14 | 15 | private static final Logger logger = LogManager.getLogger(ReadPathAggregationDemo.class); 16 | private static final int YEAR = 2020; 17 | 18 | public void run() { 19 | var 
builder = RestClient.builder(new HttpHost("localhost", 9200, "http")); 20 | logger.info("Start creating REST high-level client..."); 21 | try (var restClient = new RestHighLevelClient(builder)) { 22 | var aggregator = new TransactionEsAggregator(restClient, YEAR); 23 | 24 | runMetricAggregations(aggregator); 25 | runBucketAggregations(aggregator); 26 | runMetricScriptingStatsAggregations(aggregator); 27 | runMetricScriptingPercentilesAggregations(aggregator); 28 | 29 | runParisOverviewAnalysis(aggregator); 30 | runParisDistrictAnalysis(aggregator); 31 | runParisLotAnalysis(aggregator); 32 | } catch (IOException e) { 33 | logger.error("Failed to execute DVF program", e); 34 | } 35 | } 36 | 37 | public void runMetricAggregations(TransactionEsAggregator aggregator) { 38 | var count = aggregator.mutationIdsCount().getValue(); 39 | logger.info("== Requesting single metric aggregation:"); 40 | logger.info("Number of mutations: {}", String.format("%,d", count)); 41 | } 42 | 43 | private void runBucketAggregations(TransactionEsAggregator aggregator) { 44 | logger.info("== Requesting bucket aggregation:"); 45 | logger.info( 46 | "Transactions activity per postal code:\n{}", 47 | aggregator.mutationsByPostalCode().entrySet().stream() 48 | .sorted(Map.Entry.comparingByKey()) 49 | .map(entry -> " " + entry.getKey() + ": " + entry.getValue()) 50 | .collect(Collectors.joining("\n"))); 51 | } 52 | 53 | public void runMetricScriptingStatsAggregations(TransactionEsAggregator aggregator) { 54 | var stats = aggregator.priceM2Stats(); 55 | logger.info("== Requesting analytics for price/m2 - Overview:"); 56 | logger.info( 57 | "Property values are between {} and {} (avg: {})", 58 | String.format("%,.1f€/m2", stats.getMin()), 59 | String.format("%,.1f€/m2", stats.getMax()), 60 | String.format("%,.1f€/m2", stats.getAvg())); 61 | logger.info("There were {} mutations", String.format("%,d", stats.getCount())); 62 | } 63 | 64 | public void 
runMetricScriptingPercentilesAggregations(TransactionEsAggregator aggregator) { 65 | var stats = aggregator.priceM2Percentiles(); 66 | logger.info("== Requesting analytics for price/m2 - Percentiles:"); 67 | logger.info("p5: {}", String.format("%,.0f€/m2", stats.percentile(5.0))); 68 | logger.info("p25: {}", String.format("%,.0f€/m2", stats.percentile(25.0))); 69 | logger.info("p50: {}", String.format("%,.0f€/m2", stats.percentile(50.0))); 70 | logger.info("p75: {}", String.format("%,.0f€/m2", stats.percentile(75.0))); 71 | logger.info("p95: {}", String.format("%,.0f€/m2", stats.percentile(95.0))); 72 | } 73 | 74 | public void runParisOverviewAnalysis(TransactionEsAggregator aggregator) { 75 | var overviewStats = aggregator.parisStatsOverview(); 76 | logger.info("== Requesting analytics for Paris - Overview:"); 77 | logger.info( 78 | "min: {}, avg: {}, max: {}, count: {}, sum: {}", 79 | String.format("%,.0f€", overviewStats.getMin()), 80 | String.format("%,.0f€", overviewStats.getAvg()), 81 | String.format("%,.0f€", overviewStats.getMax()), 82 | String.format("%,d", overviewStats.getCount()), 83 | String.format("%,.0f€", overviewStats.getSum())); 84 | } 85 | 86 | public void runParisDistrictAnalysis(TransactionEsAggregator aggregator) { 87 | logger.info("== Requesting analytics for Paris - Per Postal Code:"); 88 | var percentilesArray = aggregator.parisPricePercentilesPerPostalCode(); 89 | var totalPriceRows = 90 | percentilesArray.entrySet().stream() 91 | .map( 92 | entry -> { 93 | var postalCode = entry.getKey(); 94 | var percentiles = entry.getValue(); 95 | return String.format( 96 | "%s | %,.0f | %,.0f | %,.0f | %,.0f | %,.0f", 97 | postalCode, 98 | percentiles[0].percentile(5), 99 | percentiles[0].percentile(25), 100 | percentiles[0].percentile(50), 101 | percentiles[0].percentile(75), 102 | percentiles[0].percentile(95)); 103 | }) 104 | .collect(Collectors.joining("\n")); 105 | var totalPriceTable = 106 | "Postal Code | p5 (€) | p25 (€) | p50 (€) | p75 (€) | 
p95 (€)\n:---: | ---: | ---: | ---: | ---: | ---: |\n" 107 | + totalPriceRows; 108 | logger.info("Total Price Percentiles Per Postal Code in Paris\n{}", totalPriceTable); 109 | 110 | var m2PriceRows = 111 | percentilesArray.entrySet().stream() 112 | .map( 113 | entry -> { 114 | var postalCode = entry.getKey(); 115 | var percentiles = entry.getValue(); 116 | return String.format( 117 | "%s | %,.0f | %,.0f | %,.0f | %,.0f | %,.0f", 118 | postalCode, 119 | percentiles[1].percentile(5), 120 | percentiles[1].percentile(25), 121 | percentiles[1].percentile(50), 122 | percentiles[1].percentile(75), 123 | percentiles[1].percentile(95)); 124 | }) 125 | .collect(Collectors.joining("\n")); 126 | var m2PriceTable = 127 | "Postal Code | p5 (€/m2) | p25 (€/m2) | p50 (€/m2) | p75 (€/m2) | p95 (€/m2)\n:---: | ---: | ---: | ---: | ---: | ---: |\n" 128 | + m2PriceRows; 129 | logger.info("Price/M2 Percentiles Per Postal Code in Paris\n{}", m2PriceTable); 130 | } 131 | 132 | public void runParisLotAnalysis(TransactionEsAggregator aggregator) { 133 | logger.info("== Requesting analytics for Paris - Per Lot Type:"); 134 | var percentilesArray = aggregator.parisPricePercentilesPerLotType(); 135 | var totalPriceRows = 136 | percentilesArray.entrySet().stream() 137 | .map( 138 | entry -> { 139 | var lotType = entry.getKey(); 140 | var percentiles = entry.getValue(); 141 | return String.format( 142 | "%s | %,.0f | %,.0f | %,.0f | %,.0f | %,.0f", 143 | lotType, 144 | percentiles[0].percentile(5), 145 | percentiles[0].percentile(25), 146 | percentiles[0].percentile(50), 147 | percentiles[0].percentile(75), 148 | percentiles[0].percentile(95)); 149 | }) 150 | .collect(Collectors.joining("\n")); 151 | var totalPriceTable = 152 | "Lot Type | p5 (€) | p25 (€) | p50 (€) | p75 (€) | p95 (€)\n:---: | ---: | ---: | ---: | ---: | ---: |\n" 153 | + totalPriceRows; 154 | logger.info("Total Price Percentiles Per Lot Type in Paris\n{}", totalPriceTable); 155 | 156 | var m2PriceRows = 157 | 
percentilesArray.entrySet().stream() 158 | .map( 159 | entry -> { 160 | var lotType = entry.getKey(); 161 | var percentiles = entry.getValue(); 162 | return String.format( 163 | "%s | %,.0f | %,.0f | %,.0f | %,.0f | %,.0f", 164 | lotType, 165 | percentiles[1].percentile(5), 166 | percentiles[1].percentile(25), 167 | percentiles[1].percentile(50), 168 | percentiles[1].percentile(75), 169 | percentiles[1].percentile(95)); 170 | }) 171 | .collect(Collectors.joining("\n")); 172 | var m2PriceTable = 173 | "Lot Type | p5 (€/m2) | p25 (€/m2) | p50 (€/m2) | p75 (€/m2) | p95 (€/m2)\n:---: | ---: | ---: | ---: | ---: | ---: |\n" 174 | + m2PriceRows; 175 | logger.info("Price/M2 Percentiles Per Lot Type in Paris\n{}", m2PriceTable); 176 | } 177 | 178 | public static void main(String[] args) { 179 | new ReadPathAggregationDemo().run(); 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /demo-dvf/src/main/java/io/mincong/dvf/model/Location.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.model; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | import com.fasterxml.jackson.databind.annotation.JsonDeserialize; 5 | import com.fasterxml.jackson.databind.annotation.JsonSerialize; 6 | import org.immutables.value.Value.Immutable; 7 | 8 | /** 9 | * @see Geo-point 11 | * field type 12 | */ 13 | @Immutable 14 | @JsonSerialize(as = ImmutableLocation.class) 15 | @JsonDeserialize(as = ImmutableLocation.class) 16 | public interface Location { 17 | 18 | static Location of(double longitude, double latitude) { 19 | return ImmutableLocation.builder().longitude(longitude).latitude(latitude).build(); 20 | } 21 | 22 | @JsonProperty("lon") // Name "lon" is required by Elasticsearch 23 | double longitude(); 24 | 25 | @JsonProperty("lat") // Name "lat" is required by Elasticsearch 26 | double latitude(); 27 | } 28 | 
-------------------------------------------------------------------------------- /demo-dvf/src/main/java/io/mincong/dvf/model/Transaction.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.model; 2 | 3 | import com.fasterxml.jackson.annotation.JsonIgnore; 4 | import com.fasterxml.jackson.annotation.JsonProperty; 5 | import com.fasterxml.jackson.databind.annotation.JsonDeserialize; 6 | import com.fasterxml.jackson.databind.annotation.JsonSerialize; 7 | import java.time.LocalDate; 8 | import java.util.HashMap; 9 | import java.util.Map; 10 | import java.util.Optional; 11 | import org.immutables.value.Value; 12 | 13 | @Value.Immutable 14 | @JsonSerialize(as = ImmutableTransaction.class) 15 | @JsonDeserialize(as = ImmutableTransaction.class) 16 | public abstract class Transaction { 17 | 18 | private static final String INDEX_NAME = "transactions"; 19 | 20 | public static final String FIELD_MUTATION_ID = "mutation_id"; 21 | 22 | public static final String FIELD_PROPERTY_VALUE = "property_value"; 23 | 24 | public static final String FIELD_POSTAL_CODE = "postal_code"; 25 | 26 | public static final String FIELD_MUTATION_NATURE = "mutation_nature"; 27 | 28 | public static final String FIELD_LOCAL_TYPE = "local_type"; 29 | 30 | public static final String FIELD_REAL_BUILT_UP_AREA = "real_built_up_area"; 31 | 32 | /** See https://www.elastic.co/guide/en/elasticsearch/reference/7.9/mapping-types.html */ 33 | private static final Map mappings = new HashMap<>(); 34 | 35 | public static Map esMappings() { 36 | return Map.of("properties", Map.copyOf(mappings)); 37 | } 38 | 39 | @JsonProperty("mutation_id") 40 | public abstract String mutationId(); 41 | 42 | static { 43 | mappings.put("mutation_id", Map.of("type", "keyword")); 44 | } 45 | 46 | @JsonProperty("mutation_date") 47 | public abstract LocalDate mutationDate(); 48 | 49 | static { 50 | mappings.put("mutation_date", Map.of("type", "date")); 51 | } 52 | 53 | 
@JsonProperty("disposition_number") 54 | public abstract String dispositionNumber(); 55 | 56 | @JsonProperty("mutation_nature") 57 | public abstract String mutationNature(); 58 | 59 | @JsonProperty("property_value") 60 | public abstract double propertyValue(); 61 | 62 | @JsonProperty("price_m2") 63 | @Value.Default 64 | public Optional priceM2() { 65 | return realBuiltUpArea().map(area -> propertyValue() / area); 66 | } 67 | 68 | @JsonProperty("address_number") 69 | public abstract String addressNumber(); 70 | 71 | @JsonProperty("address_suffix") 72 | public abstract String addressSuffix(); 73 | 74 | @JsonProperty("address_road_name") 75 | public abstract String addressRoadName(); 76 | 77 | @JsonProperty("address_road_code") 78 | public abstract String addressRoadCode(); 79 | 80 | static { 81 | mappings.put("address_road_code", Map.of("type", "keyword")); 82 | } 83 | 84 | @JsonProperty("postal_code") 85 | public abstract String postalCode(); 86 | 87 | static { 88 | mappings.put("postal_code", Map.of("type", "keyword")); 89 | } 90 | 91 | @JsonProperty("commune_code") 92 | public abstract String communeCode(); 93 | 94 | static { 95 | mappings.put("commune_code", Map.of("type", "keyword")); 96 | } 97 | 98 | @JsonProperty("commune_name") 99 | public abstract String communeName(); 100 | 101 | @JsonProperty("department_code") 102 | public abstract String departmentCode(); 103 | 104 | static { 105 | mappings.put("department_code", Map.of("type", "keyword")); 106 | } 107 | 108 | @JsonProperty("old_commune_code") 109 | public abstract String oldCommuneCode(); 110 | 111 | static { 112 | mappings.put("old_commune_code", Map.of("type", "keyword")); 113 | } 114 | 115 | @JsonProperty("old_commune_name") 116 | public abstract String oldCommuneName(); 117 | 118 | @JsonProperty("plot_id") 119 | public abstract String plotId(); 120 | 121 | static { 122 | mappings.put("plot_id", Map.of("type", "keyword")); 123 | } 124 | 125 | @JsonProperty("old_plot_id") 126 | public abstract String 
oldPlotId(); 127 | 128 | static { 129 | mappings.put("old_plot_id", Map.of("type", "keyword")); 130 | } 131 | 132 | @JsonProperty("volume_number") 133 | public abstract String volumeNumber(); 134 | 135 | @JsonProperty("number_lot1") 136 | public abstract String numberLot1(); 137 | 138 | @JsonProperty("surface_lot1") 139 | public abstract Optional surfaceSquareLot1(); 140 | 141 | static { 142 | mappings.put("surface_lot1", Map.of("type", "double")); 143 | } 144 | 145 | @JsonProperty("number_lot2") 146 | public abstract String numberLot2(); 147 | 148 | @JsonProperty("surface_lot2") 149 | public abstract Optional surfaceSquareLot2(); 150 | 151 | static { 152 | mappings.put("surface_lot2", Map.of("type", "double")); // Key must match the "surface_lot2" JSON property, otherwise the explicit mapping is not applied 153 | } 154 | 155 | @JsonProperty("number_lot3") 156 | public abstract String numberLot3(); 157 | 158 | @JsonProperty("surface_lot3") 159 | public abstract Optional surfaceSquareLot3(); 160 | 161 | static { 162 | mappings.put("surface_lot3", Map.of("type", "double")); 163 | } 164 | 165 | @JsonProperty("number_lot4") 166 | public abstract String numberLot4(); 167 | 168 | @JsonProperty("surface_lot4") 169 | public abstract Optional surfaceSquareLot4(); 170 | 171 | static { 172 | mappings.put("surface_lot4", Map.of("type", "double")); 173 | } 174 | 175 | @JsonProperty("number_lot5") 176 | public abstract String numberLot5(); 177 | 178 | @JsonProperty("surface_lot5") 179 | public abstract Optional surfaceSquareLot5(); 180 | 181 | static { 182 | mappings.put("surface_lot5", Map.of("type", "double")); 183 | } 184 | 185 | @JsonProperty("lots_count") 186 | public abstract int lotsCount(); 187 | 188 | static { 189 | mappings.put("lots_count", Map.of("type", "integer")); 190 | } 191 | 192 | @JsonProperty("local_code_type") 193 | public abstract String localTypeCode(); 194 | 195 | static { 196 | mappings.put("local_code_type", Map.of("type", "keyword")); 197 | } 198 | 199 | @JsonProperty("local_type") 200 | public abstract String localType(); 201 | 202 | 
@JsonProperty("real_built_up_area") 203 | public abstract Optional realBuiltUpArea(); 204 | 205 | static { 206 | mappings.put("real_built_up_area", Map.of("type", "double")); 207 | } 208 | 209 | @JsonProperty("principle_pieces_count") 210 | public abstract Optional principlePiecesCount(); 211 | 212 | static { 213 | mappings.put("principle_pieces_count", Map.of("type", "integer")); 214 | } 215 | 216 | @JsonProperty("nature_culture_code") 217 | public abstract String natureCultureCode(); 218 | 219 | @JsonProperty("nature_culture") 220 | public abstract String natureCulture(); 221 | 222 | @JsonProperty("special_nature_culture_code") 223 | public abstract String specialNatureCultureCode(); 224 | 225 | static { 226 | mappings.put("special_nature_culture_code", Map.of("type", "keyword")); 227 | } 228 | 229 | @JsonProperty("special_nature_culture") 230 | public abstract String specialNatureCulture(); 231 | 232 | @JsonProperty("land_surface") 233 | public abstract double landSurface(); 234 | 235 | static { 236 | mappings.put("land_surface", Map.of("type", "double")); 237 | } 238 | 239 | @JsonProperty("location") 240 | public abstract Optional location(); 241 | 242 | static { 243 | mappings.put("location", Map.of("type", "geo_point")); 244 | } 245 | 246 | /* ----- JSON unrelated methods ----- */ 247 | 248 | @JsonIgnore 249 | public static String indexNameForYear(int year) { 250 | return INDEX_NAME + "." 
+ year; 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /demo-dvf/src/main/java/io/mincong/dvf/model/TransactionRow.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.model; 2 | 3 | import com.fasterxml.jackson.annotation.JsonProperty; 4 | import com.fasterxml.jackson.annotation.JsonPropertyOrder; 5 | import com.fasterxml.jackson.databind.annotation.JsonDeserialize; 6 | import com.fasterxml.jackson.databind.annotation.JsonSerialize; 7 | import java.time.LocalDate; 8 | import java.util.Optional; 9 | import org.immutables.value.Value.Immutable; 10 | 11 | @Immutable 12 | @JsonSerialize(as = ImmutableTransactionRow.class) 13 | @JsonDeserialize(as = ImmutableTransactionRow.class) 14 | @JsonPropertyOrder({ 15 | "id_mutation", 16 | "date_mutation", 17 | "numero_disposition", 18 | "nature_mutation", 19 | "valeur_fonciere", 20 | "adresse_numero", 21 | "adresse_suffixe", 22 | "adresse_nom_voie", 23 | "adresse_code_voie", 24 | "code_postal", 25 | "code_commune", 26 | "nom_commune", 27 | "code_departement", 28 | "ancien_code_commune", 29 | "ancien_nom_commune", 30 | "id_parcelle", 31 | "ancien_id_parcelle", 32 | "numero_volume", 33 | "lot1_numero", 34 | "lot1_surface_carrez", 35 | "lot2_numero", 36 | "lot2_surface_carrez", 37 | "lot3_numero", 38 | "lot3_surface_carrez", 39 | "lot4_numero", 40 | "lot4_surface_carrez", 41 | "lot5_numero", 42 | "lot5_surface_carrez", 43 | "nombre_lots", 44 | "code_type_local", 45 | "type_local", 46 | "surface_reelle_bati", 47 | "nombre_pieces_principales", 48 | "code_nature_culture", 49 | "nature_culture", 50 | "code_nature_culture_speciale", 51 | "nature_culture_speciale", 52 | "surface_terrain", 53 | "location", 54 | "longitude", 55 | "latitude" 56 | }) 57 | public interface TransactionRow { 58 | 59 | @JsonProperty("id_mutation") 60 | String mutationId(); 61 | 62 | @JsonProperty("date_mutation") 63 | LocalDate mutationDate(); 64 
| 65 | @JsonProperty("numero_disposition") 66 | String dispositionNumber(); 67 | 68 | @JsonProperty("nature_mutation") 69 | String mutationNature(); 70 | 71 | @JsonProperty("valeur_fonciere") 72 | double propertyValue(); 73 | 74 | @JsonProperty("adresse_numero") 75 | String addressNumber(); 76 | 77 | @JsonProperty("adresse_suffixe") 78 | String addressSuffix(); 79 | 80 | @JsonProperty("adresse_nom_voie") 81 | String addressRoadName(); 82 | 83 | @JsonProperty("adresse_code_voie") 84 | String addressRoadCode(); 85 | 86 | @JsonProperty("code_postal") 87 | String postalCode(); 88 | 89 | @JsonProperty("code_commune") 90 | String communeCode(); 91 | 92 | @JsonProperty("nom_commune") 93 | String communeName(); 94 | 95 | @JsonProperty("code_departement") 96 | String departmentCode(); 97 | 98 | @JsonProperty("ancien_code_commune") 99 | String oldCommuneCode(); 100 | 101 | @JsonProperty("ancien_nom_commune") 102 | String oldCommuneName(); 103 | 104 | @JsonProperty("id_parcelle") 105 | String plotId(); 106 | 107 | @JsonProperty("ancien_id_parcelle") 108 | String oldPlotId(); 109 | 110 | @JsonProperty("numero_volume") 111 | String volumeNumber(); 112 | 113 | @JsonProperty("lot1_numero") 114 | String numberLot1(); 115 | 116 | @JsonProperty("lot1_surface_carrez") 117 | Optional surfaceSquareLot1(); 118 | 119 | @JsonProperty("lot2_numero") 120 | String numberLot2(); 121 | 122 | @JsonProperty("lot2_surface_carrez") 123 | Optional surfaceSquareLot2(); 124 | 125 | @JsonProperty("lot3_numero") 126 | String numberLot3(); 127 | 128 | @JsonProperty("lot3_surface_carrez") 129 | Optional surfaceSquareLot3(); 130 | 131 | @JsonProperty("lot4_numero") 132 | String numberLot4(); 133 | 134 | @JsonProperty("lot4_surface_carrez") 135 | Optional surfaceSquareLot4(); 136 | 137 | @JsonProperty("lot5_numero") 138 | String numberLot5(); 139 | 140 | @JsonProperty("lot5_surface_carrez") 141 | Optional surfaceSquareLot5(); 142 | 143 | @JsonProperty("nombre_lots") 144 | int lotsCount(); 145 | 146 | 
@JsonProperty("code_type_local") 147 | String localTypeCode(); 148 | 149 | @JsonProperty("type_local") 150 | String localType(); 151 | 152 | @JsonProperty("surface_reelle_bati") 153 | Optional realBuiltUpArea(); 154 | 155 | @JsonProperty("nombre_pieces_principales") 156 | Optional principlePiecesCount(); 157 | 158 | @JsonProperty("code_nature_culture") 159 | String natureCultureCode(); 160 | 161 | @JsonProperty("nature_culture") 162 | String natureCulture(); 163 | 164 | @JsonProperty("code_nature_culture_speciale") 165 | String specialNatureCultureCode(); 166 | 167 | @JsonProperty("nature_culture_speciale") 168 | String specialNatureCulture(); 169 | 170 | @JsonProperty("surface_terrain") 171 | double landSurface(); 172 | 173 | @JsonProperty("longitude") 174 | Optional longitude(); 175 | 176 | @JsonProperty("latitude") 177 | Optional latitude(); 178 | 179 | default ImmutableTransaction toTransactionObj() { 180 | final Optional optLocation; 181 | if (longitude().isPresent() && latitude().isPresent()) { 182 | optLocation = Optional.of(Location.of(longitude().get(), latitude().get())); 183 | } else { 184 | optLocation = Optional.empty(); 185 | } 186 | 187 | return ImmutableTransaction.builder() 188 | .mutationId(mutationId()) 189 | .mutationDate(mutationDate()) 190 | .dispositionNumber(dispositionNumber()) 191 | .mutationNature(mutationNature()) 192 | .propertyValue(propertyValue()) 193 | .addressNumber(addressNumber()) 194 | .addressSuffix(addressSuffix()) 195 | .addressRoadName(addressRoadName()) 196 | .addressRoadCode(addressRoadCode()) 197 | .postalCode(postalCode()) 198 | .communeCode(communeCode()) 199 | .communeName(communeName()) 200 | .departmentCode(departmentCode()) 201 | .oldCommuneCode(oldCommuneCode()) 202 | .oldCommuneName(oldCommuneName()) 203 | .plotId(plotId()) 204 | .oldPlotId(oldPlotId()) 205 | .volumeNumber(volumeNumber()) 206 | .numberLot1(numberLot1()) 207 | .surfaceSquareLot1(surfaceSquareLot1()) 208 | .numberLot2(numberLot2()) 209 | 
.surfaceSquareLot2(surfaceSquareLot2()) 210 | .numberLot3(numberLot3()) 211 | .surfaceSquareLot3(surfaceSquareLot3()) 212 | .numberLot4(numberLot4()) 213 | .surfaceSquareLot4(surfaceSquareLot4()) 214 | .numberLot5(numberLot5()) 215 | .surfaceSquareLot5(surfaceSquareLot5()) 216 | .lotsCount(lotsCount()) 217 | .localTypeCode(localTypeCode()) 218 | .localType(localType()) 219 | .realBuiltUpArea(realBuiltUpArea()) 220 | .principlePiecesCount(principlePiecesCount()) 221 | .natureCultureCode(natureCultureCode()) 222 | .natureCulture(natureCulture()) 223 | .specialNatureCultureCode(specialNatureCultureCode()) 224 | .specialNatureCulture(specialNatureCulture()) 225 | .landSurface(landSurface()) 226 | .location(optLocation) 227 | .build(); 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /demo-dvf/src/main/java/io/mincong/dvf/service/EsWriter.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import io.mincong.dvf.model.ImmutableTransaction; 4 | import java.util.List; 5 | import java.util.concurrent.CompletableFuture; 6 | import java.util.stream.Stream; 7 | 8 | public interface EsWriter { 9 | void createIndex(); 10 | 11 | default CompletableFuture write(ImmutableTransaction... 
items) { 12 | return write(Stream.of(List.of(items))); 13 | } 14 | 15 | CompletableFuture write(Stream> items); 16 | } 17 | -------------------------------------------------------------------------------- /demo-dvf/src/main/java/io/mincong/dvf/service/Jackson.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import com.fasterxml.jackson.databind.SerializationFeature; 5 | import com.fasterxml.jackson.dataformat.csv.CsvMapper; 6 | import com.fasterxml.jackson.datatype.jdk8.Jdk8Module; 7 | import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; 8 | import com.fasterxml.jackson.module.paramnames.ParameterNamesModule; 9 | 10 | public class Jackson { 11 | 12 | public static CsvMapper newCsvMapper() { 13 | var csvMapper = new CsvMapper(); 14 | csvMapper.registerModule(new ParameterNamesModule()); 15 | csvMapper.registerModule(new Jdk8Module()); 16 | csvMapper.registerModule(new JavaTimeModule()); 17 | csvMapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); 18 | return csvMapper; 19 | } 20 | 21 | public static ObjectMapper newObjectMapper() { 22 | var objectMapper = new ObjectMapper(); 23 | objectMapper.registerModule(new ParameterNamesModule()); 24 | objectMapper.registerModule(new Jdk8Module()); 25 | objectMapper.registerModule(new JavaTimeModule()); 26 | objectMapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); 27 | return objectMapper; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /demo-dvf/src/main/java/io/mincong/dvf/service/TransactionBulkEsWriter.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import io.mincong.dvf.model.ImmutableTransaction; 6 | 
import io.mincong.dvf.model.Transaction; 7 | import java.io.IOException; 8 | import java.util.List; 9 | import java.util.concurrent.CompletableFuture; 10 | import java.util.concurrent.Executor; 11 | import java.util.concurrent.atomic.AtomicInteger; 12 | import java.util.concurrent.atomic.AtomicLong; 13 | import java.util.stream.Collectors; 14 | import java.util.stream.Stream; 15 | import org.apache.logging.log4j.LogManager; 16 | import org.apache.logging.log4j.Logger; 17 | import org.elasticsearch.action.bulk.BulkRequest; 18 | import org.elasticsearch.action.index.IndexRequest; 19 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 20 | import org.elasticsearch.client.RequestOptions; 21 | import org.elasticsearch.client.RestHighLevelClient; 22 | import org.elasticsearch.client.indices.CreateIndexRequest; 23 | import org.elasticsearch.client.indices.CreateIndexResponse; 24 | import org.elasticsearch.common.xcontent.XContentType; 25 | 26 | public class TransactionBulkEsWriter implements EsWriter { 27 | 28 | private static final Logger logger = LogManager.getLogger(TransactionBulkEsWriter.class); 29 | 30 | private final RestHighLevelClient client; 31 | private final ObjectMapper objectMapper; 32 | private final AtomicInteger counter; 33 | private final RefreshPolicy refreshPolicy; 34 | private final Executor executor; 35 | private final String indexName; 36 | 37 | public TransactionBulkEsWriter( 38 | RestHighLevelClient client, 39 | String indexName, 40 | Executor executor, 41 | RefreshPolicy refreshPolicy) { 42 | this.client = client; 43 | this.indexName = indexName; 44 | this.objectMapper = Jackson.newObjectMapper(); 45 | this.counter = new AtomicInteger(0); 46 | this.executor = executor; 47 | this.refreshPolicy = refreshPolicy; 48 | } 49 | 50 | @Override 51 | public CompletableFuture write(Stream> transactions) { 52 | var resultCount = new AtomicLong(); 53 | var cfs = transactions.map(tx -> indexAsync(tx, 
resultCount)).collect(Collectors.toList()); 54 | return CompletableFuture.allOf(cfs.toArray(CompletableFuture[]::new)) 55 | .thenApply(ignored -> resultCount.get()); 56 | } 57 | 58 | @Override 59 | public void createIndex() { 60 | var request = new CreateIndexRequest(indexName).mapping(Transaction.esMappings()); 61 | CreateIndexResponse response; 62 | try { 63 | response = client.indices().create(request, RequestOptions.DEFAULT); 64 | } catch (IOException e) { 65 | throw new IllegalStateException("Failed to create index " + indexName, e); 66 | } 67 | if (!response.isAcknowledged()) { 68 | throw new IllegalStateException( 69 | "Failed to create index " + indexName + ": response was not acknowledged"); 70 | } 71 | logger.info("Creation of index {} is acknowledged", indexName); 72 | } 73 | 74 | private CompletableFuture indexAsync( 75 | List transactions, AtomicLong counter) { 76 | return CompletableFuture.supplyAsync(() -> index(transactions), executor) 77 | .thenApply( 78 | results -> { 79 | counter.addAndGet(results); 80 | return null; 81 | }); 82 | } 83 | 84 | private long index(List transactions) { 85 | logger.info( 86 | "Indexing transaction {}: {}", 87 | counter.getAndIncrement(), 88 | transactions.stream() 89 | .map(ImmutableTransaction::mutationId) 90 | .collect(Collectors.joining(","))); 91 | 92 | var bulkRequest = new BulkRequest().setRefreshPolicy(refreshPolicy); 93 | 94 | for (var transaction : transactions) { 95 | try { 96 | var json = objectMapper.writeValueAsString(transaction); 97 | bulkRequest.add(new IndexRequest(indexName).source(json, XContentType.JSON)); 98 | } catch (JsonProcessingException e) { 99 | // This should never happen 100 | throw new IllegalStateException("Failed to serialize transaction " + transaction, e); 101 | } 102 | } 103 | 104 | try { 105 | var response = client.bulk(bulkRequest, RequestOptions.DEFAULT); 106 | return response.getItems().length; 107 | } catch (IOException e) { 108 | var msg = 109 | String.format( 110 | "Failed to 
index %d transactions: %s", 111 | transactions.size(), 112 | transactions.stream() 113 | .map(ImmutableTransaction::mutationId) 114 | .collect(Collectors.joining(","))); 115 | logger.error(msg, e); 116 | throw new IllegalStateException(msg, e); 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /demo-dvf/src/main/java/io/mincong/dvf/service/TransactionCsvReader.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import static java.util.Spliterator.ORDERED; 4 | 5 | import com.fasterxml.jackson.databind.ObjectReader; 6 | import io.mincong.dvf.model.ImmutableTransaction; 7 | import io.mincong.dvf.model.ImmutableTransactionRow; 8 | import io.mincong.dvf.model.TransactionRow; 9 | import java.io.IOException; 10 | import java.nio.file.Path; 11 | import java.util.*; 12 | import java.util.stream.Collectors; 13 | import java.util.stream.Stream; 14 | import java.util.stream.StreamSupport; 15 | import org.apache.logging.log4j.LogManager; 16 | import org.apache.logging.log4j.Logger; 17 | 18 | public class TransactionCsvReader { 19 | 20 | private static final Logger logger = LogManager.getLogger(TransactionCsvReader.class); 21 | private final ObjectReader objectReader; 22 | private final int bulkSize; 23 | 24 | public TransactionCsvReader(int bulkSize) { 25 | var csvMapper = Jackson.newCsvMapper(); 26 | var csvSchema = csvMapper.schemaFor(TransactionRow.class).withHeader(); 27 | this.objectReader = csvMapper.readerFor(TransactionRow.class).with(csvSchema); 28 | this.bulkSize = bulkSize; 29 | } 30 | 31 | public Stream> readCsv(Path... 
paths) { 32 | return Stream.of(paths) 33 | .flatMap( 34 | path -> { 35 | Iterator iterator; 36 | try { 37 | logger.info("Reading file {}", path); 38 | iterator = objectReader.readValues(path.toFile()); 39 | } catch (IOException e) { 40 | throw new IllegalStateException("Failed to read file " + path, e); 41 | } 42 | var bulkIterator = new BulkIterator<>(iterator, bulkSize); 43 | return StreamSupport.stream( 44 | Spliterators.spliteratorUnknownSize(bulkIterator, ORDERED), false); 45 | }) 46 | .map( 47 | rows -> 48 | rows.stream().map(TransactionRow::toTransactionObj).collect(Collectors.toList())); 49 | } 50 | 51 | static class BulkIterator implements Iterator> { 52 | private final Iterator iterator; 53 | private final int bulkSize; 54 | 55 | public BulkIterator(Iterator iterator, int bulkSize) { 56 | this.iterator = iterator; 57 | this.bulkSize = bulkSize; 58 | } 59 | 60 | @Override 61 | public boolean hasNext() { 62 | return iterator.hasNext(); 63 | } 64 | 65 | @Override 66 | public List next() { 67 | if (!hasNext()) { 68 | throw new NoSuchElementException(); 69 | } 70 | var results = new ArrayList(bulkSize); 71 | while (hasNext() && results.size() < bulkSize) { 72 | results.add(iterator.next()); 73 | } 74 | return results; 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /demo-dvf/src/main/java/io/mincong/dvf/service/TransactionSimpleEsWriter.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import com.fasterxml.jackson.core.JsonProcessingException; 4 | import com.fasterxml.jackson.databind.ObjectMapper; 5 | import io.mincong.dvf.model.ImmutableTransaction; 6 | import io.mincong.dvf.model.Transaction; 7 | import java.io.IOException; 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | import java.util.concurrent.CompletableFuture; 11 | import java.util.concurrent.atomic.AtomicInteger; 12 | import 
java.util.stream.Collectors; 13 | import java.util.stream.Stream; 14 | import org.apache.logging.log4j.LogManager; 15 | import org.apache.logging.log4j.Logger; 16 | import org.elasticsearch.action.index.IndexRequest; 17 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 18 | import org.elasticsearch.client.RequestOptions; 19 | import org.elasticsearch.client.RestHighLevelClient; 20 | import org.elasticsearch.client.indices.CreateIndexRequest; 21 | import org.elasticsearch.client.indices.CreateIndexResponse; 22 | import org.elasticsearch.common.xcontent.XContentType; 23 | 24 | public class TransactionSimpleEsWriter implements EsWriter { 25 | 26 | private static final Logger logger = LogManager.getLogger(TransactionSimpleEsWriter.class); 27 | 28 | private final RestHighLevelClient client; 29 | private final ObjectMapper objectMapper; 30 | private final AtomicInteger counter; 31 | private final RefreshPolicy refreshPolicy; 32 | private final String indexName; 33 | 34 | public TransactionSimpleEsWriter( 35 | RestHighLevelClient client, String indexName, RefreshPolicy refreshPolicy) { 36 | this.client = client; 37 | this.indexName = indexName; 38 | this.objectMapper = Jackson.newObjectMapper(); 39 | this.counter = new AtomicInteger(0); 40 | this.refreshPolicy = refreshPolicy; 41 | } 42 | 43 | @Override 44 | public CompletableFuture write(Stream> transactions) { 45 | var cfs = transactions.flatMap(List::stream).map(this::index).collect(Collectors.toList()); 46 | return CompletableFuture.allOf(cfs.toArray(CompletableFuture[]::new)) 47 | .thenApply( 48 | ignored -> { 49 | List ids = new ArrayList<>(); 50 | for (var cf : cfs) { 51 | if (cf.isDone()) { 52 | ids.addAll(cf.join()); 53 | } 54 | } 55 | return (long) ids.size(); 56 | }); 57 | } 58 | 59 | @Override 60 | public void createIndex() { 61 | var request = new CreateIndexRequest(indexName).mapping(Transaction.esMappings()); 62 | CreateIndexResponse response; 63 | try { 64 | response = 
client.indices().create(request, RequestOptions.DEFAULT); 65 | } catch (IOException e) { 66 | throw new IllegalStateException("Failed to create index " + indexName, e); 67 | } 68 | if (!response.isAcknowledged()) { 69 | throw new IllegalStateException( 70 | "Failed to create index " + indexName + ": response was not acknowledged"); 71 | } 72 | logger.info("Creation of index {} is acknowledged", indexName); 73 | } 74 | 75 | private CompletableFuture> index(ImmutableTransaction transaction) { 76 | logger.info("Indexing transaction {}: {}", counter.getAndIncrement(), transaction.mutationId()); 77 | 78 | String json; 79 | try { 80 | json = objectMapper.writeValueAsString(transaction); 81 | } catch (JsonProcessingException e) { 82 | // This should never happen 83 | logger.error("Failed to serialize transaction " + transaction, e); 84 | return CompletableFuture.failedFuture(e); 85 | } 86 | var request = 87 | new IndexRequest(indexName).source(json, XContentType.JSON).setRefreshPolicy(refreshPolicy); 88 | try { 89 | var response = client.index(request, RequestOptions.DEFAULT); 90 | return CompletableFuture.completedFuture(List.of(response.getId())); 91 | } catch (IOException e) { 92 | logger.error("Transaction: FAILED", e); 93 | return CompletableFuture.failedFuture(e); 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /demo-dvf/src/main/resources/aggregations/request.paris.lot-type-stats-and-percentiles.json: -------------------------------------------------------------------------------- 1 | { 2 | "query": { 3 | "bool": { 4 | "must": [ 5 | { 6 | "wildcard": { 7 | "postal_code": { 8 | "value": "75*" 9 | } 10 | } 11 | } 12 | ], 13 | "filter": [ 14 | { 15 | "match": { 16 | "mutation_nature": { 17 | "query": "Vente" 18 | } 19 | } 20 | }, 21 | { 22 | "match": { 23 | "local_type": { 24 | "query": "Appartement" 25 | } 26 | } 27 | }, 28 | { 29 | "range": { 30 | "property_value": { 31 | "gt": 0 32 | } 33 | } 34 | }, 35 | { 
36 | "range": { 37 | "real_built_up_area": { 38 | "gt": 0 39 | } 40 | } 41 | } 42 | ] 43 | } 44 | }, 45 | "runtime_mappings": { 46 | "price_m2": { 47 | "type": "double", 48 | "script": "emit(doc['property_value'].value / doc['real_built_up_area'].value)" 49 | }, 50 | "lot_type": { 51 | "type": "keyword", 52 | "script": "if (0 < doc['lots_count'].value && doc['lots_count'].value < 6) { emit('T' + doc['lots_count'].value) } else { emit('Others') }" 53 | } 54 | }, 55 | "size": 0, 56 | "aggs": { 57 | "lots-aggregation": { 58 | "terms": { 59 | "field": "lot_type" 60 | }, 61 | "aggs": { 62 | "price_m2/percentiles": { 63 | "percentiles": { 64 | "field": "price_m2" 65 | } 66 | }, 67 | "property_value/stats": { 68 | "stats": { 69 | "field": "property_value" 70 | } 71 | } 72 | } 73 | } 74 | } 75 | } -------------------------------------------------------------------------------- /demo-dvf/src/main/resources/aggregations/request.paris.price-stats-and-percentiles-per-postal-code.json: -------------------------------------------------------------------------------- 1 | { 2 | "query": { 3 | "bool": { 4 | "must": [ 5 | { 6 | "wildcard": { 7 | "postal_code": { 8 | "value": "75*" 9 | } 10 | } 11 | } 12 | ], 13 | "filter": [ 14 | { 15 | "match": { 16 | "mutation_nature": { 17 | "query": "Vente" 18 | } 19 | } 20 | }, 21 | { 22 | "match": { 23 | "local_type": { 24 | "query": "Appartement" 25 | } 26 | } 27 | }, 28 | { 29 | "range": { 30 | "property_value": { 31 | "gt": 0 32 | } 33 | } 34 | }, 35 | { 36 | "range": { 37 | "real_built_up_area": { 38 | "gt": 0 39 | } 40 | } 41 | } 42 | ] 43 | } 44 | }, 45 | "runtime_mappings": { 46 | "price_m2": { 47 | "type": "double", 48 | "script": "emit(doc['property_value'].value / doc['real_built_up_area'].value)" 49 | } 50 | }, 51 | "size": 0, 52 | "aggs": { 53 | "postal-code-aggregation": { 54 | "terms": { 55 | "field": "postal_code", 56 | "size": 20 57 | }, 58 | "aggs": { 59 | "price_m2/percentiles": { 60 | "percentiles": { 61 | "field": 
"price_m2" 62 | } 63 | }, 64 | "property_value/stats": { 65 | "stats": { 66 | "field": "property_value" 67 | } 68 | } 69 | } 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /demo-dvf/src/main/resources/aggregations/request.paris.price-stats-overview.json: -------------------------------------------------------------------------------- 1 | { 2 | "query": { 3 | "bool": { 4 | "must": [ 5 | { 6 | "wildcard": { 7 | "postal_code": { 8 | "value": "75*" 9 | } 10 | } 11 | } 12 | ], 13 | "filter": [ 14 | { 15 | "match": { 16 | "mutation_nature": { 17 | "query": "Vente" 18 | } 19 | } 20 | }, 21 | { 22 | "match": { 23 | "local_type": { 24 | "query": "Appartement" 25 | } 26 | } 27 | }, 28 | { 29 | "range": { 30 | "property_value": { 31 | "gt": 0 32 | } 33 | } 34 | }, 35 | { 36 | "range": { 37 | "real_built_up_area": { 38 | "gt": 0 39 | } 40 | } 41 | } 42 | ] 43 | } 44 | }, 45 | "runtime_mappings": { 46 | "price_m2": { 47 | "type": "double", 48 | "script": "emit(doc['property_value'].value / doc['real_built_up_area'].value)" 49 | } 50 | }, 51 | "size": 0, 52 | "aggs": { 53 | "property_value/stats": { 54 | "stats": { 55 | "field": "property_value" 56 | } 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /demo-dvf/src/main/resources/config/custom.elasticsearch.yml: -------------------------------------------------------------------------------- 1 | network.host: 0.0.0.0 2 | 3 | # Allow using elasticsearch-head (UI) 4 | # https://github.com/mobz/elasticsearch-head 5 | http.cors.enabled: true 6 | http.cors.allow-origin: "*" 7 | -------------------------------------------------------------------------------- /demo-dvf/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- 
/demo-dvf/src/test/java/io/mincong/dvf/model/TestModels.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.model; 2 | 3 | import java.time.LocalDate; 4 | import java.util.Optional; 5 | 6 | public class TestModels { 7 | 8 | public static final ImmutableTransaction TRANSACTION_1 = 9 | ImmutableTransaction.builder() 10 | .mutationId("2020-1") 11 | .mutationDate(LocalDate.of(2020, 1, 7)) 12 | .dispositionNumber("000001") 13 | .mutationNature("Vente") 14 | .propertyValue(8000) 15 | .addressNumber("") 16 | .addressSuffix("") 17 | .addressRoadName("FORTUNAT") 18 | .addressRoadCode("B063") 19 | .postalCode("01250") 20 | .communeCode("01072") 21 | .communeName("Ceyzériat") 22 | .departmentCode("01") 23 | .oldCommuneCode("") 24 | .oldCommuneName("") 25 | .plotId("01072000AK0216") 26 | .oldPlotId("") 27 | .volumeNumber("") 28 | .numberLot1("") 29 | .surfaceSquareLot1(Optional.empty()) 30 | .numberLot2("") 31 | .surfaceSquareLot2(Optional.empty()) 32 | .numberLot3("") 33 | .surfaceSquareLot3(Optional.empty()) 34 | .numberLot4("") 35 | .surfaceSquareLot4(Optional.empty()) 36 | .numberLot5("") 37 | .surfaceSquareLot5(Optional.empty()) 38 | .lotsCount(0) 39 | .localTypeCode("") 40 | .localType("") 41 | .realBuiltUpArea(Optional.empty()) 42 | .principlePiecesCount(Optional.empty()) 43 | .natureCultureCode("T") 44 | .natureCulture("terres") 45 | .specialNatureCultureCode("") 46 | .specialNatureCulture("") 47 | .landSurface(1061) 48 | .location(Location.of(5.323522, 46.171899)) 49 | .build(); 50 | 51 | public static final ImmutableTransaction TRANSACTION_2 = 52 | ImmutableTransaction.builder() 53 | .mutationId("2020-2") 54 | .mutationDate(LocalDate.of(2020, 1, 7)) 55 | .dispositionNumber("000001") 56 | .mutationNature("Vente") 57 | .propertyValue(75000) 58 | .addressNumber("") 59 | .addressSuffix("") 60 | .addressRoadName("RUE DE LA CHARTREUSE") 61 | .addressRoadCode("0064") 62 | .postalCode("01960") 63 | 
.communeCode("01289") 64 | .communeName("Péronnas") 65 | .departmentCode("01") 66 | .oldCommuneCode("") 67 | .oldCommuneName("") 68 | .plotId("01289000AI0210") 69 | .oldPlotId("") 70 | .volumeNumber("") 71 | .numberLot1("") 72 | .surfaceSquareLot1(Optional.empty()) 73 | .numberLot2("") 74 | .surfaceSquareLot2(Optional.empty()) 75 | .numberLot3("") 76 | .surfaceSquareLot3(Optional.empty()) 77 | .numberLot4("") 78 | .surfaceSquareLot4(Optional.empty()) 79 | .numberLot5("") 80 | .surfaceSquareLot5(Optional.empty()) 81 | .lotsCount(0) 82 | .localTypeCode("") 83 | .localType("") 84 | .realBuiltUpArea(Optional.empty()) 85 | .principlePiecesCount(Optional.empty()) 86 | .natureCultureCode("AB") 87 | .natureCulture("terrains a bâtir") 88 | .specialNatureCultureCode("") 89 | .specialNatureCulture("") 90 | .landSurface(610) 91 | .location(Location.of(5.226197, 46.184538)) 92 | .build(); 93 | 94 | public static final ImmutableTransaction TRANSACTION_3 = 95 | ImmutableTransaction.builder() 96 | .mutationId("2020-3") 97 | .mutationDate(LocalDate.of(2020, 1, 14)) 98 | .dispositionNumber("000001") 99 | .mutationNature("Vente") 100 | .propertyValue(89000) 101 | .addressNumber("") 102 | .addressSuffix("") 103 | .addressRoadName("VACAGNOLE") 104 | .addressRoadCode("B112") 105 | .postalCode("01340") 106 | .communeCode("01024") 107 | .communeName("Attignat") 108 | .departmentCode("01") 109 | .oldCommuneCode("") 110 | .oldCommuneName("") 111 | .plotId("01024000AL0120") 112 | .oldPlotId("") 113 | .volumeNumber("") 114 | .numberLot1("") 115 | .surfaceSquareLot1(Optional.empty()) 116 | .numberLot2("") 117 | .surfaceSquareLot2(Optional.empty()) 118 | .numberLot3("") 119 | .surfaceSquareLot3(Optional.empty()) 120 | .numberLot4("") 121 | .surfaceSquareLot4(Optional.empty()) 122 | .numberLot5("") 123 | .surfaceSquareLot5(Optional.empty()) 124 | .lotsCount(0) 125 | .localTypeCode("") 126 | .localType("") 127 | .realBuiltUpArea(Optional.empty()) 128 | .principlePiecesCount(Optional.empty()) 129 
| .natureCultureCode("AB") 130 | .natureCulture("terrains a bâtir") 131 | .specialNatureCultureCode("") 132 | .specialNatureCulture("") 133 | .landSurface(600) 134 | .location(Optional.empty()) 135 | .build(); 136 | 137 | public static final ImmutableTransaction TRANSACTION_4 = 138 | ImmutableTransaction.builder() 139 | .mutationId("2020-3") 140 | .mutationDate(LocalDate.of(2020, 1, 14)) 141 | .dispositionNumber("000001") 142 | .mutationNature("Vente") 143 | .propertyValue(89000) 144 | .addressNumber("") 145 | .addressSuffix("") 146 | .addressRoadName("VACAGNOLE") 147 | .addressRoadCode("B112") 148 | .postalCode("01340") 149 | .communeCode("01024") 150 | .communeName("Attignat") 151 | .departmentCode("01") 152 | .oldCommuneCode("") 153 | .oldCommuneName("") 154 | .plotId("01024000AL0132") 155 | .oldPlotId("") 156 | .volumeNumber("") 157 | .numberLot1("") 158 | .surfaceSquareLot1(Optional.empty()) 159 | .numberLot2("") 160 | .surfaceSquareLot2(Optional.empty()) 161 | .numberLot3("") 162 | .surfaceSquareLot3(Optional.empty()) 163 | .numberLot4("") 164 | .surfaceSquareLot4(Optional.empty()) 165 | .numberLot5("") 166 | .surfaceSquareLot5(Optional.empty()) 167 | .lotsCount(0) 168 | .localTypeCode("") 169 | .localType("") 170 | .realBuiltUpArea(Optional.empty()) 171 | .principlePiecesCount(Optional.empty()) 172 | .natureCultureCode("AB") 173 | .natureCulture("terrains a bâtir") 174 | .specialNatureCultureCode("") 175 | .specialNatureCulture("") 176 | .landSurface(491) 177 | .location(Optional.empty()) 178 | .build(); 179 | 180 | public static final ImmutableTransaction TRANSACTION_5 = 181 | ImmutableTransaction.builder() 182 | .mutationId("2020-4") 183 | .mutationDate(LocalDate.of(2020, 1, 22)) 184 | .dispositionNumber("000001") 185 | .mutationNature("Vente") 186 | .propertyValue(209950) 187 | .addressNumber("11") 188 | .addressSuffix("") 189 | .addressRoadName("RUE REYER") 190 | .addressRoadCode("3340") 191 | .postalCode("01000") 192 | .communeCode("01053") 193 | 
.communeName("Bourg-en-Bresse") 194 | .departmentCode("01") 195 | .oldCommuneCode("") 196 | .oldCommuneName("") 197 | .plotId("01053000AL0074") 198 | .oldPlotId("") 199 | .volumeNumber("") 200 | .numberLot1("") 201 | .surfaceSquareLot1(Optional.empty()) 202 | .numberLot2("") 203 | .surfaceSquareLot2(Optional.empty()) 204 | .numberLot3("") 205 | .surfaceSquareLot3(Optional.empty()) 206 | .numberLot4("") 207 | .surfaceSquareLot4(Optional.empty()) 208 | .numberLot5("") 209 | .surfaceSquareLot5(Optional.empty()) 210 | .lotsCount(0) 211 | .localTypeCode("1") 212 | .localType("Maison") 213 | .realBuiltUpArea(78) 214 | .principlePiecesCount(5) 215 | .natureCultureCode("S") 216 | .natureCulture("sols") 217 | .specialNatureCultureCode("") 218 | .specialNatureCulture("") 219 | .landSurface(242) 220 | .location(Location.of(5.219902, 46.196484)) 221 | .build(); 222 | } 223 | -------------------------------------------------------------------------------- /demo-dvf/src/test/java/io/mincong/dvf/service/JacksonTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.time.LocalDate; 6 | import org.junit.Test; 7 | 8 | public class JacksonTest { 9 | 10 | @Test 11 | public void newObjectMapper() throws Exception { 12 | var mapper = Jackson.newObjectMapper(); 13 | var string = mapper.writeValueAsString(LocalDate.of(2020, 12, 16)); 14 | assertThat(string).isEqualTo("\"2020-12-16\""); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /demo-dvf/src/test/java/io/mincong/dvf/service/MainIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import org.apache.http.HttpHost; 4 | import org.elasticsearch.client.RestClient; 5 | import org.elasticsearch.client.RestHighLevelClient; 6 | import 
org.elasticsearch.test.rest.ESRestTestCase; 7 | import org.junit.*; 8 | 9 | public class MainIT extends ESRestTestCase { 10 | 11 | @BeforeClass 12 | public static void setUpBeforeClass() { 13 | System.setProperty("tests.rest.cluster", "localhost:19200"); 14 | } 15 | 16 | @AfterClass 17 | public static void tearDownAfterClass() { 18 | System.clearProperty("tests.rest.cluster"); 19 | } 20 | 21 | private RestHighLevelClient restClient; 22 | 23 | @Before 24 | @Override 25 | public void setUp() throws Exception { 26 | super.setUp(); 27 | 28 | var builder = RestClient.builder(new HttpHost("localhost", 19200, "http")); 29 | restClient = new RestHighLevelClient(builder); 30 | } 31 | 32 | @After 33 | public void tearDown() throws Exception { 34 | restClient.close(); 35 | super.tearDown(); 36 | } 37 | 38 | // @Test 39 | // public void testRun() throws Exception { 40 | // // Given 41 | // var main = new Main(); 42 | // 43 | // // When 44 | // main.run(restClient).get(10, SECONDS); 45 | // 46 | // // Then 47 | // } 48 | } 49 | -------------------------------------------------------------------------------- /demo-dvf/src/test/java/io/mincong/dvf/service/TransactionBulkEsWriterIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import io.mincong.dvf.model.Transaction; 4 | import java.util.concurrent.ExecutorService; 5 | import java.util.concurrent.Executors; 6 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 7 | import org.junit.After; 8 | import org.junit.Before; 9 | 10 | public class TransactionBulkEsWriterIT extends TransactionEsWriterAbstractIT { 11 | 12 | private ExecutorService executor; 13 | 14 | @Override 15 | @Before 16 | public void setUp() throws Exception { 17 | super.setUp(); 18 | executor = Executors.newSingleThreadExecutor(); 19 | } 20 | 21 | @Override 22 | @After 23 | public void tearDown() throws Exception { 24 | executor.shutdownNow(); 25 | super.tearDown(); 
26 | } 27 | 28 | @Override 29 | protected EsWriter newEsWriter() { 30 | return new TransactionBulkEsWriter( 31 | restClient, Transaction.indexNameForYear(year), executor, RefreshPolicy.IMMEDIATE); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /demo-dvf/src/test/java/io/mincong/dvf/service/TransactionCsvReaderTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import static io.mincong.dvf.model.TestModels.*; 4 | import static org.assertj.core.api.Assertions.assertThat; 5 | 6 | import io.mincong.dvf.service.TransactionCsvReader.BulkIterator; 7 | import java.nio.file.Path; 8 | import java.util.List; 9 | import java.util.function.Function; 10 | import org.assertj.core.api.Assertions; 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | 14 | public class TransactionCsvReaderTest { 15 | 16 | private Path csvPath; 17 | 18 | @Before 19 | public void setUp() { 20 | var classLoader = getClass().getClassLoader(); 21 | csvPath = Path.of(classLoader.getResource("dvf-samples.csv").getFile()); 22 | } 23 | 24 | @Test 25 | public void testTransaction() { 26 | var reader = new TransactionCsvReader(2000); 27 | assertThat(reader.readCsv(csvPath)) 28 | .hasSize(1) 29 | .flatExtracting(Function.identity()) 30 | .contains(TRANSACTION_1, TRANSACTION_2, TRANSACTION_3, TRANSACTION_4, TRANSACTION_5); 31 | } 32 | 33 | @Test 34 | public void testBulkIterator2() { 35 | var transactions = 36 | List.of(TRANSACTION_1, TRANSACTION_2, TRANSACTION_3, TRANSACTION_4, TRANSACTION_5); 37 | var iterator = new BulkIterator<>(transactions.iterator(), 2); 38 | Assertions.assertThat(iterator) 39 | .toIterable() 40 | .containsExactly( 41 | List.of(TRANSACTION_1, TRANSACTION_2), 42 | List.of(TRANSACTION_3, TRANSACTION_4), 43 | List.of(TRANSACTION_5)); 44 | } 45 | 46 | @Test 47 | public void testBulkIterator3() { 48 | var transactions = 49 | List.of(TRANSACTION_1, 
TRANSACTION_2, TRANSACTION_3, TRANSACTION_4, TRANSACTION_5); 50 | var iterator = new BulkIterator<>(transactions.iterator(), 3); 51 | Assertions.assertThat(iterator) 52 | .toIterable() 53 | .containsExactly( 54 | List.of(TRANSACTION_1, TRANSACTION_2, TRANSACTION_3), 55 | List.of(TRANSACTION_4, TRANSACTION_5)); 56 | } 57 | 58 | @Test 59 | public void testBulkIterator4() { 60 | var transactions = 61 | List.of(TRANSACTION_1, TRANSACTION_2, TRANSACTION_3, TRANSACTION_4, TRANSACTION_5); 62 | var iterator = new BulkIterator<>(transactions.iterator(), 4); 63 | Assertions.assertThat(iterator) 64 | .toIterable() 65 | .containsExactly( 66 | List.of(TRANSACTION_1, TRANSACTION_2, TRANSACTION_3, TRANSACTION_4), 67 | List.of(TRANSACTION_5)); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /demo-dvf/src/test/java/io/mincong/dvf/service/TransactionEsAggregatorIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import static io.mincong.dvf.model.TestModels.*; 4 | import static java.util.concurrent.TimeUnit.SECONDS; 5 | 6 | import io.mincong.dvf.model.ImmutableTransaction; 7 | import io.mincong.dvf.model.Transaction; 8 | import java.util.Map; 9 | import java.util.concurrent.ExecutorService; 10 | import java.util.concurrent.Executors; 11 | import org.apache.http.HttpHost; 12 | import org.assertj.core.api.Assertions; 13 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 14 | import org.elasticsearch.client.RestClient; 15 | import org.elasticsearch.client.RestHighLevelClient; 16 | import org.elasticsearch.test.rest.ESRestTestCase; 17 | import org.junit.*; 18 | 19 | public class TransactionEsAggregatorIT extends ESRestTestCase { 20 | 21 | @BeforeClass 22 | public static void setUpBeforeClass() { 23 | System.setProperty("tests.rest.cluster", "localhost:19200"); 24 | } 25 | 26 | @AfterClass 27 | public static void tearDownAfterClass() { 
28 | System.clearProperty("tests.rest.cluster"); 29 | } 30 | 31 | private final ImmutableTransaction[] transactions = { 32 | TRANSACTION_1, TRANSACTION_2, TRANSACTION_3, TRANSACTION_4 33 | }; 34 | private final int year = 2020; 35 | 36 | private RestHighLevelClient restClient; 37 | private ExecutorService executor; 38 | 39 | @Before 40 | @Override 41 | public void setUp() throws Exception { 42 | super.setUp(); 43 | 44 | var builder = RestClient.builder(new HttpHost("localhost", 19200, "http")); 45 | restClient = new RestHighLevelClient(builder); 46 | 47 | executor = Executors.newSingleThreadExecutor(); 48 | var writer = 49 | new TransactionBulkEsWriter( 50 | restClient, Transaction.indexNameForYear(year), executor, RefreshPolicy.IMMEDIATE); 51 | writer.createIndex(); 52 | writer.write(transactions).get(10, SECONDS); 53 | } 54 | 55 | @After 56 | public void tearDown() throws Exception { 57 | restClient.close(); 58 | executor.shutdownNow(); 59 | super.tearDown(); 60 | } 61 | 62 | @Test 63 | public void testCountValueAggregation() { 64 | // Given 65 | var searcher = new TransactionEsAggregator(restClient, year); 66 | 67 | // When 68 | var valueCount = searcher.mutationIdsCount(); 69 | 70 | // Then 71 | Assertions.assertThat(valueCount.getValue()).isEqualTo(transactions.length); 72 | } 73 | 74 | @Test 75 | public void testPostalCode() { 76 | // Given 77 | var searcher = new TransactionEsAggregator(restClient, year); 78 | 79 | // When 80 | var stats = searcher.mutationsByPostalCode(); 81 | 82 | // Then 83 | Assertions.assertThat(stats).isEqualTo(Map.of("01340", 2L, "01250", 1L, "01960", 1L)); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /demo-dvf/src/test/java/io/mincong/dvf/service/TransactionEsWriterAbstractIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import static io.mincong.dvf.model.TestModels.*; 4 | import static 
java.util.concurrent.TimeUnit.SECONDS; 5 | 6 | import io.mincong.dvf.model.ImmutableTransaction; 7 | import io.mincong.dvf.model.Transaction; 8 | import org.apache.http.HttpHost; 9 | import org.assertj.core.api.Assertions; 10 | import org.elasticsearch.action.search.SearchRequest; 11 | import org.elasticsearch.client.RequestOptions; 12 | import org.elasticsearch.client.RestClient; 13 | import org.elasticsearch.client.RestHighLevelClient; 14 | import org.elasticsearch.test.rest.ESRestTestCase; 15 | import org.junit.*; 16 | 17 | public abstract class TransactionEsWriterAbstractIT extends ESRestTestCase { 18 | 19 | @BeforeClass 20 | public static void setUpBeforeClass() { 21 | System.setProperty("tests.rest.cluster", "localhost:19200"); 22 | } 23 | 24 | @AfterClass 25 | public static void tearDownAfterClass() { 26 | System.clearProperty("tests.rest.cluster"); 27 | } 28 | 29 | protected RestHighLevelClient restClient; 30 | 31 | protected abstract EsWriter newEsWriter(); 32 | 33 | protected final int year = 2020; 34 | 35 | @Before 36 | @Override 37 | public void setUp() throws Exception { 38 | super.setUp(); 39 | 40 | var builder = RestClient.builder(new HttpHost("localhost", 19200, "http")); 41 | restClient = new RestHighLevelClient(builder); 42 | } 43 | 44 | @After 45 | public void tearDown() throws Exception { 46 | restClient.close(); 47 | super.tearDown(); 48 | } 49 | 50 | @Test 51 | public void testCreateIndex() throws Exception { 52 | // Given 53 | var writer = newEsWriter(); 54 | 55 | // When, Then 56 | Assertions.assertThatCode(writer::createIndex).doesNotThrowAnyException(); 57 | } 58 | 59 | @Test 60 | public void testWrite() throws Exception { 61 | // Given 62 | var writer = newEsWriter(); 63 | writer.createIndex(); 64 | 65 | // When 66 | var transactions = new ImmutableTransaction[] {TRANSACTION_1, TRANSACTION_2, TRANSACTION_3}; 67 | var count = writer.write(transactions).get(10, SECONDS); 68 | 69 | // Then 70 | Assertions.assertThat(count).isEqualTo(3L); 71 
| 72 | var objectMapper = Jackson.newObjectMapper(); 73 | var request = new SearchRequest().indices(Transaction.indexNameForYear(year)); 74 | var response = restClient.search(request, RequestOptions.DEFAULT); 75 | Assertions.assertThat(response.getHits().getHits()) 76 | .extracting( 77 | hit -> objectMapper.readValue(hit.getSourceAsString(), ImmutableTransaction.class)) 78 | .containsExactlyInAnyOrder(transactions); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /demo-dvf/src/test/java/io/mincong/dvf/service/TransactionSimpleEsWriterIT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.dvf.service; 2 | 3 | import io.mincong.dvf.model.Transaction; 4 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 5 | 6 | public class TransactionSimpleEsWriterIT extends TransactionEsWriterAbstractIT { 7 | 8 | @Override 9 | protected EsWriter newEsWriter() { 10 | return new TransactionSimpleEsWriter( 11 | restClient, Transaction.indexNameForYear(year), RefreshPolicy.IMMEDIATE); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /demo-dvf/src/test/resources/dvf-samples.csv: -------------------------------------------------------------------------------- 1 | id_mutation,date_mutation,numero_disposition,nature_mutation,valeur_fonciere,adresse_numero,adresse_suffixe,adresse_nom_voie,adresse_code_voie,code_postal,code_commune,nom_commune,code_departement,ancien_code_commune,ancien_nom_commune,id_parcelle,ancien_id_parcelle,numero_volume,lot1_numero,lot1_surface_carrez,lot2_numero,lot2_surface_carrez,lot3_numero,lot3_surface_carrez,lot4_numero,lot4_surface_carrez,lot5_numero,lot5_surface_carrez,nombre_lots,code_type_local,type_local,surface_reelle_bati,nombre_pieces_principales,code_nature_culture,nature_culture,code_nature_culture_speciale,nature_culture_speciale,surface_terrain,longitude,latitude 2 | 
2020-1,2020-01-07,000001,Vente,8000,,,FORTUNAT,B063,01250,01072,Ceyzériat,01,,,01072000AK0216,,,,,,,,,,,,,0,,,,,T,terres,,,1061,5.323522,46.171899 3 | 2020-2,2020-01-07,000001,Vente,75000,,,RUE DE LA CHARTREUSE,0064,01960,01289,Péronnas,01,,,01289000AI0210,,,,,,,,,,,,,0,,,,,AB,terrains a bâtir,,,610,5.226197,46.184538 4 | 2020-3,2020-01-14,000001,Vente,89000,,,VACAGNOLE,B112,01340,01024,Attignat,01,,,01024000AL0120,,,,,,,,,,,,,0,,,,,AB,terrains a bâtir,,,600,, 5 | 2020-3,2020-01-14,000001,Vente,89000,,,VACAGNOLE,B112,01340,01024,Attignat,01,,,01024000AL0132,,,,,,,,,,,,,0,,,,,AB,terrains a bâtir,,,491,, 6 | 2020-4,2020-01-22,000001,Vente,209950,11,,RUE REYER,3340,01000,01053,Bourg-en-Bresse,01,,,01053000AL0074,,,,,,,,,,,,,0,1,Maison,78,5,S,sols,,,242,5.219902,46.196484 7 | 2020-5,2020-01-16,000001,Vente,3000,,,LES GUYOTS,B025,01340,01266,Montrevel-en-Bresse,01,,,012660000B0041,,,,,,,,,,,,,0,,,,,BS,taillis sous futaie,,,2760,5.118438,46.306551 8 | 2020-5,2020-01-16,000001,Vente,3000,,,CUET,B013,01340,01266,Montrevel-en-Bresse,01,,,012660000C0414,,,,,,,,,,,,,0,,,,,T,terres,,,1532,5.111139,46.325479 9 | 2020-5,2020-01-16,000001,Vente,3000,,,CHAMP DE LU,B056,01340,01346,Saint-Didier-d'Aussiat,01,,,01346000AN0085,,,,,,,,,,,,,0,,,,,BS,taillis sous futaie,,,1580,5.089673,46.311589 10 | 2020-5,2020-01-16,000001,Vente,3000,,,BOIS DE L HU,B018,01340,01346,Saint-Didier-d'Aussiat,01,,,01346000AO0092,,,,,,,,,,,,,0,,,,,BT,taillis simples,,,19602,5.089821,46.314201 11 | -------------------------------------------------------------------------------- /demo-dvf/start-elasticsearch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | esdata="${HOME}/dvf-volume-es" 4 | esbackup="${HOME}/es-backup/demo-dvf/" 5 | 6 | # How can I get the source directory of a Bash script from within the script itself? 
7 | # https://stackoverflow.com/questions/59895/ 8 | current_dir="$(cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)" 9 | 10 | docker_container_name_elasticsearch="elasticsearch-dvf" 11 | 12 | if [ ! -d "$esdata" ] 13 | then 14 | mkdir -p "$esdata" 15 | fi 16 | 17 | if [ ! -d "$esbackup" ] 18 | then 19 | mkdir -p "$esbackup" 20 | fi 21 | 22 | docker run \ 23 | --rm \ 24 | -p 9200:9200 \ 25 | -p 9300:9300 \ 26 | -e "discovery.type=single-node" \ 27 | -e "cluster.name=es-docker-cluster" \ 28 | -e "path.repo=/opt/elasticsearch/backup" \ 29 | -v "$esdata":/usr/share/elasticsearch/data \ 30 | -v "$esbackup":/opt/elasticsearch/backup \ 31 | -v "${current_dir}/src/main/resources/config/custom.elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml" \ 32 | --name "$docker_container_name_elasticsearch" \ 33 | docker.elastic.co/elasticsearch/elasticsearch:7.12.0 34 | -------------------------------------------------------------------------------- /demo-dvf/start-kibana.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Install Kibana with Docker 4 | # https://www.elastic.co/guide/en/kibana/7.x/docker.html 5 | # 6 | docker_container_name_elasticsearch="elasticsearch-dvf" 7 | docker_container_name_kibana="kibana-dvf" 8 | 9 | docker run \ 10 | --rm \ 11 | -p 5601:5601 \ 12 | --link "${docker_container_name_elasticsearch}:elasticsearch" \ 13 | --name "$docker_container_name_kibana" \ 14 | docker.elastic.co/kibana/kibana:7.12.0 15 | -------------------------------------------------------------------------------- /mapping/README.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Mapping 2 | 3 | ## Put Template 4 | 5 | ### API 6 | 7 | 8 | 9 | ``` 10 | PUT _template/{name} 11 | ``` 12 | 13 | References: 14 | 15 | - Mapping 16 | - Mapping Management in Index APIs 17 | - Mapping Management in Index APIs (Java High Level REST Client) 18 | 
-------------------------------------------------------------------------------- /mapping/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | es-demo-parent 7 | io.mincong 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-demo-mapping 13 | Elasticsearch Demos - Mapping 14 | 15 | 16 | 17 | org.elasticsearch.test 18 | framework 19 | test 20 | 21 | 22 | org.elasticsearch.client 23 | elasticsearch-rest-high-level-client 24 | ${elasticsearch.version} 25 | 26 | 27 | org.apache.logging.log4j 28 | log4j-core 29 | test 30 | 31 | 32 | org.assertj 33 | assertj-core 34 | test 35 | 36 | 37 | 38 | 39 | 40 | 41 | io.fabric8 42 | docker-maven-plugin 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /mapping/src/test/java/io/mincong/elasticsearch/DynamicMappingTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.Map; 4 | import org.assertj.core.api.Assertions; 5 | import org.elasticsearch.action.admin.indices.mapping.get.GetFieldMappingsRequest; 6 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 7 | import org.elasticsearch.common.xcontent.XContentType; 8 | import org.elasticsearch.rest.RestStatus; 9 | import org.elasticsearch.test.ESSingleNodeTestCase; 10 | import org.junit.Test; 11 | 12 | /** 13 | * Test dynamic mapping in Elasticsearch. 14 | * 15 | *

Fields and mapping types do not need to be defined before being used. Thanks to dynamic 16 | * mapping, new field names will be added automatically, just by indexing a document. New fields can 17 | * be added both to the top-level mapping type, and to inner object and nested fields. 18 | * 19 | * @author Mincong Huang 20 | */ 21 | public class DynamicMappingTest extends ESSingleNodeTestCase { 22 | 23 | @Test 24 | public void typeText() { 25 | index("{\"aText\": \"Hello world\"}"); 26 | 27 | var mappingResponse = 28 | client() 29 | .admin() 30 | .indices() 31 | .getFieldMappings(new GetFieldMappingsRequest().indices("my_index").fields("aText")) 32 | .actionGet(); 33 | @SuppressWarnings("unchecked") 34 | var messageField = 35 | (Map) 36 | mappingResponse.fieldMappings("my_index", "_doc", "aText").sourceAsMap().get("aText"); 37 | var fields = Map.of("keyword", Map.of("type", "keyword", "ignore_above", 256)); 38 | Assertions.assertThat(messageField) 39 | .hasSize(2) 40 | .containsEntry("type", "text") 41 | .containsEntry("fields", fields); 42 | } 43 | 44 | @Test 45 | public void typeDate() { 46 | index("{\"aDate\": \"2020-04-04T16:00:00\"}"); 47 | 48 | var mappingResponse = 49 | client() 50 | .admin() 51 | .indices() 52 | .getFieldMappings(new GetFieldMappingsRequest().indices("my_index").fields("aDate")) 53 | .actionGet(); 54 | @SuppressWarnings("unchecked") 55 | var messageField = 56 | (Map) 57 | mappingResponse.fieldMappings("my_index", "_doc", "aDate").sourceAsMap().get("aDate"); 58 | Assertions.assertThat(messageField).hasSize(1).containsEntry("type", "date"); 59 | } 60 | 61 | @Test 62 | public void typeLong() { 63 | index("{\"aLong\": 123}"); 64 | 65 | var mappingResponse = 66 | client() 67 | .admin() 68 | .indices() 69 | .getFieldMappings(new GetFieldMappingsRequest().indices("my_index").fields("aLong")) 70 | .actionGet(); 71 | @SuppressWarnings("unchecked") 72 | var messageField = 73 | (Map) 74 | mappingResponse.fieldMappings("my_index", "_doc", 
"aLong").sourceAsMap().get("aLong"); 75 | Assertions.assertThat(messageField).hasSize(1).containsEntry("type", "long"); 76 | } 77 | 78 | @Test 79 | public void typeDouble() { 80 | index("{\"aFloat\": 123.4}"); 81 | 82 | var mappingResponse = 83 | client() 84 | .admin() 85 | .indices() 86 | .getFieldMappings(new GetFieldMappingsRequest().indices("my_index").fields("aFloat")) 87 | .actionGet(); 88 | @SuppressWarnings("unchecked") 89 | var messageField = 90 | (Map) 91 | mappingResponse.fieldMappings("my_index", "_doc", "aFloat").sourceAsMap().get("aFloat"); 92 | Assertions.assertThat(messageField).hasSize(1).containsEntry("type", "float"); 93 | } 94 | 95 | @Test 96 | public void typeBoolean() { 97 | index("{\"aBoolean\": true}"); 98 | 99 | var mappingResponse = 100 | client() 101 | .admin() 102 | .indices() 103 | .getFieldMappings(new GetFieldMappingsRequest().indices("my_index").fields("aBoolean")) 104 | .actionGet(); 105 | @SuppressWarnings("unchecked") 106 | var messageField = 107 | (Map) 108 | mappingResponse 109 | .fieldMappings("my_index", "_doc", "aBoolean") 110 | .sourceAsMap() 111 | .get("aBoolean"); 112 | Assertions.assertThat(messageField).hasSize(1).containsEntry("type", "boolean"); 113 | } 114 | 115 | private void index(String source) { 116 | var indexResponse = 117 | client() 118 | .prepareIndex() 119 | .setIndex("my_index") 120 | .setSource(source, XContentType.JSON) 121 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 122 | .execute() 123 | .actionGet(); 124 | Assertions.assertThat(indexResponse.status()).isEqualTo(RestStatus.CREATED); 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /mapping/src/test/java/io/mincong/elasticsearch/ExplicitMappingTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.Map; 4 | import org.assertj.core.api.Assertions; 5 | import 
org.elasticsearch.action.admin.indices.create.CreateIndexRequest; 6 | import org.elasticsearch.action.admin.indices.mapping.get.GetFieldMappingsRequest; 7 | import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; 8 | import org.elasticsearch.common.xcontent.XContentType; 9 | import org.elasticsearch.test.ESSingleNodeTestCase; 10 | import org.junit.Test; 11 | 12 | /** 13 | * Test explicit mapping in Elasticsearch. 14 | * 15 | *

You know more about your data than Elasticsearch can guess, so while dynamic mapping can be 16 | * useful to get started, at some point you will want to specify your own explicit mappings. 17 | * 18 | * @author Mincong Huang 19 | */ 20 | public class ExplicitMappingTest extends ESSingleNodeTestCase { 21 | 22 | @Test 23 | public void createIndexWithExplictMapping() { 24 | var create = 25 | new CreateIndexRequest("my_index") 26 | .mapping( 27 | "_doc", "{\"properties\":{\"message\":{\"type\":\"text\"}}}", XContentType.JSON); 28 | var response = client().admin().indices().create(create).actionGet(); 29 | Assertions.assertThat(response.isAcknowledged()).isTrue(); 30 | 31 | var mappingResponse = 32 | client() 33 | .admin() 34 | .indices() 35 | .getFieldMappings(new GetFieldMappingsRequest().indices("my_index").fields("message")) 36 | .actionGet(); 37 | @SuppressWarnings("unchecked") 38 | var messageField = 39 | (Map) 40 | mappingResponse 41 | .fieldMappings("my_index", "_doc", "message") 42 | .sourceAsMap() 43 | .get("message"); 44 | Assertions.assertThat(messageField).hasSize(1).containsEntry("type", "text"); 45 | } 46 | 47 | @Test 48 | public void addFieldToExistingMapping() { 49 | createIndexWithExplictMapping(); 50 | 51 | var putMapping = 52 | new PutMappingRequest("my_index") 53 | .type("_doc") 54 | .source("{\"properties\":{\"description\":{\"type\":\"text\"}}}", XContentType.JSON); 55 | var response = client().admin().indices().putMapping(putMapping).actionGet(); 56 | Assertions.assertThat(response.isAcknowledged()).isTrue(); 57 | 58 | var mappingResponse = 59 | client() 60 | .admin() 61 | .indices() 62 | .getFieldMappings( 63 | new GetFieldMappingsRequest().indices("my_index").fields("description")) 64 | .actionGet(); 65 | @SuppressWarnings("unchecked") 66 | var descriptionField = 67 | (Map) 68 | mappingResponse 69 | .fieldMappings("my_index", "_doc", "description") 70 | .sourceAsMap() 71 | .get("description"); 72 | 
Assertions.assertThat(descriptionField).hasSize(1).containsEntry("type", "text"); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /mapping/src/test/java/io/mincong/elasticsearch/IndexStatsTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.elasticsearch.action.index.IndexRequest; 5 | import org.elasticsearch.action.index.IndexResponse; 6 | import org.elasticsearch.common.xcontent.XContentType; 7 | import org.elasticsearch.rest.RestStatus; 8 | import org.elasticsearch.test.ESSingleNodeTestCase; 9 | import org.junit.Before; 10 | import org.junit.Test; 11 | 12 | public class IndexStatsTest extends ESSingleNodeTestCase { 13 | 14 | @Override 15 | @Before 16 | public void setUp() throws Exception { 17 | super.setUp(); 18 | 19 | IndexRequest idxRequest = 20 | new IndexRequest("msg").source("{\"msg\":\"Hello world!\"}", XContentType.JSON); 21 | IndexResponse idxResponse = client().index(idxRequest).actionGet(); 22 | assertEquals("msg", idxResponse.getIndex()); 23 | assertEquals(RestStatus.CREATED, idxResponse.status()); 24 | } 25 | 26 | @Test 27 | public void itShouldReturnEmptyStats() { 28 | var response = client().admin().indices().prepareStats().clear().get(); 29 | Assertions.assertThat(response.getIndices()).containsOnlyKeys("msg"); 30 | var index = response.getIndex("msg"); 31 | // all stats are null, some examples: 32 | Assertions.assertThat(index.getPrimaries().completion).isNull(); 33 | Assertions.assertThat(index.getPrimaries().docs).isNull(); 34 | Assertions.assertThat(index.getPrimaries().fieldData).isNull(); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /mapping/src/test/java/io/mincong/elasticsearch/IndexTemplateTest.java: -------------------------------------------------------------------------------- 1 | package 
io.mincong.elasticsearch; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | import org.assertj.core.api.Assertions; 6 | import org.elasticsearch.common.xcontent.XContentType; 7 | import org.elasticsearch.rest.RestStatus; 8 | import org.elasticsearch.test.ESSingleNodeTestCase; 9 | import org.junit.Test; 10 | 11 | public class IndexTemplateTest extends ESSingleNodeTestCase { 12 | 13 | @Test 14 | public void createTemplate() { 15 | // Given a user template to create 16 | 17 | // When creating it 18 | client() 19 | .admin() 20 | .indices() 21 | .preparePutTemplate("user_template") 22 | .setPatterns(List.of("user*")) 23 | .addMapping( 24 | "_doc", 25 | json("{'properties': {'name': {'type': 'keyword'}, 'age': {'type': 'long'}}}"), 26 | XContentType.JSON) 27 | .get(); 28 | 29 | // Then the creation is successful 30 | var response = client().admin().indices().prepareGetTemplates("user_template").get(); 31 | var metadata = response.getIndexTemplates().get(0); 32 | Assertions.assertThat(metadata.getName()).isEqualTo("user_template"); 33 | } 34 | 35 | /** 36 | * This test demonstrates how index template is used when a new index is created and it matches 37 | * the expression of the index template. 
38 | */ 39 | @Test 40 | public void createIndex() { 41 | // Given an existing template 42 | client() 43 | .admin() 44 | .indices() 45 | .preparePutTemplate("user_template") 46 | .setPatterns(List.of("user*")) 47 | .addMapping( 48 | "_doc", 49 | json("{'properties': {'name': {'type': 'keyword'}, 'age': {'type': 'long'}}}"), 50 | XContentType.JSON) 51 | .get(); 52 | 53 | // When creating an index matching this template 54 | var indexResponse = 55 | client() 56 | .prepareIndex() 57 | .setIndex("user_fr") 58 | .setSource(json("{'name': 'First Last', 'age': 30}"), XContentType.JSON) 59 | .execute() 60 | .actionGet(); 61 | 62 | // Then the index is created 63 | Assertions.assertThat(indexResponse.status()).isEqualTo(RestStatus.CREATED); 64 | 65 | // And the mappings are correct 66 | var mappingResponse = client().admin().indices().prepareGetMappings("user_fr").get(); 67 | @SuppressWarnings("unchecked") 68 | var properties = 69 | (Map>) 70 | mappingResponse.mappings().get("user_fr").get("_doc").sourceAsMap().get("properties"); 71 | Assertions.assertThat(properties) 72 | .hasSize(2) 73 | .containsEntry("age", Map.of("type", "long")) 74 | .containsEntry("name", Map.of("type", "keyword")); 75 | } 76 | 77 | String json(String singleQuoted) { 78 | return singleQuoted.replace("'", "\""); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /mapping/src/test/java/io/mincong/elasticsearch/StackOverflow60500157IT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.concurrent.CountDownLatch; 4 | import java.util.concurrent.TimeUnit; 5 | import java.util.concurrent.atomic.AtomicReference; 6 | import org.apache.http.HttpHost; 7 | import org.assertj.core.api.Assertions; 8 | import org.elasticsearch.action.ActionListener; 9 | import org.elasticsearch.action.ActionRequestValidationException; 10 | import 
org.elasticsearch.action.support.master.AcknowledgedResponse; 11 | import org.elasticsearch.client.RequestOptions; 12 | import org.elasticsearch.client.RestClient; 13 | import org.elasticsearch.client.RestHighLevelClient; 14 | import org.elasticsearch.client.indices.CreateIndexRequest; 15 | import org.elasticsearch.common.xcontent.XContentType; 16 | import org.elasticsearch.test.rest.ESRestTestCase; 17 | import org.junit.*; 18 | 19 | /** 20 | * Put mapping with Elastic Search's High level REST JAVA client asynchronously - deprecated error 21 | * 22 | *

https://stackoverflow.com/questions/60500157 23 | * 24 | * @author Mincong Huang 25 | */ 26 | public class StackOverflow60500157IT extends ESRestTestCase { 27 | 28 | @BeforeClass 29 | public static void setUpBeforeClass() { 30 | System.setProperty("tests.rest.cluster", "localhost:19200"); 31 | } 32 | 33 | @AfterClass 34 | public static void tearDownAfterClass() { 35 | System.clearProperty("tests.rest.cluster"); 36 | } 37 | 38 | private RestHighLevelClient client; 39 | 40 | @Before 41 | @Override 42 | public void setUp() throws Exception { 43 | super.setUp(); 44 | 45 | var builder = RestClient.builder(new HttpHost("localhost", 19200, "http")); 46 | client = new RestHighLevelClient(builder); 47 | 48 | var createRequest = new CreateIndexRequest("contacts"); 49 | var response = client.indices().create(createRequest, RequestOptions.DEFAULT); 50 | Assertions.assertThat(response.isAcknowledged()).isTrue(); 51 | } 52 | 53 | @After 54 | public void tearDown() throws Exception { 55 | client.close(); 56 | super.tearDown(); 57 | } 58 | 59 | @Test 60 | public void oldRequest() { 61 | var source = 62 | "{\"properties\":{\"list_id\":{\"type\":\"integer\"},\"contact_id\":{\"type\":\"integer\"}}}"; 63 | var request = 64 | new org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest("contacts") 65 | .source(source, XContentType.JSON); 66 | Assertions.assertThatExceptionOfType(ActionRequestValidationException.class) 67 | .isThrownBy( 68 | () -> { 69 | @SuppressWarnings("deprecation") 70 | var response = client.indices().putMapping(request, RequestOptions.DEFAULT); 71 | }) 72 | .withMessageContaining("mapping type is missing"); 73 | } 74 | 75 | @Test 76 | public void newRequest() throws Exception { 77 | var source = 78 | "{\"properties\":{\"list_id\":{\"type\":\"integer\"},\"contact_id\":{\"type\":\"integer\"}}}"; 79 | var request = 80 | new org.elasticsearch.client.indices.PutMappingRequest("contacts") 81 | .source(source, XContentType.JSON); 82 | 83 | var response = 
client.indices().putMapping(request, RequestOptions.DEFAULT); 84 | Assertions.assertThat(response.isAcknowledged()).isTrue(); 85 | } 86 | 87 | @Test 88 | public void newRequestAsync() throws Exception { 89 | var latch = new CountDownLatch(1); 90 | var source = 91 | "{\"properties\":{\"list_id\":{\"type\":\"integer\"},\"contact_id\":{\"type\":\"integer\"}}}"; 92 | var request = 93 | new org.elasticsearch.client.indices.PutMappingRequest("contacts") 94 | .source(source, XContentType.JSON); 95 | 96 | var response = new AtomicReference(); 97 | client 98 | .indices() 99 | .putMappingAsync( 100 | request, 101 | RequestOptions.DEFAULT, 102 | new ActionListener<>() { 103 | @Override 104 | public void onResponse(AcknowledgedResponse r) { 105 | response.set(r); 106 | latch.countDown(); 107 | } 108 | 109 | @Override 110 | public void onFailure(Exception e) { 111 | latch.countDown(); 112 | } 113 | }); 114 | latch.await(3, TimeUnit.SECONDS); 115 | Assertions.assertThat(response.get().isAcknowledged()).isTrue(); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /mapping/src/test/java/io/mincong/elasticsearch/StackOverflow60667649IT.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.Map; 4 | import org.apache.http.HttpHost; 5 | import org.assertj.core.api.Assertions; 6 | import org.elasticsearch.client.RequestOptions; 7 | import org.elasticsearch.client.RestClient; 8 | import org.elasticsearch.client.RestHighLevelClient; 9 | import org.elasticsearch.client.indices.CreateIndexRequest; 10 | import org.elasticsearch.client.indices.GetFieldMappingsRequest; 11 | import org.elasticsearch.client.indices.PutMappingRequest; 12 | import org.elasticsearch.common.xcontent.XContentBuilder; 13 | import org.elasticsearch.common.xcontent.XContentFactory; 14 | import org.elasticsearch.common.xcontent.XContentType; 15 | import 
org.elasticsearch.test.rest.ESRestTestCase; 16 | import org.junit.*; 17 | 18 | /** 19 | * Put mapping in ElasticSearch by Java API 20 | * 21 | *

https://stackoverflow.com/questions/60667649 22 | * 23 | * @author Mincong Huang 24 | */ 25 | public class StackOverflow60667649IT extends ESRestTestCase { 26 | 27 | @BeforeClass 28 | public static void setUpBeforeClass() { 29 | System.setProperty("tests.rest.cluster", "localhost:19200"); 30 | } 31 | 32 | @AfterClass 33 | public static void tearDownAfterClass() { 34 | System.clearProperty("tests.rest.cluster"); 35 | } 36 | 37 | private RestHighLevelClient restClient; 38 | 39 | @Before 40 | @Override 41 | public void setUp() throws Exception { 42 | super.setUp(); 43 | 44 | var builder = RestClient.builder(new HttpHost("localhost", 19200, "http")); 45 | restClient = new RestHighLevelClient(builder); 46 | 47 | var createRequest = new CreateIndexRequest("my_index"); 48 | var response = restClient.indices().create(createRequest, RequestOptions.DEFAULT); 49 | Assertions.assertThat(response.isAcknowledged()).isTrue(); 50 | } 51 | 52 | @After 53 | public void tearDown() throws Exception { 54 | restClient.close(); 55 | super.tearDown(); 56 | } 57 | 58 | @Test 59 | public void javaSource() throws Exception { 60 | XContentBuilder builder = XContentFactory.jsonBuilder(); 61 | builder.startObject(); 62 | builder.startObject("properties"); 63 | builder.startObject("mje-test-location"); 64 | builder.field("type", "geo_point"); 65 | builder.endObject(); 66 | builder.endObject(); 67 | builder.endObject(); 68 | 69 | var putMapping = new PutMappingRequest("my_index").source(builder); 70 | var putResponse = restClient.indices().putMapping(putMapping, RequestOptions.DEFAULT); 71 | Assertions.assertThat(putResponse.isAcknowledged()).isTrue(); 72 | 73 | var getFieldMapping = 74 | new GetFieldMappingsRequest().indices("my_index").fields("mje-test-location"); 75 | var mappingResponse = 76 | restClient.indices().getFieldMapping(getFieldMapping, RequestOptions.DEFAULT); 77 | 78 | @SuppressWarnings("unchecked") 79 | var field = 80 | (Map) 81 | mappingResponse 82 | .fieldMappings("my_index", 
"mje-test-location") 83 | .sourceAsMap() 84 | .get("mje-test-location"); 85 | Assertions.assertThat(field).hasSize(1).containsEntry("type", "geo_point"); 86 | } 87 | 88 | @Test 89 | public void stringSource() throws Exception { 90 | var putMapping = 91 | new PutMappingRequest("my_index") 92 | .source( 93 | "{\"properties\":{\"mje-test-location\":{\"type\":\"geo_point\"}}}", 94 | XContentType.JSON); 95 | var putResponse = restClient.indices().putMapping(putMapping, RequestOptions.DEFAULT); 96 | Assertions.assertThat(putResponse.isAcknowledged()).isTrue(); 97 | 98 | var getFieldMapping = 99 | new GetFieldMappingsRequest().indices("my_index").fields("mje-test-location"); 100 | var mappingResponse = 101 | restClient.indices().getFieldMapping(getFieldMapping, RequestOptions.DEFAULT); 102 | 103 | @SuppressWarnings("unchecked") 104 | var field = 105 | (Map) 106 | mappingResponse 107 | .fieldMappings("my_index", "mje-test-location") 108 | .sourceAsMap() 109 | .get("mje-test-location"); 110 | Assertions.assertThat(field).hasSize(1).containsEntry("type", "geo_point"); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Notebooks 2 | 3 | Different notebooks for Elasticsearch. 4 | -------------------------------------------------------------------------------- /notebooks/partial-update.md: -------------------------------------------------------------------------------- 1 | # Partial Update Document 2 | 3 | Can Elasticsearch handle partial update for documents? 
4 | 5 | ## Start Elasticsearch 6 | 7 | ``` 8 | docker run \ 9 | --rm \ 10 | -p 9200:9200 \ 11 | -p 9300:9300 \ 12 | -e "discovery.type=single-node" \ 13 | -e "cluster.name=es-docker-cluster" \ 14 | docker.elastic.co/elasticsearch/elasticsearch:7.14.0 15 | ``` 16 | 17 | ## Create new document 18 | 19 | Request: 20 | 21 | ```sh 22 | curl -X PUT http://localhost:9200/my_index/_doc/1?pretty \ 23 | -H 'Content-Type: application/json' \ 24 | -d '{"key1": "value1"}' 25 | ``` 26 | 27 | Response: 28 | 29 | ```json 30 | { 31 | "_index" : "my_index", 32 | "_type" : "_doc", 33 | "_id" : "1", 34 | "_version" : 1, 35 | "result" : "created", 36 | "_shards" : { 37 | "total" : 2, 38 | "successful" : 1, 39 | "failed" : 0 40 | }, 41 | "_seq_no" : 0, 42 | "_primary_term" : 1 43 | } 44 | ``` 45 | 46 | ## Update existing document 47 | 48 | Request: 49 | 50 | ```sh 51 | curl -X POST 'http://localhost:9200/my_index/_update/1?pretty' \ 52 | -H 'Content-Type: application/json' \ 53 | -d '{ 54 | "doc": { 55 | "key2": "value2" 56 | } 57 | }' 58 | ``` 59 | 60 | Response: 61 | 62 | ```json 63 | { 64 | "_index" : "my_index", 65 | "_type" : "_doc", 66 | "_id" : "1", 67 | "_version" : 2, 68 | "result" : "updated", 69 | "_shards" : { 70 | "total" : 2, 71 | "successful" : 1, 72 | "failed" : 0 73 | }, 74 | "_seq_no" : 1, 75 | "_primary_term" : 1 76 | } 77 | ``` 78 | 79 | Result: 80 | 81 | ``` 82 | curl 'http://localhost:9200/my_index/_doc/1?pretty' 83 | ``` 84 | 85 | ```js 86 | { 87 | "_index" : "my_index", 88 | "_type" : "_doc", 89 | "_id" : "1", 90 | "_version" : 2, 91 | "_seq_no" : 1, 92 | "_primary_term" : 1, 93 | "found" : true, 94 | "_source" : { 95 | "key1" : "value1", // both key1 and key2 are available 96 | "key2" : "value2" 97 | } 98 | } 99 | ``` -------------------------------------------------------------------------------- /ops/gc.md: -------------------------------------------------------------------------------- 1 | # GC 2 | 3 | 4 | 
-------------------------------------------------------------------------------- /scripts/start-elasticsearch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # How can I get the source directory of a Bash script from within the script itself? 4 | # https://stackoverflow.com/questions/59895/ 5 | current_dir="$(cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)" 6 | 7 | docker run \ 8 | --rm \ 9 | -p 9200:9200 \ 10 | -p 9300:9300 \ 11 | -e "discovery.type=single-node" \ 12 | -e "cluster.name=es-docker-cluster" \ 13 | -v "${current_dir}/../demo-dvf/src/main/resources/config/custom.elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml" \ 14 | docker.elastic.co/elasticsearch/elasticsearch:7.12.0 15 | -------------------------------------------------------------------------------- /scripts/upgrade-es-version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Usage: 4 | # 5 | # scripts/upgrade-es-version.sh 6 | # 7 | # Sample upgrading from 7.8.0 to 7.10.0: 8 | # 9 | # scripts/upgrade-es-version.sh 7.8.0 7.10.0 10 | # 11 | old_version="$1" 12 | new_version="$2" 13 | 14 | if [[ -z $old_version || -z $new_version ]] 15 | then 16 | echo "Missing argument(s). Usage:" 17 | echo 18 | echo " upgrade-es-version.sh 7.8.0 7.10.0" 19 | echo 20 | exit 1 21 | fi 22 | 23 | # Update configuration files 24 | filepaths=($(rg --files-with-matches --glob "**/*.{xml,yml}" CURRENT_ES_VERSION)) 25 | for filepath in "${filepaths[@]}" 26 | do 27 | sed -i '' -e "s/${old_version}/${new_version}/g" $filepath 28 | echo "✅ ${filepath}" 29 | done 30 | 31 | # Update README 32 | start=$(grep -n MANAGED_BLOCK_RUN_ES_START README.md | cut -f 1 -d :) 33 | end=$(grep -n MANAGED_BLOCK_RUN_ES_END README.md | cut -f 1 -d :) 34 | sed -i '' "${start},${end}s/${old_version}/${new_version}/g" README.md 35 | echo "✅ README.md" 36 | 37 | echo "Finished." 
38 | -------------------------------------------------------------------------------- /search/README.md: -------------------------------------------------------------------------------- 1 | # Search 2 | 3 | Search APIs () 4 | -------------------------------------------------------------------------------- /search/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | es-demo-parent 7 | io.mincong 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-demo-search 13 | Elasticsearch Demos - Search 14 | 15 | 16 | 17 | org.elasticsearch.test 18 | framework 19 | test 20 | 21 | 22 | org.apache.logging.log4j 23 | log4j-core 24 | test 25 | 26 | 27 | org.assertj 28 | assertj-core 29 | test 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /search/src/test/java/io/mincong/elasticsearch/BlogDisqus4852306721Test.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.Map; 4 | import org.assertj.core.api.Assertions; 5 | import org.elasticsearch.action.index.IndexRequest; 6 | import org.elasticsearch.action.search.SearchRequest; 7 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 8 | import org.elasticsearch.common.xcontent.XContentType; 9 | import org.elasticsearch.index.query.QueryBuilders; 10 | import org.elasticsearch.rest.RestStatus; 11 | import org.elasticsearch.search.SearchHit; 12 | import org.elasticsearch.search.builder.SearchSourceBuilder; 13 | import org.elasticsearch.test.ESSingleNodeTestCase; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | /** 18 | * Tests boolean query via Search API. 19 | * 20 | *

Comment: https://mincong.io/2019/11/24/essinglenodetestcase/#comment-4852306721 21 | * 22 | * @author Mincong Huang 23 | */ 24 | public class BlogDisqus4852306721Test extends ESSingleNodeTestCase { 25 | 26 | @Override 27 | @Before 28 | public void setUp() throws Exception { 29 | super.setUp(); 30 | 31 | var bulkResponse = 32 | client() 33 | .prepareBulk() 34 | .add( 35 | new IndexRequest("transactions") 36 | .id("account1.tx1") 37 | .source( 38 | Map.of( 39 | "transactionDate", "2020-03-19T00:00:00", 40 | "accountId", "1", 41 | "amount", -10.0), 42 | XContentType.JSON)) 43 | .add( 44 | new IndexRequest("transactions") 45 | .id("account1.tx2") 46 | .source( 47 | Map.of( 48 | "transactionDate", "2020-03-20T00:00:00", 49 | "accountId", "1", 50 | "amount", -20.0), 51 | XContentType.JSON)) 52 | .add( 53 | new IndexRequest("transactions") 54 | .id("account2.tx3") 55 | .source( 56 | Map.of( 57 | "transactionDate", "2020-03-21T00:00:00", 58 | "accountId", "2", 59 | "amount", -30.0), 60 | XContentType.JSON)) 61 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 62 | .execute() 63 | .actionGet(); 64 | 65 | assertEquals(RestStatus.OK, bulkResponse.status()); 66 | for (var r : bulkResponse.getItems()) { 67 | assertEquals(RestStatus.CREATED, r.status()); 68 | } 69 | } 70 | 71 | @Test 72 | public void booleanQuery() { 73 | var sourceBuilder = 74 | QueryBuilders.boolQuery() 75 | .must(QueryBuilders.rangeQuery("transactionDate").gte("2020-03-20").lte("2020-03-28")) 76 | .must(QueryBuilders.matchQuery("accountId", "1")); 77 | 78 | var request = new SearchRequest().source(new SearchSourceBuilder().query(sourceBuilder)); 79 | var response = client().search(request).actionGet(); 80 | 81 | Assertions.assertThat(response.getHits().getHits()) 82 | .hasSize(1) 83 | .extracting(SearchHit::getId) 84 | .containsExactly("account1.tx2"); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- 
/search/src/test/java/io/mincong/elasticsearch/SearchScrollTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Map; 5 | import org.elasticsearch.action.index.IndexRequest; 6 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 7 | import org.elasticsearch.common.unit.TimeValue; 8 | import org.elasticsearch.test.ESSingleNodeTestCase; 9 | import org.junit.Before; 10 | import org.junit.Test; 11 | 12 | /** 13 | * Test Elasticsearch Search Scroll API 14 | * 15 | * @author Mincong Huang 16 | * @see Elasticsearch 18 | * REST API: Request Body Search - Scroll 19 | */ 20 | public class SearchScrollTest extends ESSingleNodeTestCase { 21 | 22 | @Override 23 | @Before 24 | public void setUp() throws Exception { 25 | super.setUp(); 26 | var bulkRequest = client().prepareBulk(); 27 | for (int i = 0; i < 300; i++) { 28 | bulkRequest.add(new IndexRequest("my_index").id(String.valueOf(i)).source(Map.of())); 29 | } 30 | bulkRequest.setRefreshPolicy(RefreshPolicy.IMMEDIATE).execute().actionGet(); 31 | } 32 | 33 | @Test 34 | public void scroll() { 35 | var results = new ArrayList(); 36 | 37 | // first request 38 | var searchResponse = 39 | client() 40 | .prepareSearch() 41 | .setIndices("my_index") 42 | .setSize(100) 43 | .setScroll(TimeValue.timeValueMinutes(1)) 44 | .execute() 45 | .actionGet(); 46 | for (var hit : searchResponse.getHits()) { 47 | results.add(hit.getId()); 48 | } 49 | logger.info( 50 | "results={} ({} new), scrollId={}", 51 | results.size(), 52 | results.size(), 53 | searchResponse.getScrollId()); 54 | 55 | // more requests 56 | var scrollId = searchResponse.getScrollId(); 57 | var hasNext = !results.isEmpty(); 58 | while (hasNext) { 59 | var resp = 60 | client() 61 | .prepareSearchScroll(scrollId) 62 | .setScroll(TimeValue.timeValueMinutes(1)) 63 | .execute() 64 | .actionGet(); 65 | var newResults = new ArrayList(); 
66 | for (var hit : resp.getHits()) { 67 | newResults.add(hit.getId()); 68 | } 69 | results.addAll(newResults); 70 | logger.info( 71 | "results={} ({} new), scrollId={}", 72 | results.size(), 73 | newResults.size(), 74 | resp.getScrollId()); 75 | 76 | hasNext = !newResults.isEmpty(); 77 | scrollId = resp.getScrollId(); 78 | } 79 | 80 | assertEquals(300, results.size()); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /search/src/test/java/io/mincong/elasticsearch/SearchTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.Map; 4 | import org.elasticsearch.action.bulk.BulkItemResponse; 5 | import org.elasticsearch.action.bulk.BulkResponse; 6 | import org.elasticsearch.action.index.IndexRequest; 7 | import org.elasticsearch.action.search.SearchResponse; 8 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 9 | import org.elasticsearch.common.xcontent.XContentType; 10 | import org.elasticsearch.index.query.QueryBuilders; 11 | import org.elasticsearch.rest.RestStatus; 12 | import org.elasticsearch.search.SearchHits; 13 | import org.elasticsearch.test.ESSingleNodeTestCase; 14 | import org.junit.Before; 15 | import org.junit.Test; 16 | 17 | /** 18 | * Tests Search APIs. 
19 | * 20 | * @author Mincong Huang 21 | * @see Search 23 | * APIs | Java REST Client | Elastic 24 | */ 25 | public class SearchTest extends ESSingleNodeTestCase { 26 | 27 | @Override 28 | @Before 29 | public void setUp() throws Exception { 30 | super.setUp(); 31 | 32 | BulkResponse response = 33 | client() 34 | .prepareBulk() 35 | .add( 36 | new IndexRequest("users") 37 | .id("sansa") 38 | .source( 39 | Map.of( 40 | "firstName", "Sansa", 41 | "lastName", "Stark", 42 | "gender", "female", 43 | "house", "House Stark"), 44 | XContentType.JSON)) 45 | .add( 46 | new IndexRequest("users") 47 | .id("arya") 48 | .source( 49 | Map.of( 50 | "firstName", "Arya", 51 | "lastName", "Stark", 52 | "gender", "female", 53 | "house", "House Stark"), 54 | XContentType.JSON)) 55 | .add( 56 | new IndexRequest("users") 57 | .id("tyrion") 58 | .source( 59 | Map.of( 60 | "firstName", 61 | "Tyrion", 62 | "lastName", 63 | "Lannister", 64 | "gender", 65 | "male", 66 | "house", 67 | "House Lannister"), 68 | XContentType.JSON)) 69 | .add( 70 | new IndexRequest("users") 71 | .id("jaime") 72 | .source( 73 | Map.of( 74 | "firstName", "Jaime", 75 | "lastName", "Lannister", 76 | "gender", "male", 77 | "house", "House Lannister"), 78 | XContentType.JSON)) 79 | .add( 80 | new IndexRequest("users") 81 | .id("cersei") 82 | .source( 83 | Map.of( 84 | "firstName", "Cersei", 85 | "lastName", "Lannister", 86 | "gender", "female", 87 | "house", "House Lannister"), 88 | XContentType.JSON)) 89 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 90 | .execute() 91 | .actionGet(); 92 | 93 | assertEquals(RestStatus.OK, response.status()); 94 | for (BulkItemResponse r : response.getItems()) { 95 | assertEquals(RestStatus.CREATED, r.status()); 96 | } 97 | } 98 | 99 | /** 100 | * Search API. 
101 | * 102 | * @see Search 104 | * API | Java REST Client | Elastic 105 | */ 106 | @Test 107 | public void searchApi_termQueryQuery() { 108 | SearchResponse response = 109 | client() 110 | .prepareSearch("users") 111 | .setQuery(QueryBuilders.termQuery("lastName", "stark")) 112 | .get(); 113 | 114 | SearchHits hits = response.getHits(); 115 | assertEquals(2L, hits.getTotalHits().value); 116 | assertEquals("sansa", hits.getHits()[0].getId()); 117 | assertEquals("arya", hits.getHits()[1].getId()); 118 | } 119 | 120 | @Test 121 | public void searchApi_allMatchQuery() { 122 | SearchResponse response = 123 | client() // 124 | .prepareSearch("users") 125 | .setQuery(QueryBuilders.matchAllQuery()) 126 | .get(); 127 | 128 | SearchHits hits = response.getHits(); 129 | assertEquals(5L, hits.getTotalHits().value); 130 | } 131 | 132 | @Test 133 | public void searchApi_matchPhraseQuery() { 134 | SearchResponse response = 135 | client() // 136 | .prepareSearch("users") 137 | .setQuery(QueryBuilders.matchPhraseQuery("house", "House Lannister")) 138 | .get(); 139 | 140 | SearchHits hits = response.getHits(); 141 | assertEquals(3L, hits.getTotalHits().value); 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /snapshot/README.md: -------------------------------------------------------------------------------- 1 | # Snapshot and Restore 2 | 3 | Snapshot and restore in Elasticsearch. 
4 | 5 | ## Prerequisite 6 | 7 | ```sh 8 | # 9 | # Start docker image: 10 | # - Use single-node discovery mode to bypass the bootstrap checks 11 | # - Use /tmp as the root path for the snapshot repositories 12 | # - Publish port 9200 to communicate with docker image 13 | # 14 | docker run \ 15 | -e discovery.type=single-node \ 16 | -e path.repo=/tmp \ 17 | -p 9200:9200 \ 18 | docker.elastic.co/elasticsearch/elasticsearch:7.6.2 19 | ``` 20 | 21 | ## APIs 22 | 23 | API | Method | Description 24 | :--- | :---: | :--- 25 | `/_snapshot/` | GET | List snapshot repositories. 26 | `/_snapshot/_status` | GET | Get all currently running snapshots with detailed status information. 27 | `/_snapshot/{repo}` | GET | Get settings of a snapshot repository. 28 | `/_snapshot/{repo}` | PUT | Add a new snapshot repository or edit the settings of an existing repository. 29 | `/_snapshot/{repo}/_all` | GET | List all snapshots inside the given repository. 30 | `/_snapshot/{repo}/_current` | GET | List all currently running snapshots inside the given repository. 31 | `/_snapshot/{repo}/_status` | GET | Get all currently running snapshots of the given repository with detailed status information. 32 | `/_snapshot/{repo}/{snapshot}` | GET | Get information about a single snapshot or multiple snapshots (using separator "," or wildcard expression "\*"). 33 | `/_snapshot/{repo}/{snapshot}` | DELETE | Delete one or multiple snapshots. 34 | `/_snapshot/{repo}/{snapshot}/_restore` | POST | Restore a snapshot of a cluster. 35 | `/_snapshot/{repo}/{snapshot}/_status` | GET | Get a detailed description of the current state for each shard participating in the snapshot. 36 | `/_cat/snapshots/{repo}` | GET | List snapshots of a repository. 
37 | `/_cat/recovery` | GET | List all recoveries, including snapshot recoveries (restores). 38 | 39 | ### Create Snapshot Repository 40 | 41 | Create a new snapshot repository `fs_backup` on the local file system for backup 42 | purposes: 43 | 44 | ``` 45 | PUT /_snapshot/{repository} 46 | { 47 | "type": "fs", 48 | "settings": { 49 | "location": "my_backup_location" 50 | } 51 | } 52 | ``` 53 | 54 | ```sh 55 | curl -X PUT localhost:9200/_snapshot/fs_backup \ 56 | -H 'Content-Type: application/json' \ 57 | -d ' 58 | { 59 | "type": "fs", 60 | "settings": { 61 | "location": "/tmp" 62 | } 63 | }' 64 | # {"acknowledged":true} 65 | ``` 66 | 67 | Other repository backends are available in these official plugins: 68 | 69 | - [repository-s3](https://www.elastic.co/guide/en/elasticsearch/plugins/7.7/repository-s3.html) 70 | for S3 repository support 71 | - [repository-hdfs](https://www.elastic.co/guide/en/elasticsearch/plugins/7.7/repository-hdfs.html) 72 | for HDFS repository support in Hadoop environments 73 | - [repository-azure](https://www.elastic.co/guide/en/elasticsearch/plugins/7.7/repository-azure.html) 74 | for Azure storage repositories 75 | - [repository-gcs](https://www.elastic.co/guide/en/elasticsearch/plugins/7.7/repository-gcs.html) 76 | for Google Cloud Storage repositories 77 | 78 | ### Get Snapshot Repositories 79 | 80 | Retrieve information about all registered snapshot repositories. 81 | 82 | ``` 83 | GET /_snapshot 84 | ``` 85 | ``` 86 | GET /_snapshot/_all 87 | ``` 88 | ```sh 89 | curl localhost:9200/_snapshot?pretty 90 | # { 91 | # "fs_backup" : { 92 | # "type" : "fs", 93 | # "settings" : { 94 | # "location" : "/tmp" 95 | # } 96 | # } 97 | # } 98 | ``` 99 | 100 | ### Get Snapshot Repository 101 | 102 | Retrieve information about one snapshot repository. 103 | 104 | ``` 105 | GET /_snapshot/{repository} 106 | ``` 107 | 108 | ### Get Snapshots 109 | 110 | Retrieve information about all snapshots inside one snapshot repository. 
111 | 112 | ``` 113 | GET /_snapshot/{repository}/_all 114 | ``` 115 | 116 | ## References 117 | 118 | - Elastic, "Snapshot and restore | Elasticsearch Reference \[7.6\]", _Elastic_, 2020. 119 | 120 | -------------------------------------------------------------------------------- /snapshot/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | es-demo-parent 7 | io.mincong 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | es-demo-snapshot 13 | Elasticsearch Demos - Snapshot 14 | 15 | 16 | 17 | org.elasticsearch.test 18 | framework 19 | test 20 | 21 | 22 | org.apache.logging.log4j 23 | log4j-core 24 | test 25 | 26 | 27 | org.assertj 28 | assertj-core 29 | test 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /snapshot/repository.md: -------------------------------------------------------------------------------- 1 | # Snapshot Repository 2 | 3 | Snapshot repository understanding in Elasticsearch. 
4 | 5 | ## Prerequisite 6 | 7 | See 8 | 9 | ## Index 10 | 11 | There is one index "transactions" in the cluster: 12 | 13 | ```sh 14 | curl -s localhost:9200/_cat/indices 15 | ``` 16 | 17 | ``` 18 | yellow open transactions P59v6s2PS-GnaCrXgPv2Zg 1 1 10000 0 2.5mb 2.5mb 19 | ``` 20 | 21 | According to the settings of this index, we can see that 22 | `P59v6s2PS-GnaCrXgPv2Zg` is the UUID of the index: 23 | 24 | ```sh 25 | curl -s localhost:9200/transactions/_settings | jq 26 | ``` 27 | 28 | ```json 29 | { 30 | "transactions": { 31 | "settings": { 32 | "index": { 33 | "routing": { 34 | "allocation": { 35 | "include": { 36 | "_tier_preference": "data_content" 37 | } 38 | } 39 | }, 40 | "number_of_shards": "1", 41 | "provided_name": "transactions", 42 | "creation_date": "1610896271859", 43 | "number_of_replicas": "1", 44 | "uuid": "P59v6s2PS-GnaCrXgPv2Zg", 45 | "version": { 46 | "created": "7100199" 47 | } 48 | } 49 | } 50 | } 51 | } 52 | ``` 53 | 54 | ## Snapshots 55 | 56 | ```sh 57 | curl -s localhost:9200/_snapshot/dvf/_all | jq 58 | ``` 59 | 60 | ```json 61 | { 62 | "snapshots": [ 63 | { 64 | "snapshot": "transactions.2021-01-10", 65 | "uuid": "DsseRXnjTwC_tcV0-VgAww", 66 | "version_id": 7100199, 67 | "version": "7.10.1", 68 | "indices": [ 69 | "transactions" 70 | ], 71 | "data_streams": [], 72 | "include_global_state": false, 73 | "state": "SUCCESS", 74 | "start_time": "2021-01-17T15:11:28.179Z", 75 | "start_time_in_millis": 1610896288179, 76 | "end_time": "2021-01-17T15:11:28.981Z", 77 | "end_time_in_millis": 1610896288981, 78 | "duration_in_millis": 802, 79 | "failures": [], 80 | "shards": { 81 | "total": 1, 82 | "failed": 0, 83 | "successful": 1 84 | } 85 | } 86 | ] 87 | } 88 | ``` 89 | 90 | ## Repository 91 | 92 | _What is the structure of the snapshot repository?_ 93 | 94 | By inspecting the directory of the repository "dvf", we can see the content as 95 | below: 96 | 97 | ```sh 98 | # 99 | # > pwd 100 | # /Users/minconghuang/es-backup/demo-dvf/dvf 101 | # 102 
| tree . 103 | . 104 | ├── index-4 105 | ├── index.latest 106 | ├── indices 107 | │   └── toVoOSewT8eO7PbggT7SaA 108 | │   ├── 0 109 | │   │   ├── __7wvBjFfGSouvbPEOI53iMg 110 | │   │   ├── __AsxBUR80T3u6_HCXfAoUjg 111 | │   │   ├── __EjbTOvH4SDOx782QF8L3Ag 112 | │   │   ├── __I3G4iKJ8QvmG44-mX-qw_w 113 | │   │   ├── __KfAc01JpQHW2p9qMTMkeYA 114 | │   │   ├── __Lu_uYoC2RqSYF1Wv5GEchQ 115 | │   │   ├── __NOunPv39SU-IcZHUPjfFSw 116 | │   │   ├── __PhszAfkJRqyfCQme7HXTlw 117 | │   │   ├── __PwGnRH2uRj-E8o5h18l4jA 118 | │   │   ├── __T27uxGqgQoOj_0p86PDyuA 119 | │   │   ├── __YN69iWCtRqqzjFJ9Bgk68w 120 | │   │   ├── __Ywb0Yr67TH-S4tOHjnaoVQ 121 | │   │   ├── __a0QKSHNlT8W-HouU8hVDUw 122 | │   │   ├── __dXx6hK70Q8CuhvqtmOZx4Q 123 | │   │   ├── __fbcWE1PUSXuYUjzvq0hcqA 124 | │   │   ├── __ik7tosUWSSeHFq0Zli8vkA 125 | │   │   ├── index-qXJ7Ux1WSH6w8jELeKiwPA 126 | │   │   └── snap-DsseRXnjTwC_tcV0-VgAww.dat 127 | │   └── meta-l2TmEHcBch0uJWW8rNjp.dat 128 | ├── meta-DsseRXnjTwC_tcV0-VgAww.dat 129 | └── snap-DsseRXnjTwC_tcV0-VgAww.dat 130 | 131 | 3 directories, 23 files 132 | ``` 133 | 134 | File `index-4` contains the names of all the snapshots in the repository. 135 | 136 | ``` 137 | cat index-4 | jq 138 | ``` 139 | 140 | ```js 141 | { 142 | "snapshots": [ 143 | { 144 | "name": "transactions.2021-01-10", 145 | "uuid": "DsseRXnjTwC_tcV0-VgAww", 146 | "state": 1, 147 | "index_metadata_lookup": { 148 | /* 149 | * Index/metadata lookup table, where the key is the index ID and the 150 | * value is ??? 151 | */ 152 | "toVoOSewT8eO7PbggT7SaA": "umyNu_9iRE65F5RYKcS21A-_na_-1-3-1" 153 | }, 154 | "version": "7.10.1" 155 | } 156 | ], 157 | "indices": { 158 | "transactions": { 159 | /* 160 | * This ID is for index "transactions" in the snapshot repository. 161 | * 162 | * Questions: 163 | * - Do we have the timestamps related to this index? 
164 | */ 165 | "id": "toVoOSewT8eO7PbggT7SaA", 166 | "snapshots": [ 167 | "DsseRXnjTwC_tcV0-VgAww" 168 | ], 169 | "shard_generations": [ 170 | "qXJ7Ux1WSH6w8jELeKiwPA" 171 | ] 172 | } 173 | }, 174 | "min_version": "7.9.0", 175 | "index_metadata_identifiers": { 176 | /* 177 | * l2TmEHcBch0uJWW8rNjp is the ID of the metadata inside the snapshot 178 | * transactions.2021-01-10 (DsseRXnjTwC_tcV0-VgAww). The relative path 179 | * inside the repository dvf for this file is: 180 | * 181 | * indices/toVoOSewT8eO7PbggT7SaA/meta-l2TmEHcBch0uJWW8rNjp.dat 182 | * 183 | */ 184 | "umyNu_9iRE65F5RYKcS21A-_na_-1-3-1": "l2TmEHcBch0uJWW8rNjp" 185 | } 186 | } 187 | ``` 188 | 189 | ## Next Steps 190 | 191 | How to go further? 192 | 193 | - Find or implement a tool to translate SMILE JSON to normal JSON then inspect 194 | the JSON content. We can do that using Jackson "jackson-dataformat-smile", see 195 | 196 | 197 | ## References 198 | 199 | - Konrad Beiske, "Snapshot And Restore", Elasticsearch, 2014. 200 | 201 | -------------------------------------------------------------------------------- /snapshot/src/test/java/io/mincong/elasticsearch/ConcurrentSnapshotDeletionTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.concurrent.ExecutionException; 4 | import org.assertj.core.api.Assertions; 5 | import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotAction; 6 | import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequestBuilder; 7 | import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotAction; 8 | import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequestBuilder; 9 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 10 | import org.elasticsearch.cluster.metadata.RepositoryMetadata; 11 | import org.elasticsearch.common.settings.Settings; 12 | import 
org.elasticsearch.common.xcontent.XContentType; 13 | import org.elasticsearch.rest.RestStatus; 14 | import org.elasticsearch.test.ESSingleNodeTestCase; 15 | import org.junit.Before; 16 | import org.junit.Test; 17 | 18 | /** 19 | * Concurrent snapshot deletion in Elasticsearch. 20 | * 21 | * @author Mincong Huang 22 | */ 23 | public class ConcurrentSnapshotDeletionTest extends ESSingleNodeTestCase { 24 | 25 | @Override 26 | @Before 27 | public void setUp() throws Exception { 28 | super.setUp(); 29 | insertDocs(); 30 | createRepo(); 31 | createSnapshots(); 32 | } 33 | 34 | private void insertDocs() { 35 | client() 36 | .prepareIndex() 37 | .setIndex("users") 38 | .setId("user1") 39 | .setSource("{\"name\":\"Tom\"}", XContentType.JSON) 40 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 41 | .execute(); 42 | 43 | client() 44 | .prepareIndex() 45 | .setIndex("companies") 46 | .setId("elastic") 47 | .setSource("{\"name\":\"Elastic\"}", XContentType.JSON) 48 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 49 | .execute(); 50 | } 51 | 52 | public void createRepo() { 53 | // create repository "snapshotRepository" before creating snapshot 54 | var repoStr = client().settings().get("path.repo"); 55 | var settings = 56 | Settings.builder() // 57 | .put("location", repoStr) 58 | .put("compress", true) 59 | .build(); 60 | var acknowledgedResponse = 61 | client() 62 | .admin() 63 | .cluster() 64 | .preparePutRepository("snapshotRepository") 65 | .setType("fs") 66 | .setSettings(settings) 67 | .setVerify(true) 68 | .get(); 69 | Assertions.assertThat(acknowledgedResponse.isAcknowledged()).isTrue(); 70 | var repositories = 71 | client() 72 | .admin() 73 | .cluster() 74 | .prepareGetRepositories("snapshotRepository") 75 | .get() 76 | .repositories(); 77 | Assertions.assertThat(repositories) 78 | .extracting(RepositoryMetadata::name) 79 | .containsExactly("snapshotRepository"); 80 | } 81 | 82 | private void createSnapshots() throws Exception { 83 | var requestU = 84 | new 
CreateSnapshotRequestBuilder(client(), CreateSnapshotAction.INSTANCE) 85 | .setIndices("users") 86 | .setSnapshot("users-snapshot") 87 | .setRepository("snapshotRepository") 88 | .setWaitForCompletion(true) 89 | .request(); 90 | 91 | var requestC = 92 | new CreateSnapshotRequestBuilder(client(), CreateSnapshotAction.INSTANCE) 93 | .setIndices("companies") 94 | .setSnapshot("companies-snapshot") 95 | .setRepository("snapshotRepository") 96 | .setWaitForCompletion(true) 97 | .request(); 98 | 99 | var responseU = client().admin().cluster().createSnapshot(requestU).get(); 100 | var infoU = responseU.getSnapshotInfo(); 101 | Assertions.assertThat(infoU.failedShards()).isZero(); 102 | Assertions.assertThat(infoU.successfulShards()).isGreaterThan(0); 103 | Assertions.assertThat(infoU.indices()).containsExactly("users"); 104 | Assertions.assertThat(infoU.status()).isEqualTo(RestStatus.OK); 105 | 106 | var responseC = client().admin().cluster().createSnapshot(requestC).get(); 107 | var infoC = responseC.getSnapshotInfo(); 108 | Assertions.assertThat(infoC.failedShards()).isZero(); 109 | Assertions.assertThat(infoC.successfulShards()).isGreaterThan(0); 110 | Assertions.assertThat(infoC.indices()).containsExactly("companies"); 111 | Assertions.assertThat(infoC.status()).isEqualTo(RestStatus.OK); 112 | } 113 | 114 | @Test 115 | public void removeSnapshot() throws ExecutionException, InterruptedException { 116 | var requestU = 117 | new DeleteSnapshotRequestBuilder(client(), DeleteSnapshotAction.INSTANCE) 118 | .setSnapshots("users-snapshot") 119 | .setRepository("snapshotRepository") 120 | .request(); 121 | var requestC = 122 | new DeleteSnapshotRequestBuilder(client(), DeleteSnapshotAction.INSTANCE) 123 | .setSnapshots("companies-snapshot") 124 | .setRepository("snapshotRepository") 125 | .request(); 126 | 127 | var responseU = client().admin().cluster().deleteSnapshot(requestU).get(); 128 | var responseC = client().admin().cluster().deleteSnapshot(requestC).get(); 129 | 
Assertions.assertThat(responseU.isAcknowledged()).isTrue(); 130 | Assertions.assertThat(responseC.isAcknowledged()).isTrue(); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /snapshot/src/test/java/io/mincong/elasticsearch/SnapshotStateDemoTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import org.assertj.core.api.Assertions; 4 | import org.elasticsearch.action.admin.cluster.state.ClusterStateRequest; 5 | import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; 6 | import org.elasticsearch.cluster.RestoreInProgress; 7 | import org.elasticsearch.cluster.SnapshotDeletionsInProgress; 8 | import org.elasticsearch.cluster.SnapshotsInProgress; 9 | import org.elasticsearch.test.ESSingleNodeTestCase; 10 | import org.junit.Test; 11 | 12 | /** 13 | * Retrieves snapshot operations progress via cluster state: snapshots-in-progress, 14 | * restore-in-progress, snapshot-deletions-in-progress. 15 | * 16 | *

These tests are not real tests, they are written for demo purpose. 17 | * 18 | * @author Mincong Huang 19 | */ 20 | public class SnapshotStateDemoTest extends ESSingleNodeTestCase { 21 | 22 | @Test 23 | public void methodPrepareState() { 24 | ClusterStateResponse r = 25 | client() 26 | .admin() // 27 | .cluster() 28 | .prepareState() 29 | .clear() 30 | .setCustoms(true) 31 | .get(); 32 | 33 | RestoreInProgress restore = r.getState().custom(RestoreInProgress.TYPE); 34 | SnapshotsInProgress snapshots = r.getState().custom(SnapshotsInProgress.TYPE); 35 | SnapshotDeletionsInProgress deletions = r.getState().custom(SnapshotDeletionsInProgress.TYPE); 36 | 37 | Assertions.assertThat(restore).isNull(); 38 | Assertions.assertThat(snapshots).isNull(); 39 | Assertions.assertThat(deletions).isNull(); 40 | } 41 | 42 | @Test 43 | public void methodClusterStateRequest() { 44 | ClusterStateRequest request = new ClusterStateRequest().clear().customs(true); 45 | ClusterStateResponse r = 46 | client() 47 | .admin() // 48 | .cluster() 49 | .state(request) 50 | .actionGet(); 51 | 52 | RestoreInProgress restore = r.getState().custom(RestoreInProgress.TYPE); 53 | SnapshotsInProgress snapshots = r.getState().custom(SnapshotsInProgress.TYPE); 54 | SnapshotDeletionsInProgress deletions = r.getState().custom(SnapshotDeletionsInProgress.TYPE); 55 | 56 | Assertions.assertThat(restore).isNull(); 57 | Assertions.assertThat(snapshots).isNull(); 58 | Assertions.assertThat(deletions).isNull(); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /snapshot/src/test/java/io/mincong/elasticsearch/SnapshotTest.java: -------------------------------------------------------------------------------- 1 | package io.mincong.elasticsearch; 2 | 3 | import java.util.concurrent.ExecutionException; 4 | import org.assertj.core.api.Assertions; 5 | import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotAction; 6 | import 
org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotRequestBuilder; 7 | import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotAction; 8 | import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequestBuilder; 9 | import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotAction; 10 | import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequestBuilder; 11 | import org.elasticsearch.action.admin.indices.close.CloseIndexResponse.IndexResult; 12 | import org.elasticsearch.action.support.WriteRequest.RefreshPolicy; 13 | import org.elasticsearch.cluster.metadata.RepositoryMetadata; 14 | import org.elasticsearch.common.settings.Settings; 15 | import org.elasticsearch.common.xcontent.XContentType; 16 | import org.elasticsearch.index.Index; 17 | import org.elasticsearch.rest.RestStatus; 18 | import org.elasticsearch.test.ESSingleNodeTestCase; 19 | import org.junit.After; 20 | import org.junit.Before; 21 | import org.junit.Test; 22 | 23 | /** 24 | * Snapshot and restore in Elasticsearch. 
25 | * 26 | * @author Mincong Huang 27 | */ 28 | public class SnapshotTest extends ESSingleNodeTestCase { 29 | 30 | @Override 31 | @Before 32 | public void setUp() throws Exception { 33 | super.setUp(); 34 | 35 | client() 36 | .prepareIndex() 37 | .setIndex("users") 38 | .setId("user1") 39 | .setSource("{\"name\":\"Tom\"}", XContentType.JSON) 40 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 41 | .execute(); 42 | 43 | client() 44 | .prepareIndex() 45 | .setIndex("companies") 46 | .setId("elastic") 47 | .setSource("{\"name\":\"Elastic\"}", XContentType.JSON) 48 | .setRefreshPolicy(RefreshPolicy.IMMEDIATE) 49 | .execute(); 50 | } 51 | 52 | @Override 53 | @After 54 | public void tearDown() throws Exception { 55 | removeSnapshot(); 56 | super.tearDown(); 57 | } 58 | 59 | @Test 60 | public void createSnapshot() throws Exception { 61 | // create repository "snapshotRepository" before creating snapshot 62 | var repoStr = client().settings().get("path.repo"); 63 | var settings = 64 | Settings.builder() // 65 | .put("location", repoStr) 66 | .put("compress", true) 67 | .build(); 68 | var acknowledgedResponse = 69 | client() 70 | .admin() 71 | .cluster() 72 | .preparePutRepository("snapshotRepository") 73 | .setType("fs") 74 | .setSettings(settings) 75 | .setVerify(true) 76 | .get(); 77 | Assertions.assertThat(acknowledgedResponse.isAcknowledged()).isTrue(); 78 | var repositories = 79 | client() 80 | .admin() 81 | .cluster() 82 | .prepareGetRepositories("snapshotRepository") 83 | .get() 84 | .repositories(); 85 | Assertions.assertThat(repositories) 86 | .extracting(RepositoryMetadata::name) 87 | .containsExactly("snapshotRepository"); 88 | 89 | // create snapshot using "snapshotRepository" 90 | var createSnapshotRequest = 91 | new CreateSnapshotRequestBuilder(client(), CreateSnapshotAction.INSTANCE) 92 | .setIndices("users", "companies") 93 | .setSnapshot("snapshot1") 94 | .setRepository("snapshotRepository") 95 | .setWaitForCompletion(true) 96 | .request(); 97 | 98 | var 
createSnapshotResponse = 99 | client().admin().cluster().createSnapshot(createSnapshotRequest).get(); 100 | var snapshotInfo = createSnapshotResponse.getSnapshotInfo(); 101 | Assertions.assertThat(snapshotInfo.failedShards()).isZero(); 102 | Assertions.assertThat(snapshotInfo.successfulShards()).isGreaterThan(0); 103 | Assertions.assertThat(snapshotInfo.indices()).containsExactlyInAnyOrder("users", "companies"); 104 | Assertions.assertThat(snapshotInfo.status()).isEqualTo(RestStatus.OK); 105 | } 106 | 107 | @Test 108 | public void restoreSnapshot() throws Exception { 109 | createSnapshot(); 110 | 111 | /* 112 | * The restore operation can be performed on a functioning cluster. 113 | * However, an existing index can be only restored if it’s closed 114 | * and has the same number of shards as the index in the snapshot. 115 | */ 116 | var closeIndexResponse = client().admin().indices().prepareClose("users", "companies").get(); 117 | Assertions.assertThat(closeIndexResponse.getIndices()) 118 | .extracting(IndexResult::getIndex) 119 | .extracting(Index::getName) 120 | .containsExactlyInAnyOrder("users", "companies"); 121 | 122 | // restore snapshot using "snapshotRepository" 123 | var restoreSnapshotRequest = 124 | new RestoreSnapshotRequestBuilder(client(), RestoreSnapshotAction.INSTANCE) 125 | .setIndices("users", "companies") 126 | .setSnapshot("snapshot1") 127 | .setRepository("snapshotRepository") 128 | .setWaitForCompletion(true) 129 | .request(); 130 | 131 | var restoreSnapshotResponse = 132 | client().admin().cluster().restoreSnapshot(restoreSnapshotRequest).get(); 133 | var restoreInfo = restoreSnapshotResponse.getRestoreInfo(); 134 | Assertions.assertThat(restoreInfo.failedShards()).isZero(); 135 | Assertions.assertThat(restoreInfo.successfulShards()).isGreaterThan(0); 136 | Assertions.assertThat(restoreInfo.indices()).containsExactlyInAnyOrder("users", "companies"); 137 | Assertions.assertThat(restoreInfo.status()).isEqualTo(RestStatus.OK); 138 | } 139 | 140 
| private void removeSnapshot() throws ExecutionException, InterruptedException { 141 | var deleteSnapshot = 142 | new DeleteSnapshotRequestBuilder(client(), DeleteSnapshotAction.INSTANCE) 143 | .setSnapshots("snapshot1") 144 | .setRepository("snapshotRepository") 145 | .request(); 146 | 147 | var acknowledgeResponse = client().admin().cluster().deleteSnapshot(deleteSnapshot).get(); 148 | Assertions.assertThat(acknowledgeResponse.isAcknowledged()).isTrue(); 149 | } 150 | } 151 | --------------------------------------------------------------------------------