├── 01-operations.md ├── 02-query-profiling.txt ├── 03a-multi-fields.txt ├── 03b-multi-fields-exercises.txt ├── 04-function-score-diagrams ├── gauss-function-score.svg └── linear-function-score.svg ├── 04a-function-score.txt ├── 04b-function-score.md ├── 04c-function-score-exercises.md ├── 05a-aggregations.txt ├── 05b-aggregations-exercises.md ├── 06-relationship-diagrams ├── ListingUpgrades.svg └── ParentChild.svg ├── 06a-Relationships.md ├── 06b-nested-objects.txt ├── 06c-nested-objects-exercises.md ├── 07-routing-diagrams ├── DefaultRouting.svg ├── DefaultRoutingGet.svg ├── DefaultSearchRouting.svg ├── ExplicitRouting.svg ├── ExplicitRoutingGet.svg └── ExplicitRoutingSearch.svg ├── 07a-routing.md ├── 07b-parent-child.txt ├── 07c-parent-child-exercises.md ├── 08a-collapse.txt ├── 08b-collapse-exercises.md ├── 09a-geo-point.txt ├── 09b-geo-point-exercises.md ├── 10a-geo-shape.txt ├── 10b-geo-shape-excercises.md ├── 11a-term-suggesters.txt ├── 11b-term-suggesters-excercises.md ├── 12a-completion-suggesters.txt ├── 12b-completion-suggesters-exercises.md ├── 13a-percolator.txt ├── 13b-percolator-exercises.md ├── ElasticsearchOps.pptx ├── LICENSE ├── Other-useful-stuff.txt ├── README.md ├── additional-material ├── README.md ├── macos-syslog-logstash │ └── macos-syslog-logstash.config ├── search-as-you-type.txt └── simple-logstash │ └── simple-logstash.config ├── childcare-centres ├── Childcare_Centres.csv ├── README.md ├── feed-index.sh ├── logstash │ └── logstash.config └── mapping-template.json ├── docker-compose.multi-node.yml ├── docker-compose.yml ├── scripts ├── destroy ├── down ├── logstash ├── up └── up-multi-node └── solutions ├── 03-multi-fields-solutions.txt ├── 04-function-score-solutions.txt ├── 05-aggregations-solutions.txt ├── 06-nested-objects-solutions.txt ├── 07-parent-child-solutions.txt ├── 08-collapse-solutions.txt ├── 09-geo-point-solutions.txt ├── 10-geo-shape-solutions.txt ├── 11-term-suggesters-solutions.txt ├── 
12-completion-suggesters-solutions.txt └── 13-percolator-solutions.txt /01-operations.md: -------------------------------------------------------------------------------- 1 | 2 | # Running multiple Elasticsearch Nodes 3 | 4 | **NOTE: If running on a mac, you will need to give the Docker VM _lots_ of memory.** 5 | 6 | Feel free to skip this. 7 | 8 | To run multiple instances of Elasticsearch on your local machine: 9 | 10 | ```bash 11 | ./scripts/up-multi-node 12 | ``` 13 | 14 | Wait a while. It will take quite a while for everything to start! 15 | 16 | You will now have: 17 | 18 | - A three-node Elasticsearch cluster for containing user data 19 | - A one-node Elasticsearch cluster for containing monitoring data 20 | - Two Kibana instances: 21 | - One for the main three node cluster at http://localhost:15601/ 22 | - One for the monitoring cluster at http://localhost:25601/ 23 | 24 | Have a look at the Monitoring tab in Monitoring Kibana 25 | http://localhost:25601/app/monitoring 26 | 27 | # Catalog 28 | 29 | Look at: 30 | 31 | - http://localhost:9200/_cat 32 | - http://localhost:9200/_cat/indices?v 33 | 34 | 35 | # Controlling shard and replica counts 36 | 37 | Open the dev tools tab of the main cluster 38 | 39 | http://localhost:15601/app/dev_tools#/console 40 | 41 | Try the following: 42 | 43 | ``` 44 | DELETE /test 45 | 46 | PUT /test 47 | { 48 | "settings": { 49 | "number_of_replicas": 1, 50 | "number_of_shards": 7 51 | } 52 | } 53 | 54 | POST /test/_doc 55 | { 56 | "title": "test doc" 57 | } 58 | ``` 59 | 60 | Back in the monitoring Kibana: 61 | To see the shard allocation, navigate to: 62 | 63 | `Monitoring > Indices > test` 64 | 65 | (scroll to the bottom of the page). 66 | 67 | ## Updating the replica count 68 | 69 | ``` 70 | PUT /test/_settings 71 | { 72 | "number_of_replicas": 5 73 | } 74 | ``` 75 | 76 | Look at shard allocation again (it may take a little while to update). 
77 | 78 | Use the "cat" interface to determine the health: 79 | 80 | ``` 81 | GET /_cat/indices?v 82 | ``` 83 | 84 | Change the replica count back to 1. 85 | 86 | ``` 87 | PUT /test/_settings 88 | { 89 | "number_of_replicas": 1 90 | } 91 | ``` 92 | 93 | And see how the replicas are never allocated to the same nodes as the primaries. 94 | 95 | ## You cannot update the shard count 96 | 97 | Try the following **and see it fail** 98 | 99 | ``` 100 | PUT /test/_settings 101 | { 102 | "number_of_shards": 4 103 | } 104 | ``` 105 | 106 | **You cannot change the shard count on an index.** 107 | 108 | 109 | # Cluster resiliency 110 | 111 | Kill one of the nodes (not the master one yet). 112 | 113 | Look at the allocation of shards to nodes. 114 | 115 | Wait 60 seconds and have another look. 116 | You should find that the shards have been re-allocated. 117 | 118 | Restart the node and have another look. 119 | 120 | 121 | # Master re-election 122 | 123 | Kill the master node. 124 | 125 | Note how the cluster responds fairly quickly to the death of the master node, 126 | but it still takes about a minute for the shards to be rebalanced. 
127 | 128 | 129 | # Finally 130 | 131 | Shutdown: 132 | 133 | ``` 134 | ./scripts/destroy 135 | ``` 136 | -------------------------------------------------------------------------------- /02-query-profiling.txt: -------------------------------------------------------------------------------- 1 | DELETE /test 2 | 3 | POST /test/_doc 4 | { 5 | "title": "Elasticsearch Hands On" 6 | } 7 | 8 | # For an explanation, see https://www.elastic.co/guide/en/elasticsearch/reference/7.0/search-profile.html 9 | GET /test/_search 10 | { 11 | "profile": true, 12 | "query": { 13 | "bool": { 14 | "must": [ 15 | { 16 | "match": { 17 | "title": { 18 | "query": "Elasticsearch Tutorial" 19 | } 20 | } 21 | } 22 | ], 23 | "must_not": [ 24 | { 25 | "match": { 26 | "title": "online" 27 | } 28 | } 29 | ] 30 | } 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /03a-multi-fields.txt: -------------------------------------------------------------------------------- 1 | DELETE /listings 2 | 3 | # Refresher on analysers 4 | 5 | # Try this with standard, english, whitespace, and simple 6 | GET /_analyze 7 | { 8 | "analyzer": "standard", 9 | "text": ["1/511 Churches St, Richmond 3056"] 10 | } 11 | 12 | 13 | # Note the three "sub-fields" of address, with three different analysers 14 | PUT /listings 15 | { 16 | "mappings": { 17 | "properties": { 18 | "address": { 19 | "type": "text", 20 | "analyzer": "standard", 21 | "fields": { 22 | "a": {"type": "text", "analyzer": "english"}, 23 | "b": {"type": "text", "analyzer": "whitespace"}, 24 | "c": {"type": "text", "analyzer": "simple"} 25 | } 26 | } 27 | } 28 | } 29 | } 30 | 31 | POST /listings/_doc 32 | { 33 | "address": "511 Church St, Richmond" 34 | } 35 | 36 | # THIS WILL NOT MATCH. 
37 | # "Churches" does not match "Church" (standard analyser) 38 | GET /listings/_search 39 | { 40 | "query": { 41 | "match": { 42 | "address": { 43 | "query": "Churches" 44 | } 45 | } 46 | } 47 | } 48 | 49 | # This does match as "address.a" uses the "english" analyser 50 | GET /listings/_search 51 | { 52 | "query": { 53 | "match": { 54 | "address.a": { 55 | "query": "Churches" 56 | } 57 | } 58 | } 59 | } 60 | 61 | -------------------------------------------------------------------------------- /03b-multi-fields-exercises.txt: -------------------------------------------------------------------------------- 1 | # Use the mappings below to create a listings index with different analysers, 2 | # and insert a few documents as shown. 3 | 4 | DELETE /listings 5 | 6 | # Note the three "sub-fields" of address, with three different analysers 7 | PUT /listings 8 | { 9 | "mappings": { 10 | "properties": { 11 | "address": { 12 | "type": "text", 13 | "analyzer": "standard", 14 | "fields": { 15 | "a": {"type": "text", "analyzer": "english"}, 16 | "b": {"type": "text", "analyzer": "whitespace"}, 17 | "c": {"type": "text", "analyzer": "simple"} 18 | } 19 | } 20 | } 21 | } 22 | } 23 | 24 | POST /listings/_doc 25 | { 26 | "address": "1 Church St, Richmond" 27 | } 28 | 29 | POST /listings/_doc 30 | { 31 | "address": "2 Church St, Richmond" 32 | } 33 | 34 | POST /listings/_doc 35 | { 36 | "address": "1 Churches St, Blakeview" 37 | } 38 | 39 | 40 | # Try the following "analyze" command with the "standard", "english", "whitespace", and "simple" analysers to see 41 | # the effects of the different analysers: 42 | 43 | 44 | GET /listings/_analyze 45 | { 46 | "analyzer": "whitespace", 47 | "text": ["1 Churches St"] 48 | } 49 | 50 | 51 | # Look up "match_phrase" and use a "match_phrase" query that only matches "1 Church St" 52 | # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query-phrase.html 53 | GET /listings/_search 54 | { 55 | "query": { 56 | 
"match_phrase": { 57 | ...... 58 | 59 | 60 | # Use a "bool" query to find listings that match "1 Church St" after English stemming, 61 | # but gives a higher score (by using "should") to those listings that match without stemming 62 | # Reminder for "bool": 63 | # - Use "filter" for stuff that must be true 64 | # - Use "should" to boost 65 | # See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html 66 | GET /listings/_search 67 | { 68 | "query": { 69 | "bool": { 70 | .... 71 | -------------------------------------------------------------------------------- /04-function-score-diagrams/gauss-function-score.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
Score
Score
Price
Price
0
0
$1800
$1800
1.0
1.0
$800
$800
$2800
$2800
"functions": [
{
"gauss": {
"price": {
"origin": 800,
"scale": 1000,
"decay": 0.5
}
}
}
]
"functions": [...
0.5
0.5
Viewer does not support full SVG 1.1
-------------------------------------------------------------------------------- /04-function-score-diagrams/linear-function-score.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
Score
Score
Price
Price
0
0
$1800
$1800
1.0
1.0
0.5
0.5
$800
$800
$2800
$2800
"functions": [
{
"linear": {
"price": {
"origin": 800,
"scale": 1000,
"decay": 0.5
}
}
}
]
"functions": [...
Viewer does not support full SVG 1.1
-------------------------------------------------------------------------------- /04a-function-score.txt: -------------------------------------------------------------------------------- 1 | 2 | DELETE /listings 3 | 4 | # Index example docs 5 | POST /listings/_bulk 6 | { "index": {}} 7 | { "address": "1 Smith St Melbourne Vic 3000", "product": "highlight", "price": 820 } 8 | { "index": {}} 9 | { "address": "12 Smith St Sydney NSW 2000", "product": "highlight", "price": 950 } 10 | { "index": {}} 11 | { "address": "44 Smith St Brisbane QLD 4000", "product": "premier", "price": 900 } 12 | { "index": {}} 13 | { "address": "121 Smith St Perth WA 6000", "product": "premier", "price": 460 } 14 | { "index": {}} 15 | { "address": "3 Smith St Hobart TAS 7000", "product": "standard", "price": 650 } 16 | { "index": {}} 17 | { "address": "454 Smith St Adelaide SA 5000", "product": "standard", "price": 930 } 18 | 19 | 20 | 21 | # Find all listings, but rank according to product: premier, then highlight, and then the rest 22 | GET /listings/_search 23 | { 24 | "query": { 25 | "function_score": { 26 | "query": { 27 | "match_all": {} 28 | }, 29 | "functions": [ 30 | { 31 | "filter": {"match": { "product": "premier" }}, 32 | "weight": 3 33 | }, 34 | { 35 | "filter": {"match": { "product": "highlight" }}, 36 | "weight": 2 37 | } 38 | ] 39 | } 40 | } 41 | } 42 | 43 | 44 | # Sort by rank, then by price 45 | GET /listings/_search 46 | { 47 | "query": { 48 | "function_score": { 49 | "query": { 50 | "match_all": {} 51 | }, 52 | "functions": [ 53 | { 54 | "filter": {"match": { "product": "premier" }}, 55 | "weight": 3 56 | }, 57 | { 58 | "filter": {"match": { "product": "highlight" }}, 59 | "weight": 2 60 | } 61 | ] 62 | } 63 | }, 64 | "sort": [ 65 | { 66 | "_score": { 67 | "order": "desc" 68 | } 69 | }, 70 | { 71 | "price": "desc" 72 | } 73 | ] 74 | } 75 | 76 | 77 | # List all properties ranked by how close the price is to 800 78 | GET /listings/_search 79 | { 80 | "query": { 81 | 
"function_score": { 82 | "query": { 83 | "match_all": {} 84 | }, 85 | "functions": [ 86 | { 87 | "linear": { 88 | "price": { 89 | "origin": 800, 90 | "scale": 1000, 91 | "decay": 0.5 92 | } 93 | } 94 | } 95 | ] 96 | } 97 | } 98 | } 99 | 100 | 101 | # Rank Premiers in Brisbane above all else 102 | # Note that, by default, the function score query multiplies the scores together 103 | GET /listings/_search 104 | { 105 | "query": { 106 | "function_score": { 107 | "query": { 108 | "match_all": {} 109 | }, 110 | "functions": [ 111 | { 112 | "filter": {"match": { "product": "premier" }}, 113 | "weight": 3 114 | }, 115 | { 116 | "filter": {"match": { "address": "Brisbane" }}, 117 | "weight": 2 118 | } 119 | ] 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /04b-function-score.md: -------------------------------------------------------------------------------- 1 | # Decay scoring 2 | 3 | ## Linear decay 4 | 5 | ```json 6 | { 7 | "query": { 8 | "function_score": { 9 | "query": { 10 | "match_all": {} 11 | }, 12 | "functions": [ 13 | { 14 | "linear": { 15 | "price": { 16 | "origin": 800, 17 | "scale": 1000, 18 | "decay": 0.5 19 | } 20 | } 21 | } 22 | ] 23 | } 24 | } 25 | } 26 | ``` 27 | 28 | ![Linear Decay](./04-function-score-diagrams/linear-function-score.svg) 29 | 30 | 31 | ## Gaussian decay 32 | 33 | ```json 34 | { 35 | "query": { 36 | "function_score": { 37 | "query": { 38 | "match_all": {} 39 | }, 40 | "functions": [ 41 | { 42 | "gauss": { 43 | "price": { 44 | "origin": 800, 45 | "scale": 1000, 46 | "decay": 0.5 47 | } 48 | } 49 | } 50 | ] 51 | } 52 | } 53 | } 54 | ``` 55 | 56 | ![Gaussian Decay](./04-function-score-diagrams/gauss-function-score.svg) 57 | -------------------------------------------------------------------------------- /04c-function-score-exercises.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Hands On Part 2 Exercises 2 | 3 | By 
default, the function_score query multiplies together the scores from the applicable functions. 4 | 5 | You can over-ride that behaviour by specifying a score_mode of "sum": 6 | 7 | eg. 8 | 9 | ``` 10 | GET /listings/_search 11 | { 12 | "query": { 13 | "function_score": { 14 | "query": { 15 | "match_all": {} 16 | }, 17 | "score_mode": "sum", 18 | "functions": [ 19 | {... 20 | ``` 21 | 22 | 23 | With that in mind: 24 | 25 | Write a function_score query that gives a score of 26 | 27 | - 30 + x to premier 28 | - 20 + x to highlight 29 | - 10 + x to standard 30 | 31 | Where x is a value between 0 and 1 that reflects how close the property price is to 800. 32 | For example, 33 | 34 | - A premier property with a price of 800 should get a score of 31.0 35 | - A highlight property with a price of 460 should get a score something like 20.83 36 | (i.e. between 20 and 21) 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /05a-aggregations.txt: -------------------------------------------------------------------------------- 1 | # Delete the `listings` index 2 | DELETE /listings 3 | 4 | # Note the "keyword" type: it means no tokenizing etc., but is required for aggregation. 
5 | PUT /listings 6 | { 7 | "settings": { 8 | "number_of_shards": 5 9 | }, 10 | "mappings": { 11 | "properties": { 12 | "price": {"type": "integer"}, 13 | "propertyType": {"type": "keyword"}, 14 | "city": {"type": "keyword"}, 15 | "sold": {"type": "date"} 16 | } 17 | } 18 | } 19 | 20 | 21 | # Index example docs 22 | POST /listings/_bulk 23 | { "index": {}} 24 | { "price" : 600000, "propertyType" : "house", "city" : "hobart", "sold" : "2017-10-28" } 25 | { "index": {}} 26 | { "price" : 800000, "propertyType" : "house", "city" : "hobart", "sold" : "2017-11-05" } 27 | { "index": {}} 28 | { "price" : 300000, "propertyType" : "apartment", "city" : "brisbane", "sold" : "2017-05-18" } 29 | { "index": {}} 30 | { "price" : 450000, "propertyType" : "land", "city" : "melbourne", "sold" : "2017-07-02" } 31 | { "index": {}} 32 | { "price" : 360000, "propertyType" : "apartment", "city" : "melbourne", "sold" : "2017-08-19" } 33 | { "index": {}} 34 | { "price" : 400000, "propertyType" : "house", "city" : "hobart", "sold" : "2017-11-05" } 35 | { "index": {}} 36 | { "price" : 900000, "propertyType" : "house", "city" : "sydney", "sold" : "2017-01-01" } 37 | { "index": {}} 38 | { "price" : 500000, "propertyType" : "land", "city" : "brisbane", "sold" : "2017-02-12" } 39 | 40 | 41 | # Basic aggregation - most popular property types 42 | GET /listings/_search 43 | { 44 | "aggs" : { 45 | "my_aggregation" : { 46 | "terms" : { 47 | "field" : "propertyType" 48 | } 49 | } 50 | } 51 | } 52 | 53 | # Most popular brisbane property types 54 | # Note that the query "hits" returns all "brisbane" listings, and the aggregations are restricted to "brisbane" 55 | GET /listings/_search 56 | { 57 | "query": { 58 | "match": { 59 | "city": "brisbane" 60 | } 61 | }, 62 | "aggs" : { 63 | "my_aggregation" : { 64 | "terms" : { 65 | "field" : "propertyType" 66 | } 67 | } 68 | } 69 | } 70 | 71 | # Use aggregation filtering instead 72 | # This time the query returns everything, but the aggregation is still 
restricted to "brisbane" 73 | GET /listings/_search 74 | { 75 | "query": { 76 | "match_all": {} 77 | }, 78 | "aggs": { 79 | "my_aggregation": { 80 | "filter": { 81 | "match": { 82 | "city": "brisbane" 83 | } 84 | }, 85 | "aggs": { 86 | "my_sub_aggregation": { 87 | "terms": { 88 | "field": "propertyType" 89 | } 90 | } 91 | } 92 | } 93 | } 94 | } 95 | 96 | 97 | # What is doc_count_error_upper_bound and sum_other_doc_count??? 98 | # sum_other_doc_count: number of documents not represented in the results (due to "size") 99 | # doc_count_error_upper_bound: maximum potential document count of a term not included in the results 100 | # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-approximate-counts 101 | GET /listings/_search 102 | { 103 | "aggs" : { 104 | "my_aggregation" : { 105 | "terms" : { 106 | "field" : "propertyType", 107 | "size": 1, 108 | "shard_size": 1 109 | } 110 | } 111 | } 112 | } 113 | 114 | # Use "show_term_doc_count_error" to get worst case error per term 115 | # - i.e. 
by how much each count may be an under-estimate of the real value 116 | GET /listings/_search 117 | { 118 | "aggs" : { 119 | "my_aggregation" : { 120 | "terms" : { 121 | "field" : "propertyType", 122 | "size": 1, 123 | "shard_size": 1, 124 | "show_term_doc_count_error": true 125 | } 126 | } 127 | } 128 | } 129 | 130 | 131 | # Find the average price of each property type 132 | GET /listings/_search 133 | { 134 | "aggs": { 135 | "my_aggregation": { 136 | "terms": { 137 | "field": "propertyType" 138 | }, 139 | "aggs": { 140 | "my_sub_aggregation": { 141 | "avg": { 142 | "field": "price" 143 | } 144 | } 145 | } 146 | } 147 | } 148 | } 149 | 150 | # Order by average price ascending 151 | GET /listings/_search 152 | { 153 | "aggs": { 154 | "my_aggregation": { 155 | "terms": { 156 | "field": "propertyType", 157 | "order" : { "my_sub_aggregation" : "asc" } 158 | }, 159 | "aggs": { 160 | "my_sub_aggregation": { 161 | "avg": { 162 | "field": "price" 163 | } 164 | } 165 | } 166 | } 167 | } 168 | } 169 | 170 | 171 | # Nest a second bucket to determine top cities per property type 172 | GET /listings/_search 173 | { 174 | "aggs": { 175 | "my_aggregation": { 176 | "terms": { 177 | "field": "propertyType" 178 | }, 179 | "aggs": { 180 | "my_sub_aggregation": { 181 | "avg": { 182 | "field": "price" 183 | } 184 | }, 185 | "my_other_sub_aggregation": { 186 | "terms": { 187 | "field": "city" 188 | } 189 | } 190 | } 191 | } 192 | } 193 | } 194 | 195 | 196 | # Finally, add some extra metrics to determine min/max price per city, per property type 197 | GET /listings/_search 198 | { 199 | "aggs": { 200 | "my_aggregation": { 201 | "terms": { 202 | "field": "propertyType" 203 | }, 204 | "aggs": { 205 | "my_sub_aggregation": { "avg": { "field": "price" } 206 | }, 207 | "my_other_sub_aggregation" : { 208 | "terms" : { 209 | "field" : "city" 210 | }, 211 | "aggs" : { 212 | "my_sub_sub_agg" : { "min": { "field": "price"} }, 213 | "my_other_sub_sub_agg" : { "max": { "field": "price"} } 214 | } 
215 | } 216 | } 217 | } 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /05b-aggregations-exercises.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Hands On Part 2 Exercises 2 | 3 | - Write an aggregation to find the most popular cities 4 | 5 | - Write an aggregation to find the maximum price for each city 6 | 7 | -------------------------------------------------------------------------------- /06-relationship-diagrams/ListingUpgrades.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
Premiere
Premiere
Premiere
Premiere
Highlight
Highlight
Highlight
Highlight
Listing 1
Listing 1
Listing 2
Listing 2
Mar 1st
Mar...
Apr
1st
Apr...
May
1st
May...
Apr
1st
Apr...
Viewer does not support full SVG 1.1
-------------------------------------------------------------------------------- /06-relationship-diagrams/ParentChild.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
id: 1
Address: 42 Smith St
Join: {name: Listing}
[Not supported by viewer]
id: 2
Product: Premiere
Dates: 7th March to 21 March 
Join: {name: Upgrade, Parent: 1}
[Not supported by viewer]
id: 3
Product: Highlight
Dates: 22nd March to 13th April 
Join: {name: Upgrade, Parent: 1}
[Not supported by viewer]
id: 4
Address: 2 Jones St
Join: {name: Listing}
[Not supported by viewer]
id: 5
Product: Premiere
Dates: 28th March to 10th April 
Join: {name: Upgrade, Parent: 4}
[Not supported by viewer]
id: 6
Product: Highlight
Dates: 11th April to 19th May 
Join: {name: Upgrade, Parent: 4}
[Not supported by viewer]
-------------------------------------------------------------------------------- /06a-Relationships.md: -------------------------------------------------------------------------------- 1 | # Representing relationships 2 | 3 | Consider listing upgrades. 4 | 5 | Which listings had a premiere upgrade on April 1st? 6 | 7 | ![listing upgrades](./06-relationship-diagrams/ListingUpgrades.svg) 8 | 9 | ## Nested array of objects 10 | 11 | Note: you will need to explicitly declare `upgrade` as being of type `nested`, and will have to use `nested` queries. 12 | 13 | ```json 14 | { 15 | "address": "1 Smith St Melbourne Vic 3000", 16 | "upgrade": [ 17 | { 18 | "product": "premier", 19 | "start": "2015-03-07", 20 | "end": "2015-03-21" 21 | }, 22 | { 23 | "product": "highlight", 24 | "start": "2015-03-22", 25 | "end": "2015-04-13" 26 | } 27 | ] 28 | } 29 | ``` 30 | 31 | ## Parent / Child 32 | 33 | Upgrades represented as separate documents but mixed together in the same index as the listings. 34 | Special "join" field is used to connect them. 35 | 36 | ![Parent Child](./06-relationship-diagrams/ParentChild.svg) 37 | 38 | ## Field Collapsing 39 | 40 | De-normalise the relationship. 41 | e.g. duplicate the listing information in every document. 42 | 43 | ```json 44 | {"listingId": 1, "address": "5 Smith St", "product": "premier", "start": "2015-03-07", "end": "2015-03-21"} 45 | {"listingId": 1, "address": "5 Smith St", "product": "highlight", "start": "2015-03-22", "end": "2015-04-13"} 46 | {"listingId": 2, "address": "7 Jones St", "product": "premier", "start": "2015-03-28", "end": "2015-04-10"} 47 | {"listingId": 2, "address": "7 Jones St", "product": "highlight", "start": "2015-04-11", "end": "2015-05-19"} 48 | ``` 49 | 50 | Query can "collapse" results on a field (e.g. the `listingId`). 
51 | -------------------------------------------------------------------------------- /06b-nested-objects.txt: -------------------------------------------------------------------------------- 1 | DELETE /listings 2 | 3 | # Index example docs 4 | PUT /listings/_doc/1 5 | { 6 | "address": "1 Smith St Melbourne Vic 3000", 7 | "upgrade": [ 8 | { 9 | "product": "premier", 10 | "start": "2015-03-07", 11 | "end": "2015-03-21" 12 | }, 13 | { 14 | "product": "highlight", 15 | "start": "2015-03-22", 16 | "end": "2015-04-13" 17 | } 18 | ] 19 | } 20 | 21 | PUT /listings/_doc/2 22 | { 23 | "address": "2 Jones St Sydney NSW 2000", 24 | "upgrade": [ 25 | { 26 | "product": "premier", 27 | "start": "2015-03-28", 28 | "end": "2015-04-10" 29 | }, 30 | { 31 | "product": "highlight", 32 | "start": "2015-04-11", 33 | "end": "2015-05-19" 34 | } 35 | ] 36 | } 37 | 38 | # Look at the mapping 39 | GET /listings/_mapping 40 | 41 | 42 | # Find listings that were premier on April 1. 43 | # NOTE HOW THIS DOESN'T WORK!!! 44 | # It includes "1 Smith St" even though it's premier status ended before April 1. 45 | GET /listings/_search 46 | { 47 | "query": { 48 | "bool": { 49 | "filter": [ 50 | { 51 | "match": { 52 | "upgrade.product": "premier" 53 | } 54 | }, 55 | { 56 | "range": { 57 | "upgrade.start": { 58 | "lte": "2015-04-01" 59 | } 60 | } 61 | }, 62 | { 63 | "range": { 64 | "upgrade.end": { 65 | "gte": "2015-04-01" 66 | } 67 | } 68 | } 69 | ] 70 | } 71 | } 72 | } 73 | 74 | # Use Nested Objects 75 | 76 | DELETE /listings 77 | 78 | PUT /listings 79 | { 80 | "mappings": { 81 | "properties": { 82 | "address": {"type": "text"}, 83 | "upgrade": { 84 | "type": "nested", 85 | "properties": { 86 | "product": {"type": "keyword"}, 87 | "start": {"type": "date"}, 88 | "end": {"type": "date"} 89 | } 90 | } 91 | } 92 | } 93 | } 94 | 95 | 96 | # Index example docs again. 
97 | PUT /listings/_doc/1 98 | { 99 | "address": "1 Smith St Melbourne Vic 3000", 100 | "upgrade": [ 101 | { 102 | "product": "premier", 103 | "start": "2015-03-07", 104 | "end": "2015-03-21" 105 | }, 106 | { 107 | "product": "highlight", 108 | "start": "2015-03-22", 109 | "end": "2015-04-13" 110 | } 111 | ] 112 | } 113 | 114 | PUT /listings/_doc/2 115 | { 116 | "address": "2 Jones St Sydney NSW 2000", 117 | "upgrade": [ 118 | { 119 | "product": "premier", 120 | "start": "2015-03-28", 121 | "end": "2015-04-10" 122 | }, 123 | { 124 | "product": "highlight", 125 | "start": "2015-04-11", 126 | "end": "2015-05-19" 127 | } 128 | ] 129 | } 130 | 131 | # Use a "nested" query 132 | GET /listings/_search 133 | { 134 | "query": { 135 | "nested": { 136 | "path": "upgrade", 137 | "query": { 138 | "bool": { 139 | "filter": [ 140 | { 141 | "match": { 142 | "upgrade.product": "premier" 143 | } 144 | }, 145 | { 146 | "range": { 147 | "upgrade.start": { 148 | "lte": "2015-04-01" 149 | } 150 | } 151 | }, 152 | { 153 | "range": { 154 | "upgrade.end": { 155 | "gte": "2015-04-01" 156 | } 157 | } 158 | } 159 | ] 160 | } 161 | } 162 | } 163 | } 164 | } 165 | 166 | # Use inner_hits to see which products actually matched 167 | GET /listings/_search 168 | { 169 | "query": { 170 | "nested": { 171 | "inner_hits" : {}, 172 | "path": "upgrade", 173 | "query": { 174 | "bool": { 175 | "filter": [ 176 | { 177 | "match": { 178 | "upgrade.product": "premier" 179 | } 180 | }, 181 | { 182 | "range": { 183 | "upgrade.start": { 184 | "lte": "2015-04-01" 185 | } 186 | } 187 | }, 188 | { 189 | "range": { 190 | "upgrade.end": { 191 | "gte": "2015-04-01" 192 | } 193 | } 194 | } 195 | ] 196 | } 197 | } 198 | } 199 | } 200 | } 201 | 202 | -------------------------------------------------------------------------------- /06c-nested-objects-exercises.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Hands On Part 2 Exercises 2 | 3 | Try the following: 4 | 5 
| ``` 6 | DELETE /agencies 7 | 8 | PUT /agencies/_doc/1 9 | { 10 | "name": "Nelson Alexander", 11 | "agents": [ 12 | {"firstName": "Mary", "lastName": "Smith"}, 13 | {"firstName": "Joe", "lastName": "Blogs"} 14 | ] 15 | } 16 | 17 | 18 | PUT /agencies/_doc/2 19 | { 20 | "name": "MacGrath", 21 | "agents": [ 22 | {"firstName": "Mary", "lastName": "Blogs"}, 23 | {"firstName": "Joe", "lastName": "Smith"} 24 | ] 25 | } 26 | 27 | 28 | GET /agencies/_search 29 | { 30 | "query": { 31 | "bool": { 32 | "filter": [ 33 | { 34 | "match": { 35 | "agents.firstName": "Mary" 36 | } 37 | }, 38 | { 39 | "match": { 40 | "agents.lastName": "Blogs" 41 | } 42 | } 43 | ] 44 | } 45 | } 46 | } 47 | ``` 48 | 49 | Are the results what you expected? 50 | 51 | Delete the index and explicitly give it a mapping so that 52 | you can use a "nested" query to find only those agencies 53 | that have an agent called "Mary Blogs". 54 | 55 | **NOTE: For names, you should use a "text" type so as to get case insensitive matching etc.** 56 | -------------------------------------------------------------------------------- /07-routing-diagrams/DefaultRouting.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
Shard 0
Shard 0
Shard 1
Shard 1
Shard 2
Shard 2<br>
Document
id = ABCXYZ

[Not supported by viewer]
Hash of ABCXYZ mod 3
<font style="font-size: 16px">Hash of ABCXYZ mod 3</font>
PUT /agency/_doc/ABCXYZ
<font style="font-size: 16px">PUT /agency/_doc/ABCXYZ</font>
-------------------------------------------------------------------------------- /07-routing-diagrams/DefaultRoutingGet.svg: -------------------------------------------------------------------------------- 1 | 2 |
Shard 1
Shard 1
Shard 2
Shard 2<br>
Hash of ABCXYZ mod 3
<font style="font-size: 16px">Hash of ABCXYZ mod 3</font>
GET /agency/_doc/ABCXYZ
<font style="font-size: 16px">GET /agency/_doc/ABCXYZ</font>
Shard  0
Shard  0
Document
id = ABCXYZ
Document<br>id = ABCXYZ<br>
-------------------------------------------------------------------------------- /07-routing-diagrams/DefaultSearchRouting.svg: -------------------------------------------------------------------------------- 1 | 2 |
Shard 0
Shard 0
Shard 1
Shard 1
Shard 2
Shard 2<br>
GET /agency/_search
<font style="font-size: 16px">GET /agency/_search</font>
-------------------------------------------------------------------------------- /07-routing-diagrams/ExplicitRouting.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
Shard 0
Shard 0
Shard 1
Shard 1
Shard 2
Shard 2<br>
Document
id = ABCXYZ

[Not supported by viewer]
Hash of brunswick mod 3
<font style="font-size: 16px">Hash of brunswick mod 3</font>
PUT /agency/_doc/ABCXYZ?routing=brunswick
<font style="font-size: 16px">PUT /agency/_doc/ABCXYZ?routing=brunswick</font>
-------------------------------------------------------------------------------- /07-routing-diagrams/ExplicitRoutingGet.svg: -------------------------------------------------------------------------------- 1 | 2 |
Shard 1
Shard 1
Shard 0
Shard 0<br>
Hash of brunswick mod 3
<font style="font-size: 16px">Hash of brunswick mod 3</font>
GET /agency/_doc/ABCXYZ?routing=brunswick
<font style="font-size: 16px">GET /agency/_doc/ABCXYZ?routing=brunswick</font>
Shard  2
Shard  2
Document
id = ABCXYZ
Document<br>id = ABCXYZ<br>
-------------------------------------------------------------------------------- /07-routing-diagrams/ExplicitRoutingSearch.svg: -------------------------------------------------------------------------------- 1 | 2 |
Shard 1
Shard 1
Shard 0
Shard 0<br>
Hash of brunswick mod 3
<font style="font-size: 16px">Hash of brunswick mod 3</font>
GET /agency/_search?routing=brunswick
<font style="font-size: 16px">GET /agency/_search?routing=brunswick</font>
Shard  2
Shard  2
Document
id = ABCXYZ
Document<br>id = ABCXYZ<br>
-------------------------------------------------------------------------------- /07a-routing.md: -------------------------------------------------------------------------------- 1 | # Routing 2 | 3 | ## Default Routing 4 | 5 | ### Insert 6 | When a document is inserted, by default it is _routed_ to a shard based on a hash of its document id. 7 | 8 | ``` 9 | PUT /agency/_doc/ABCXYZ 10 | { 11 | "name": "Nelson Alexander" 12 | } 13 | ``` 14 | 15 | ![Default Routing](./07-routing-diagrams/DefaultRouting.svg) 16 | 17 | ### Get by Id 18 | When fetching a document, the same hashing is used so the fetch can be directed to the correct shard. 19 | 20 | ``` 21 | GET /agency/_doc/ABCXYZ 22 | ``` 23 | 24 | ![Default Routing Fetch by Id](./07-routing-diagrams/DefaultRoutingGet.svg) 25 | 26 | ### Search 27 | Searches are forwarded to all shards 28 | 29 | ``` 30 | GET /agency/_search 31 | ``` 32 | 33 | ![Default Search Routing](./07-routing-diagrams/DefaultSearchRouting.svg) 34 | 35 | ## Explicit Routing 36 | 37 | You can over-ride the routing by specifying a _routing_ parameter when you save the document. 38 | 39 | Suppose you want all agency documents belonging to the same suburb to be co-located in the same shard, 40 | then you could use the suburb name as the routing parameter 41 | 42 | ### Insert 43 | 44 | ``` 45 | PUT /agency/_doc/ABCXYZ?routing=brunswick 46 | { 47 | "name": "Nelson Alexander" 48 | } 49 | ``` 50 | 51 | ![Explicit Routing](./07-routing-diagrams/ExplicitRouting.svg) 52 | 53 | ### Get by Id 54 | 55 | You **must** specify the routing parameter when fetching the document. 56 | 57 | ``` 58 | GET /agency/_doc/ABCXYZ?routing=brunswick 59 | ``` 60 | 61 | ![Fetching a routed document](./07-routing-diagrams/ExplicitRoutingGet.svg) 62 | 63 | ### Search 64 | 65 | By default, it will still forward the search to all shards. 66 | 67 | But, you **may** specify the routing parameter when searching (not mandatory). 
68 | 69 | ``` 70 | GET /agency/_search?routing=brunswick 71 | { 72 | "query": { 73 | "term": { 74 | "suburb": { 75 | "value": "brunswick" 76 | } 77 | } 78 | } 79 | } 80 | ``` 81 | 82 | NOTE: `GET /agency/_search?routing=brunswick` **does not limit the query to "brunswick" documents**. 83 | It will return all documents in the same shard as the "brunswick" documents. 84 | 85 | ![Explicit search routing](./07-routing-diagrams/ExplicitRoutingSearch.svg) 86 | 87 | This can potentially increase your throughput (searches per second). 88 | -------------------------------------------------------------------------------- /07b-parent-child.txt: -------------------------------------------------------------------------------- 1 | DELETE /listings 2 | 3 | # title and address are "listing" fields 4 | # product, start, and end are "upgrade" fields 5 | # Here, they are all mixed together. 6 | # A join field is used to relate listing docs with upgrade docs. 7 | PUT /listings 8 | { 9 | "settings": { 10 | "number_of_shards": 5 11 | }, 12 | "mappings": { 13 | "properties": { 14 | 15 | "title": { 16 | "type": "text" 17 | }, 18 | "address": { 19 | "type": "text" 20 | }, 21 | 22 | "product": { 23 | "type": "keyword" 24 | }, 25 | "start": { 26 | "type": "date" 27 | }, 28 | "end": { 29 | "type": "date" 30 | }, 31 | 32 | "my_join_field": { 33 | "type": "join", 34 | "relations": { 35 | "listing": "upgrade" 36 | } 37 | } 38 | } 39 | } 40 | } 41 | 42 | 43 | 44 | # Index example docs 45 | # Note: 46 | # - you need to specify a "routing" query parameter (more later) 47 | # - you need to specify that it is a "listing" 48 | PUT /listings/_doc/1?routing=1 49 | { 50 | "title": "Sea Views", 51 | "address": "1 Smith St Melbourne Vic 3000", 52 | "my_join_field": { 53 | "name": "listing" 54 | } 55 | } 56 | 57 | # Note: 58 | # - parents and children need the same routing parameter (consider using the parent id) 59 | # - you have to specify the parent id when indexing a child. 
60 | PUT /listings/_doc/100?routing=1 61 | { 62 | "product": "premier", 63 | "start": "2015-03-07", 64 | "end": "2015-03-21", 65 | "my_join_field": { 66 | "name": "upgrade", 67 | "parent": "1" 68 | } 69 | } 70 | 71 | PUT /listings/_doc/101?routing=1 72 | { 73 | "product": "highlight", 74 | "start": "2015-03-22", 75 | "end": "2015-04-13", 76 | "my_join_field": { 77 | "name": "upgrade", 78 | "parent": "1" 79 | } 80 | } 81 | 82 | 83 | PUT /listings/_doc/2?routing=2 84 | { 85 | "title": "Renovators Delight", 86 | "address": "2 Jones St Sydney NSW 2000", 87 | "my_join_field": { 88 | "name": "listing" 89 | } 90 | } 91 | 92 | PUT /listings/_doc/200?routing=2 93 | { 94 | "product": "premier", 95 | "start": "2015-03-28", 96 | "end": "2015-04-10", 97 | "my_join_field": { 98 | "name": "upgrade", 99 | "parent": "2" 100 | } 101 | } 102 | 103 | PUT /listings/_doc/201?routing=2 104 | { 105 | "product": "highlight", 106 | "start": "2015-04-11", 107 | "end": "2015-05-19", 108 | "my_join_field": { 109 | "name": "upgrade", 110 | "parent": "2" 111 | } 112 | } 113 | 114 | # You may optionally specify a "routing" parameter at query time 115 | # BEWARE THAT IT WILL INCLUDE LISTINGS WITH DIFFERENT ROUTING PARAMETERS IF 116 | # THE HASHING ALGORITHM PUTS THEM IN THE SAME SHARD. 117 | GET /listings/_search?routing=2 118 | 119 | 120 | # Use "has_child" query 121 | # Note that you don't get to see the upgrades 122 | GET /listings/_search 123 | { 124 | "query": { 125 | "has_child": { 126 | "type": "upgrade", 127 | "query": { 128 | "bool": { 129 | "filter": [ 130 | { 131 | "match": { 132 | "product": "premier" 133 | } 134 | }, 135 | { 136 | "range": { 137 | "start": { 138 | "lte": "2015-04-01" 139 | } 140 | } 141 | }, 142 | { 143 | "range": { 144 | "end": { 145 | "gte": "2015-04-01" 146 | } 147 | } 148 | } 149 | ] 150 | } 151 | } 152 | } 153 | } 154 | } 155 | 156 | # Use inner_hits to include upgrades in the results. 
157 | GET /listings/_search 158 | { 159 | "query": { 160 | "has_child": { 161 | "type": "upgrade", 162 | "inner_hits": {}, 163 | "query": { 164 | "bool": { 165 | "filter": [ 166 | { 167 | "match": { 168 | "product": "premier" 169 | } 170 | }, 171 | { 172 | "range": { 173 | "start": { 174 | "lte": "2015-04-01" 175 | } 176 | } 177 | }, 178 | { 179 | "range": { 180 | "end": { 181 | "gte": "2015-04-01" 182 | } 183 | } 184 | } 185 | ] 186 | } 187 | } 188 | } 189 | } 190 | } 191 | 192 | # How about has_parent? 193 | GET /listings/_search 194 | { 195 | "query": { 196 | "has_parent": { 197 | "parent_type": "listing", 198 | "inner_hits": {}, 199 | "query": { 200 | "match": { 201 | "address": "Sydney" 202 | } 203 | } 204 | } 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /07c-parent-child-exercises.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Hands On Part 2 Exercises 2 | 3 | Re-create the "agencies" index from 06c-nested-objects-exercises.md 4 | but using a parent/child relationship. 5 | 6 | You will need a join field for the agency/agent relationship: 7 | 8 | - agency: whose only field is an "agencyName" field of type text.
9 | - agent: whose only fields are "firstName" and "lastName" 10 | 11 | Inserting an agency should look like this: 12 | 13 | ``` 14 | PUT /agencies/_doc/1?routing=1 15 | { 16 | "agencyName": "Nelson Alexander", 17 | "my_join_field": { 18 | "name": "agency" 19 | } 20 | } 21 | ``` 22 | 23 | Inserting an agent with id of `a` that belongs to agency `1` should look like this: 24 | 25 | ``` 26 | PUT /agencies/_doc/a?routing=1 27 | { 28 | "firstName": "Mary", 29 | "lastName": "Smith", 30 | "my_join_field": { 31 | "name": "agent", 32 | "parent": "1" 33 | } 34 | } 35 | 36 | PUT /agencies/_doc/b?routing=1 37 | { 38 | "firstName": "Joe", 39 | "lastName": "Blogs", 40 | "my_join_field": { 41 | "name": "agent", 42 | "parent": "1" 43 | } 44 | } 45 | ``` 46 | 47 | Also for MacGrath: 48 | 49 | ``` 50 | PUT /agencies/_doc/2?routing=2 51 | { 52 | "agencyName": "MacGrath", 53 | "my_join_field": { 54 | "name": "agency" 55 | } 56 | } 57 | 58 | PUT /agencies/_doc/x?routing=2 59 | { 60 | "firstName": "Mary", 61 | "lastName": "Blogs", 62 | "my_join_field": { 63 | "name": "agent", 64 | "parent": "2" 65 | } 66 | } 67 | 68 | PUT /agencies/_doc/y?routing=2 69 | { 70 | "firstName": "Joe", 71 | "lastName": "Smith", 72 | "my_join_field": { 73 | "name": "agent", 74 | "parent": "2" 75 | } 76 | } 77 | 78 | .... 79 | 80 | 81 | ``` 82 | 83 | 84 | 85 | Once again, find only those agencies that have an agent called "Mary Blogs". 
86 | -------------------------------------------------------------------------------- /08a-collapse.txt: -------------------------------------------------------------------------------- 1 | DELETE /listing 2 | 3 | PUT /listing 4 | { 5 | "mappings": { 6 | "properties": { 7 | "projectId": {"type": "keyword"}, 8 | "projectName": {"type": "text"}, 9 | 10 | "myOrdering": {"type": "integer"}, 11 | "listingTitle": {"type": "text"}, 12 | "listingPrice": {"type": "long"} 13 | } 14 | } 15 | } 16 | 17 | # Note how the project name is duplicated in every listing in a project 18 | PUT /listing/_doc/1 19 | { 20 | "projectId": "project-1", 21 | "projectName": "Kings Landing", 22 | "myOrdering": 1, 23 | "listingTitle": "Blacksmithery", 24 | "listingPrice": 100 25 | } 26 | 27 | PUT /listing/_doc/2 28 | { 29 | "projectId": "project-1", 30 | "projectName": "Kings Landing", 31 | "myOrdering": 2, 32 | "listingTitle": "Bakery", 33 | "listingPrice": 200 34 | } 35 | 36 | PUT /listing/_doc/3 37 | { 38 | "projectId": "project-1", 39 | "projectName": "Kings Landing", 40 | "myOrdering": 3, 41 | "listingTitle": "Iron Throne", 42 | "listingPrice": 1000000 43 | } 44 | 45 | PUT /listing/_doc/4 46 | { 47 | "projectId": "project-2", 48 | "projectName": "Winterfell", 49 | "myOrdering": 1, 50 | "listingTitle": "Blacksmithery", 51 | "listingPrice": 50 52 | } 53 | 54 | PUT /listing/_doc/5 55 | { 56 | "projectId": "project-2", 57 | "projectName": "Winterfell", 58 | "myOrdering": 2, 59 | "listingTitle": "Bakery", 60 | "listingPrice": 150 61 | } 62 | 63 | PUT /listing/_doc/6 64 | { 65 | "projectId": "project-2", 66 | "projectName": "Winterfell", 67 | "myOrdering": 3, 68 | "listingTitle": "Great Keep", 69 | "listingPrice": 10000 70 | } 71 | 72 | # Ordinary search without field collapsing 73 | GET /listing/_search 74 | { 75 | "query": { 76 | "range": { 77 | "listingPrice": { 78 | "gte": 200 79 | } 80 | } 81 | }, 82 | "sort": [ 83 | { 84 | "listingPrice": { 85 | "order": "desc" 86 | } 87 | } 88 | ] 89 | } 90 
| 91 | # Field collapsing only returns the first listing for each project 92 | GET /listing/_search 93 | { 94 | "query": { 95 | "range": { 96 | "listingPrice": { 97 | "gte": 200 98 | } 99 | } 100 | }, 101 | "sort": [ 102 | { 103 | "listingPrice": { 104 | "order": "desc" 105 | } 106 | } 107 | ], 108 | "collapse": { 109 | "field" : "projectId" 110 | } 111 | } 112 | 113 | # Inner-hits can be used to group the other children 114 | GET /listing/_search 115 | { 116 | "query": { 117 | "range": { 118 | "listingPrice": { 119 | "gte": 200 120 | } 121 | } 122 | }, 123 | "sort": [ 124 | { 125 | "listingPrice": { 126 | "order": "desc" 127 | } 128 | } 129 | ], 130 | "collapse": { 131 | "field" : "projectId", 132 | "inner_hits": { 133 | "name": "children" 134 | } 135 | } 136 | } 137 | 138 | # You can even specify size and sort order for the inner hits 139 | GET /listing/_search 140 | { 141 | "query": { 142 | "range": { 143 | "listingPrice": { 144 | "gte": 200 145 | } 146 | } 147 | }, 148 | "sort": [ 149 | { 150 | "listingPrice": { 151 | "order": "desc" 152 | } 153 | } 154 | ], 155 | "collapse": { 156 | "field" : "projectId", 157 | "inner_hits": { 158 | "name": "children", 159 | "size": 5, 160 | "sort": [{ "myOrdering": "asc" }] 161 | } 162 | } 163 | } 164 | 165 | -------------------------------------------------------------------------------- /08b-collapse-exercises.md: -------------------------------------------------------------------------------- 1 | # Field Collapsing Exercise 2 | 3 | Create and populate an "agent" index whose purpose is to 4 | track the year-to-date number of sales per agent 5 | (with one document per agent). 6 | Note how the agency name and id are duplicated for each agent in an agency. 7 | 8 | Once you have done this, see the exercises at the end.
9 | 10 | ``` 11 | DELETE /agent 12 | 13 | PUT /agent 14 | { 15 | "mappings": { 16 | "properties": { 17 | "agentId": {"type": "keyword"}, 18 | "agentName": {"type": "text"}, 19 | "agencyId": {"type": "keyword"}, 20 | "agencyName": {"type": "text"}, 21 | "sales": {"type": "integer"} 22 | } 23 | } 24 | } 25 | 26 | PUT /agent/_doc/agent-1 27 | { 28 | "agentId": "agent-1", 29 | "agentName": "Mary Smith", 30 | "agencyId": "agency-1", 31 | "agencyName": "Nelson Alexander", 32 | "sales": 18 33 | } 34 | 35 | PUT /agent/_doc/agent-2 36 | { 37 | "agentId": "agent-2", 38 | "agentName": "Joe Blogs", 39 | "agencyId": "agency-1", 40 | "agencyName": "Nelson Alexander", 41 | "sales": 7 42 | } 43 | 44 | PUT /agent/_doc/agent-3 45 | { 46 | "agentId": "agent-3", 47 | "agentName": "Mary Blogs", 48 | "agencyId": "agency-2", 49 | "agencyName": "MacGrath", 50 | "sales": 20 51 | } 52 | 53 | PUT /agent/_doc/agent-4 54 | { 55 | "agentId": "agent-4", 56 | "agentName": "Joe Smith", 57 | "agencyId": "agency-2", 58 | "agencyName": "MacGrath", 59 | "sales": 13 60 | } 61 | 62 | ``` 63 | 64 | Write a query to find those agents that have had 65 | more than 5 sales ordered by the number of sales descending. 66 | 67 | Use field collapsing to ensure that only one agent is returned per agency. 68 | Note: you should not need to change the index mapping. 69 | -------------------------------------------------------------------------------- /09a-geo-point.txt: -------------------------------------------------------------------------------- 1 | # Geo Point: 2 | # - Good for storing lat/lon points 3 | # - Supports polygon and distance searches 4 | # - Supports sort-by-distance 5 | # - Supports various aggregation operations 6 | 7 | DELETE /listings 8 | 9 | # Declare a geo-point type for the address. 
10 | PUT /listings 11 | { 12 | "mappings": { 13 | "properties": { 14 | "address": { 15 | "type": "object", 16 | "properties": { 17 | "fullAddress": {"type": "text"}, 18 | "geocode": {"type": "geo_point"} 19 | } 20 | } 21 | } 22 | } 23 | } 24 | 25 | 26 | 27 | # Index example docs 28 | PUT /listings/_doc/1 29 | { 30 | "address": { 31 | "fullAddress": "511 Church St, Richmond, VIC 3121", 32 | "geocode": { 33 | "lat": -37.829, 34 | "lon": 144.995 35 | } 36 | } 37 | } 38 | 39 | PUT /listings/_doc/2 40 | { 41 | "address": { 42 | "fullAddress": "Kirribilli House, Kiribilli Ave, Kirribilli NSW 2061", 43 | "geocode": { 44 | "lat": -33.851, 45 | "lon": 151.217 46 | } 47 | } 48 | } 49 | 50 | # Bounding box search 51 | GET /listings/_search 52 | { 53 | "query": { 54 | "geo_bounding_box": { 55 | "address.geocode": { 56 | "top_left": { 57 | "lat": -37, 58 | "lon": 144 59 | }, 60 | "bottom_right": { 61 | "lat": -38, 62 | "lon": 145 63 | } 64 | } 65 | } 66 | } 67 | } 68 | 69 | # Radial/distance search 70 | GET /listings/_search 71 | { 72 | "query": { 73 | "geo_distance": { 74 | "distance": "10km", 75 | "address.geocode": { 76 | "lat": -37.8, 77 | "lon": 145 78 | } 79 | } 80 | } 81 | } 82 | 83 | # Sort by distance 84 | GET /listings/_search 85 | { 86 | "query": { 87 | "match_all": {} 88 | }, 89 | "sort": [ 90 | { 91 | "_geo_distance": { 92 | "address.geocode": { 93 | "lat": -37.8, 94 | "lon": 145 95 | }, 96 | "order": "asc", 97 | "unit": "km" 98 | } 99 | } 100 | ] 101 | } 102 | 103 | # Score by distance 104 | # Scale is the distance at which the score will halve 105 | GET /listings/_search 106 | { 107 | "query": { 108 | "function_score": { 109 | "functions": [ 110 | { 111 | "gauss": { 112 | "address.geocode": { 113 | "origin": { 114 | "lat": -37.8, 115 | "lon": 145 116 | }, 117 | "scale": "1000km" 118 | } 119 | } 120 | } 121 | ] 122 | } 123 | } 124 | } 125 | 126 | # NEW to Elasticsearch 7.2 127 | # Score by distance using "distance_feature" 128 | # Potentially more efficient than 
"function_score" 129 | # unless "track_total_hits" is set to true. 130 | GET /listings/_search 131 | { 132 | "query": { 133 | "distance_feature": { 134 | "field": "address.geocode", 135 | "pivot": "1000km", 136 | "origin": [ 137 | 145, 138 | -37.8 139 | ] 140 | } 141 | } 142 | } 143 | 144 | # Polygon search. 145 | GET /listings/_search 146 | { 147 | "query": { 148 | "geo_polygon": { 149 | "address.geocode": { 150 | "points": [ 151 | { 152 | "lat": -37, 153 | "lon": 144 154 | }, 155 | { 156 | "lat": -37, 157 | "lon": 145 158 | }, 159 | { 160 | "lat": -40, 161 | "lon": 145 162 | } 163 | ] 164 | } 165 | } 166 | } 167 | } 168 | 169 | -------------------------------------------------------------------------------- /09b-geo-point-exercises.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Hands On Part 2 Exercises 2 | 3 | See ./childcare-centres/README.md 4 | 5 | 6 | -------------------------------------------------------------------------------- /10a-geo-shape.txt: -------------------------------------------------------------------------------- 1 | # Geo Shape: 2 | # - Good for storing shapes and points 3 | # - Supports multi-polygons 4 | # - Supports queries like: "Does this shape over-lap this other shape" 5 | # - Has no aggregation support 6 | # - Stored as a "triangle mesh" in a "BKD tree" 7 | 8 | DELETE /listings 9 | 10 | # Declare a geo-shape type for the address. 
11 | PUT /listings 12 | { 13 | "mappings": { 14 | "properties": { 15 | "address": { 16 | "type": "object", 17 | "properties": { 18 | "fullAddress": {"type": "text"}, 19 | "geocode": {"type": "geo_shape"} 20 | } 21 | } 22 | } 23 | } 24 | } 25 | 26 | 27 | # Index example docs 28 | # Using the geo-shape array format 29 | # NOTE HOW THE LONGITUDE COMES BEFORE THE LATITUDE 30 | PUT /listings/_doc/1 31 | { 32 | "address": { 33 | "fullAddress": "511 Church St, Richmond, VIC 3121", 34 | "geocode": { 35 | "type" : "point", 36 | "coordinates" : [144.995, -37.829] 37 | } 38 | } 39 | } 40 | 41 | PUT /listings/_doc/2 42 | { 43 | "address": { 44 | "fullAddress": "Kirribilli House, Kiribilli Ave, Kirribilli NSW 2061", 45 | "geocode": { 46 | "type" : "point", 47 | "coordinates" : [151.217, -33.851] 48 | } 49 | } 50 | } 51 | 52 | 53 | # Geo-shape query (polygon) 54 | GET /listings/_search 55 | { 56 | "query": { 57 | "geo_shape": { 58 | "address.geocode": { 59 | "shape": { 60 | "type": "polygon", 61 | "coordinates": [[[144,-37], [145,-37], [145, -40], [144,-37]]] 62 | }, 63 | "relation": "within" 64 | } 65 | } 66 | } 67 | } 68 | 69 | 70 | # INTERSECTS - (default) Return all documents whose geo_shape field intersects the query geometry. 71 | # DISJOINT - Return all documents whose geo_shape field has nothing in common with the query geometry. 72 | # WITHIN - Return all documents whose geo_shape field is within the query geometry. 73 | # CONTAINS - Return all documents whose geo_shape field contains the query geometry (since 7.5) 74 | -------------------------------------------------------------------------------- /10b-geo-shape-excercises.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Hands On Part 2 Exercises 2 | 3 | Modify the childcare-centres example to use geo-shapes instead of geo-points. 
4 | 5 | You will need to: 6 | 7 | In ../childcare-centres/mapping-template.json 8 | - Replace "geo_point" with "geo_shape" 9 | 10 | In ../childcare-centres/logstash/logstash.config 11 | - Replace the "mutate" section with the following two mutate sections: 12 | 13 | ``` 14 | mutate { 15 | add_field => { "[geocode][type]" => "point" } 16 | add_field => { "[geocode][coordinates]" => ["%{[lon]}", "%{[lat]}"] } 17 | } 18 | 19 | # This converts all the array elements from strings to floats. 20 | mutate { 21 | convert => { "[geocode][coordinates]" => "float" } 22 | } 23 | ``` 24 | 25 | See https://www.elastic.co/guide/en/logstash/current/plugins-filters-mutate.html#plugins-filters-mutate-add_field 26 | It should generate something a little like this. 27 | Note how the longitude comes before the latitude. This is standard GeoJSON. 28 | 29 | ``` 30 | "geocode": { 31 | "type" : "point", 32 | "coordinates" : [144.995, -37.829] 33 | } 34 | ``` 35 | 36 | Re-run the ../childcare-centres/feed-index.sh script.
37 | 38 | Use a geo-shape query to find the three child care centres within the following bounding box: 39 | 40 | Top Left: 41 | 42 | - Lat: -37.81 43 | - Lon: 144.96 44 | 45 | Bottom Right: 46 | - Lat: -37.82 47 | - Lon: 144.97 48 | -------------------------------------------------------------------------------- /11a-term-suggesters.txt: -------------------------------------------------------------------------------- 1 | DELETE addresses 2 | 3 | POST /addresses/_bulk 4 | { "index" : {} } 5 | { "address": "31 Smith St, Brunswick, VIC 3056" } 6 | { "index" : {} } 7 | { "address": "25 Smith St, Brunswick, VIC 3056" } 8 | { "index" : {} } 9 | { "address": "25 Smyth St, Coburg, VIC 3058" } 10 | { "index" : {} } 11 | { "address": "55 Station St, Fitzroy, VIC 3065" } 12 | 13 | # Suggest alternative spellings of words that have no matches 14 | # Note: computed per shard and so can result in suggestions when the 15 | # alternative spelling is in a shard that doesn't have the spelling provided. 16 | GET /addresses/_search 17 | { 18 | "suggest": { 19 | "text": "30 Smyth St, Brunwick, VIC 3058", 20 | "my-suggestion": { 21 | "term": { 22 | "field": "address" 23 | } 24 | } 25 | } 26 | } 27 | 28 | 29 | # Find suggestions even for terms that match (e.g. "Smith" and "Smyth") 30 | # max_term_freq - used for ignoring "common" terms. 
Defaults to 0.01 31 | GET /addresses/_search 32 | { 33 | "suggest": { 34 | "text": "30 Smith St, Brunwick, VIC 3058", 35 | "my-suggestion": { 36 | "term": { 37 | "field": "address", 38 | "suggest_mode": "always", 39 | "max_term_freq": 0.75 40 | } 41 | } 42 | } 43 | } 44 | 45 | # Only suggest if the term is more popular 46 | GET /addresses/_search 47 | { 48 | "suggest": { 49 | "text": "30 Smith St, Brunswick, VIC 3058", 50 | "my-suggestion": { 51 | "term": { 52 | "field": "address", 53 | "suggest_mode": "popular", 54 | "max_term_freq": 0.75 55 | } 56 | } 57 | } 58 | } 59 | 60 | # Perform a search and a suggest query in a single combined request 61 | GET /addresses/_search 62 | { 63 | "query": { 64 | "match": { 65 | "address": { 66 | "query": "30 Smyth Brunwick VIC 3058", 67 | "operator": "and" 68 | } 69 | } 70 | }, 71 | "suggest": { 72 | "text": "30 Smyth Brunwick VIC 3058", 73 | "my-street-suggestions": { 74 | "term": { 75 | "field": "address", 76 | "suggest_mode": "always", 77 | "max_term_freq": 0.75 78 | } 79 | } 80 | } 81 | } 82 | 83 | -------------------------------------------------------------------------------- /11b-term-suggesters-excercises.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Hands On Part 2 Exercises 2 | 3 | Try the following query (assuming you still have the data from ./childcare-centres) 4 | 5 | **Note: don't expect to get any matches** 6 | 7 | ``` 8 | GET /childcare-centres/_search 9 | { 10 | "query": { 11 | "match": { 12 | "name": { 13 | "query": "Powlet Reserve Center", 14 | "operator": "and" 15 | } 16 | } 17 | } 18 | } 19 | ``` 20 | 21 | Add a "suggest" section to provide alternative spellings. 
22 | 23 | 24 | -------------------------------------------------------------------------------- /12a-completion-suggesters.txt: -------------------------------------------------------------------------------- 1 | 2 | # As an alternative, see "search as you type" 3 | # https://www.elastic.co/guide/en/elasticsearch/reference/7.13/search-as-you-type.html 4 | 5 | DELETE addresses 6 | 7 | # Declare a "suggest" field to be of type "completion" 8 | PUT /addresses 9 | { 10 | "mappings": { 11 | "properties": { 12 | "my-suggest": { 13 | "type": "completion", 14 | "analyzer": "standard" 15 | } 16 | } 17 | } 18 | } 19 | 20 | # Ingest some data 21 | PUT addresses/_doc/1 22 | { 23 | "my-suggest": { 24 | "input": "50 Smith St, Brunswick, VIC 3056", 25 | "weight": 42 26 | } 27 | } 28 | 29 | PUT addresses/_doc/2 30 | { 31 | "my-suggest": { 32 | "input": "53 Green St, Brunswick, VIC 3056", 33 | "weight": 10 34 | } 35 | } 36 | 37 | PUT addresses/_doc/3 38 | { 39 | "my-suggest": { 40 | "input": "51 Green St, Brunswick, VIC 3056", 41 | "weight": 5 42 | } 43 | } 44 | 45 | # Request completions of the text "50 smi" 46 | GET addresses/_search 47 | { 48 | "suggest": { 49 | "my-suggest-query": { 50 | "prefix": "50 smi", 51 | "completion": { 52 | "field": "my-suggest" 53 | } 54 | } 55 | } 56 | } 57 | 58 | # Ordering is by "weight" 59 | GET addresses/_search 60 | { 61 | "suggest": { 62 | "my-suggest-query": { 63 | "prefix": "5", 64 | "completion": { 65 | "field": "my-suggest" 66 | } 67 | } 68 | } 69 | } 70 | 71 | # Fuzzy matching 72 | GET addresses/_search 73 | { 74 | "suggest": { 75 | "my-suggest-query": { 76 | "prefix": "50 smy", 77 | "completion": { 78 | "field": "my-suggest", 79 | "fuzzy": { 80 | "fuzziness": 1 81 | } 82 | } 83 | } 84 | } 85 | } 86 | 87 | 88 | # Exact matches are boosted (only since 5.0) 89 | GET addresses/_search 90 | { 91 | "suggest": { 92 | "my-suggest-query": { 93 | "prefix": "51 Gre", 94 | "completion": { 95 | "field": "my-suggest", 96 | "fuzzy": { 97 | "fuzziness": 
1 98 | } 99 | } 100 | } 101 | } 102 | } 103 | 104 | # By default, will not fuzzy match on first letter. 105 | GET addresses/_search 106 | { 107 | "suggest": { 108 | "my-suggest-query": { 109 | "prefix": "61 Gre", 110 | "completion": { 111 | "field": "my-suggest", 112 | "fuzzy": { 113 | "fuzziness": 1 114 | } 115 | } 116 | } 117 | } 118 | } 119 | 120 | # Can force fuzzy matching on the first letter via prefix-length 121 | GET addresses/_search 122 | { 123 | "suggest": { 124 | "my-suggest-query": { 125 | "prefix": "61 Gre", 126 | "completion": { 127 | "field": "my-suggest", 128 | "fuzzy": { 129 | "fuzziness": 1, 130 | "prefix_length": 0 131 | } 132 | } 133 | } 134 | } 135 | } 136 | 137 | # Go to https://www.realestate.com.au/property and type "3 Anderson Street, Bent" 138 | # Note how it matches "1-3 Anderson Street, Bentleigh, Vic 3204" 139 | # This is not via fuzzy matching, but instead via multiple "input" variations 140 | # Variations can be supported by using an array 141 | PUT addresses/_doc/10 142 | { 143 | "my-suggest": { 144 | "input": [ 145 | "1-3 Anderson Street, Bentleigh, Vic 3204", 146 | "1 Anderson Street, Bentleigh, Vic 3204", 147 | "3 Anderson Street, Bentleigh, Vic 3204" 148 | ] 149 | } 150 | } 151 | 152 | GET addresses/_search 153 | { 154 | "suggest": { 155 | "my-suggest-query": { 156 | "prefix": "3 Ander", 157 | "completion": { 158 | "field": "my-suggest" 159 | } 160 | } 161 | } 162 | } 163 | 164 | # The document can include other useful information: 165 | PUT addresses/_doc/10 166 | { 167 | "my-suggest": { 168 | "input": [ 169 | "1-3 Anderson Street, Bentleigh, Vic 3204", 170 | "1 Anderson Street, Bentleigh, Vic 3204", 171 | "3 Anderson Street, Bentleigh, Vic 3204" 172 | ] 173 | }, 174 | "address": "1-3 Anderson Street, Bentleigh, Vic 3204", 175 | "status": "For sale" 176 | } 177 | 178 | GET addresses/_search 179 | { 180 | "suggest": { 181 | "my-suggest-query": { 182 | "prefix": "3 Ander", 183 | "completion": { 184 | "field": "my-suggest" 185 | } 
186 | } 187 | } 188 | } 189 | 190 | 191 | # If you don't care about weights and variations, you can use a simpler document format 192 | PUT addresses/_doc/10 193 | { 194 | "my-suggest": "511 Church St, Richmond, Vic 3121" 195 | } 196 | 197 | GET addresses/_search 198 | { 199 | "suggest": { 200 | "my-suggest-query": { 201 | "prefix": "511 Chur", 202 | "completion": { 203 | "field": "my-suggest" 204 | } 205 | } 206 | } 207 | } 208 | 209 | # Weights are 24 bit precision floats. 210 | 211 | # Declare a "suggest" field to be of type "completion" 212 | DELETE /foo 213 | 214 | PUT /foo 215 | { 216 | "mappings": { 217 | "properties": { 218 | "foo": { 219 | "type": "completion" 220 | } 221 | } 222 | } 223 | } 224 | 225 | POST /foo/_bulk 226 | {"index":{}} 227 | {"foo": {"input": "aa", "weight": 1000000001}} 228 | {"index":{}} 229 | {"foo": {"input": "ab", "weight": 1000000004}} 230 | {"index":{}} 231 | {"foo": {"input": "ac", "weight": 1000000003}} 232 | {"index":{}} 233 | {"foo": {"input": "ad", "weight": 1000000002}} 234 | 235 | # Note how they all get a score of 1000000000 due to floating point rounding: 236 | GET /foo/_search 237 | { 238 | "suggest": { 239 | "my-suggest-query": { 240 | "prefix": "a", 241 | "completion": { 242 | "field": "foo" 243 | } 244 | } 245 | } 246 | } 247 | -------------------------------------------------------------------------------- /12b-completion-suggesters-exercises.md: -------------------------------------------------------------------------------- 1 | # Elasticsearch Hands On Part 2 Exercises 2 | 3 | Modify the index mapping and logstash.config 4 | for the childcare-centres index so that 5 | it can support completion suggestion queries 6 | on the child care centres' names. 
7 | 8 | -------------------------------------------------------------------------------- /13a-percolator.txt: -------------------------------------------------------------------------------- 1 | DELETE /listings 2 | 3 | # Declare a listings index 4 | PUT /listings 5 | { 6 | "mappings": { 7 | "properties": { 8 | "description": {"type": "text", "analyzer": "english"}, 9 | "suburb": {"type": "keyword"}, 10 | "bedrooms": {"type": "integer"} 11 | } 12 | } 13 | } 14 | 15 | # Insert a couple of listings 16 | PUT /listings/_doc/1000 17 | { 18 | "suburb": "Coburg VIC 3058", 19 | "bedrooms": 1, 20 | "description": "Above ground swimming pool. Spacious kitchen" 21 | } 22 | 23 | PUT /listings/_doc/1001 24 | { 25 | "suburb": "Brunswick VIC 3056", 26 | "bedrooms": 4, 27 | "description": "Garage, swimming pool, and close to schools" 28 | } 29 | 30 | 31 | # Perform a query 32 | 33 | GET /listings/_search 34 | { 35 | "query": { 36 | "bool": { 37 | "filter": [ 38 | { 39 | "term": { 40 | "suburb": "Brunswick VIC 3056" 41 | } 42 | }, 43 | { 44 | "term": { 45 | "bedrooms": 4 46 | } 47 | }, 48 | { 49 | "match_phrase": { 50 | "description": "swim pool" 51 | } 52 | } 53 | ] 54 | } 55 | } 56 | } 57 | 58 | # An index in which to save your queries 59 | # NOTE: You need to duplicate the mappings for listings 60 | DELETE /saved-searches 61 | 62 | PUT /saved-searches 63 | { 64 | "mappings": { 65 | "properties": { 66 | "user-id": {"type": "keyword"}, 67 | "my-saved-query-name": {"type": "keyword"}, 68 | "my-query-field": {"type": "percolator"}, 69 | 70 | "description": {"type": "text", "analyzer": "english"}, 71 | "suburb": {"type": "keyword"}, 72 | "bedrooms": {"type": "integer"} 73 | } 74 | } 75 | } 76 | 77 | 78 | # Save a couple of queries 79 | PUT /saved-searches/_doc/1 80 | { 81 | "user-id": "1234", 82 | "my-saved-query-name": "my first saved query", 83 | "my-query-field": { 84 | "bool": { 85 | "filter": [ 86 | { 87 | "term": { 88 | "suburb": "Brunswick VIC 3056" 89 | } 90 | }, 91 | { 92 | 
"term": { 93 | "bedrooms": 4 94 | } 95 | }, 96 | { 97 | "match_phrase": { 98 | "description": "swim pool" 99 | } 100 | } 101 | ] 102 | } 103 | } 104 | } 105 | 106 | 107 | PUT /saved-searches/_doc/2 108 | { 109 | "user-id": "1234", 110 | "my-saved-query-name": "my second saved query", 111 | "my-query-field": { 112 | "bool": { 113 | "filter": [ 114 | { 115 | "term": { 116 | "suburb": "Coburg VIC 3058" 117 | } 118 | }, 119 | { 120 | "term": { 121 | "bedrooms": 1 122 | } 123 | }, 124 | { 125 | "match_phrase": { 126 | "description": "swim pool" 127 | } 128 | } 129 | ] 130 | } 131 | } 132 | } 133 | 134 | # Now a new listing comes along and we find the saved searches that match that new listing 135 | GET /saved-searches/_search 136 | { 137 | "query": { 138 | "percolate": { 139 | "field": "my-query-field", 140 | "document": { 141 | "suburb": "Brunswick VIC 3056", 142 | "bedrooms": 4, 143 | "description": "Large garden, swimming pool" 144 | } 145 | } 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /13b-percolator-exercises.md: -------------------------------------------------------------------------------- 1 | # Percolator Exercises 2 | 3 | Perform the following to create an index for holding queries to find child care centres. 4 | 5 | Note the field names, types, and declared analysers. 
6 | 7 | ``` 8 | PUT /childcare-queries 9 | { 10 | "mappings": { 11 | "properties": { 12 | "saved_query": { 13 | "type": "percolator" 14 | }, 15 | "name": { 16 | "type": "text", 17 | "analyzer": "english" 18 | }, 19 | "contact_ph": { 20 | "type": "keyword" 21 | }, 22 | "geocode": { 23 | "type": "geo_point" 24 | } 25 | } 26 | } 27 | } 28 | ``` 29 | 30 | Save some queries as follows: 31 | 32 | ``` 33 | # Childcare centres whose name includes the word "goat" 34 | PUT /childcare-queries/_doc/1 35 | { 36 | "saved_query": { 37 | "match": { 38 | "name": "goat" 39 | } 40 | } 41 | } 42 | 43 | # Childcare centres whose name includes the word "goodstart" 44 | PUT /childcare-queries/_doc/2 45 | { 46 | "saved_query": { 47 | "match": { 48 | "name": "goodstart" 49 | } 50 | } 51 | } 52 | 53 | # Childcare centres whose name includes the word "goodstart" 54 | # OR is within 40km of the location 38 South, 145 East 55 | PUT /childcare-queries/_doc/3 56 | { 57 | "saved_query": { 58 | "bool": { 59 | "should": [ 60 | { 61 | "match": { 62 | "name": "goodstart" 63 | } 64 | }, 65 | { 66 | "geo_distance": { 67 | "distance": "40km", 68 | "geocode": { 69 | "lat": -38, 70 | "lon": 145 71 | } 72 | } 73 | } 74 | ] 75 | } 76 | } 77 | } 78 | 79 | ``` 80 | 81 | 82 | Use a `percolate` query to find those saved queries that match this document: 83 | 84 | 85 | ```json 86 | { 87 | "name": "Billy Goat's Child Care", 88 | "contact_ph": "03 9999 2222", 89 | "geocode": { 90 | "lat": -37.80932522, 91 | "lon": 144.96026152 92 | } 93 | } 94 | ``` 95 | 96 | 97 | It should look a bit like this.... 98 | 99 | ``` 100 | GET /childcare-queries/_search 101 | { 102 | "query": { 103 | "percolate": { 104 | .... 
105 | } 106 | } 107 | } 108 | ``` 109 | -------------------------------------------------------------------------------- /ElasticsearchOps.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/davidbkemp/advanced-elasticsearch-training/7f6f5b4810bb15dc4136cf9732b918cfe8bbbf60/ElasticsearchOps.pptx -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 REA Group Ltd. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Other-useful-stuff.txt: -------------------------------------------------------------------------------- 1 | # A grab bag of useful stuff 2 | 3 | 4 | ## To move all shards (primaries and replicas) off of a node, set the exclusion below to that node's name: 5 | 6 | PUT /_cluster/settings 7 | { 8 | "transient" :{ 9 | "cluster.routing.allocation.exclude._name" : "" 10 | } 11 | } 12 | 13 | 14 | ## Documentation on reading/writing documents: 15 | 16 | https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-replication.html 17 | 18 | Of note for writing to Elasticsearch, the write request is forwarded to the primary shard, 19 | but the client doesn't receive a response until it has been replicated to all of 20 | the currently in-sync replicas. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Advanced Elasticsearch Training 2 | 3 | Warning: 4 | This training material has not really been designed for self guidance. 5 | It assumes an instructor will verbally explain a lot of the material. 6 | 7 | This material also assumes that you have already covered the basics including 8 | simple queries, index mappings, relevancy scoring, and custom analysers. 9 | 10 | See, for example, https://jamesfer.me/intro-elasticsearch-training/#1 11 | 12 | ## Topics 13 | 14 | - Nodes, shards, replicas, master nodes 15 | - Query profiling 16 | - Aggregations 17 | - Nested objects 18 | - Parent/child docs 19 | - Field Collapsing 20 | - Geospatial data 21 | - Logstash 22 | - Suggesters 23 | - Controlling scores 24 | - Percolators 25 | 26 | ## Prerequisites 27 | 28 | You need to install various software tools. 
29 | 30 | Requirements: 31 | 32 | - Docker (if on OSX, Docker for Mac is recommended) 33 | - docker-compose 34 | 35 | 36 | ## Notes 37 | 38 | There are scripts in the `scripts` directory that use docker-compose to create, stop and destroy an Elasticsearch + Kibana environment. 39 | 40 | If you have previously run a different version of Elasticsearch with these scripts, 41 | especially anything before version 7.17.5 42 | then it may be best to destroy your existing Elasticsearch data with `scripts/destroy`. 43 | 44 | Create an environment with: 45 | ```bash 46 | scripts/up 47 | ``` 48 | 49 | Elasticsearch should now be available on `http://localhost:9200` 50 | 51 | Kibana should now be available on `http://localhost:5601` 52 | 53 | Stop the environment with: 54 | ```bash 55 | scripts/down 56 | ``` 57 | Data is persisted so a subsequent `scripts/up` will start up ES and Kibana with your data still there. 58 | 59 | Cleanup the environment and delete all data with: 60 | ```bash 61 | scripts/destroy 62 | ``` 63 | -------------------------------------------------------------------------------- /additional-material/README.md: -------------------------------------------------------------------------------- 1 | Try out logstash: 2 | 3 | The following will place some documents into an index called `simple-log`: 4 | 5 | ```bash 6 | LOGSTASH_CONFIG_DIR="${PWD}/simple-logstash" ../scripts/logstash < "/var/log/system.log" 8 | codec => multiline { 9 | pattern => "^\s" 10 | what => "previous" 11 | } 12 | } 13 | } 14 | 15 | filter { 16 | grok { 17 | match => { 18 | "message" => "%{MONTH:month} +%{MONTHDAY:day} %{TIME:time} %{HOSTNAME:hostname} +(?[^\[]+)" 19 | } 20 | } 21 | } 22 | 23 | 24 | output { 25 | elasticsearch { 26 | manage_template => false 27 | hosts => ["localhost"] 28 | } 29 | 30 | # stdout { codec => json } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /additional-material/search-as-you-type.txt: 
-------------------------------------------------------------------------------- 1 | DELETE addresses 2 | 3 | # Declare a "my-suggest" field to be of type "search_as_you_type" 4 | PUT /addresses 5 | { 6 | "mappings": { 7 | "properties": { 8 | "my-suggest": { 9 | "type": "search_as_you_type" 10 | } 11 | } 12 | } 13 | } 14 | 15 | # Ingest some data 16 | PUT addresses/_doc/1 17 | { 18 | "my-suggest": "50 Smith St, Brunswick, VIC 3056" 19 | } 20 | 21 | PUT addresses/_doc/2 22 | { 23 | "my-suggest": "53 Green St, Brunswick, VIC 3056" 24 | } 25 | 26 | PUT addresses/_doc/3 27 | { 28 | "my-suggest": "51 Green St, Brunswick, VIC 3056" 29 | } 30 | 31 | # Request completions of the text "10 smith" 32 | GET addresses/_search 33 | { 34 | "query": { 35 | "multi_match": { 36 | "query": "10 smith", 37 | "type": "bool_prefix", 38 | "fields": [ 39 | "my-suggest", 40 | "my-suggest._2gram", 41 | "my-suggest._3gram", 42 | "my-suggest._index_prefix" 43 | ] 44 | } 45 | } 46 | } 47 | 48 | 49 | # Loose matching (no fuzziness is configured): nothing starts with "smy", but a document can still match on "50" alone 50 | GET addresses/_search 51 | { 52 | "query": { 53 | "multi_match": { 54 | "query": "50 smy", 55 | "type": "bool_prefix", 56 | "fields": [ 57 | "my-suggest", 58 | "my-suggest._2gram", 59 | "my-suggest._3gram", 60 | "my-suggest._index_prefix" 61 | ] 62 | } 63 | } 64 | } 65 | 66 | # Exact matches are boosted 67 | GET addresses/_search 68 | { 69 | "query": { 70 | "multi_match": { 71 | "query": "51 Gre", 72 | "type": "bool_prefix", 73 | "fields": [ 74 | "my-suggest", 75 | "my-suggest._2gram", 76 | "my-suggest._3gram", 77 | "my-suggest._index_prefix" 78 | ] 79 | } 80 | } 81 | } 82 | 83 | -------------------------------------------------------------------------------- /additional-material/simple-logstash/simple-logstash.config: -------------------------------------------------------------------------------- 1 | # Logstash config for ingesting standard-in into elasticsearch without any 
processing 2 | 3 | input { stdin { } } 4 | 5 | 6 | output { 7 | elasticsearch { 8 | manage_template => false 9 | 
hosts => ["localhost"] 10 | index => "simple-log" 11 | document_type => "item" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /childcare-centres/Childcare_Centres.csv: -------------------------------------------------------------------------------- 1 | name,lat,lon,contact_ph,url,ref,location 2 | Powlett Reserve Childrens Centre,-37.81152228,144.98850987,9428 0896,,530624,"(-37.81152228, 144.98850987)" 3 | Lady Huntingfield Children's Centre,-37.7984345,144.94495729,03 9328 2083,,104584,"(-37.7984345, 144.94495729)" 4 | Kool Kidz@Goods Shed Docklands,-37.82020176,144.9507406,03 9614 2900,,651853,"(-37.82020176, 144.9507406)" 5 | Hotham Hub Children's Centre,-37.79271146,144.94202167,03 9326 8655,,617536,"(-37.79271146, 144.94202167)" 6 | Kids On Collins,-37.81871725,144.95487441,03 9629 4099,http://littleflyers.vic.edu.au/locations/kids-on-c,507495,"(-37.81871725, 144.95487441)" 7 | Only About Children,-37.82180897,144.96287085,,,612634,"(-37.82180897, 144.96287085)" 8 | Little Assets,-37.82227454,144.95359746,03 9620 2887,http://www.littleassets.com.au,667449,"(-37.82227454, 144.95359746)" 9 | Penguin Childcare,-37.82047019,144.9488124,03 9600 3456,http://www.penguinchildcare.com.au,632666,"(-37.82047019, 144.9488124)" 10 | Bambini Early Learning Centre,-37.79545923,144.95025171,13 0094 5745,http://www.bambinielc.com.au,661102,"(-37.79545923, 144.95025171)" 11 | Try Youth & Community Child Care Centre,-37.78217027,144.91605471,03 9347 2655,http://www.tryyouth.org.au,629135,"(-37.78217027, 144.91605471)" 12 | Thinking Kids Children's Centre,-37.77888928,144.94321742,03 8481 1100,http://www.ecms.org.au/thinking-kids-childrens-centre,633400,"(-37.77888928, 144.94321742)" 13 | Co.As.It Tomasso Fiaschi Child Care Centre,-37.79601466,144.97410803,03 9347 1484,http://www.coasit.com.au,109056,"(-37.79601466, 144.97410803)" 14 | Central Carlton Childrens Centre,-37.7940351,144.9694206,03 9347 
4788,http://www.dhs.vic.gov.au,102696,"(-37.7940351, 144.9694206)" 15 | The Harbour Family and Childrens Centre,-37.82054335,144.94306719,03 8624 1000,http://www.gowrie-melbourne.com.au,619559,"(-37.82054335, 144.94306719)" 16 | Goodstart Early Learning Melbourne Flinders Street,-37.81794575,144.96528614,03 9671 3539,http://www.goodstart.org.au,592707,"(-37.81794575, 144.96528614)" 17 | Lemon Tree Children's Centre,-37.8008196,144.96956172,03 9347 4949,http://www.lemontreecc.com.au,104488,"(-37.8008196, 144.96956172)" 18 | Little Stars At Bourke Children Centre,-37.81530033,144.95903479,03 8622 1400,http://www.ku.com.au/ku.littlestars,639691,"(-37.81530033, 144.95903479)" 19 | RMIT University Childcare Centre,-37.80805707,144.96131546,03 9662 1295,http://www.lab.3000.com.au,547408,"(-37.80805707, 144.96131546)" 20 | North Melbourne Maternal & Child Health Centre,-37.79307895,144.94051513,03 9320 4724,http://www.melbourne.vic.gov.au,617520,"(-37.79307895, 144.94051513)" 21 | Mission Australia Early Learning,-37.82895695,144.96968174,18 0075 7343,http://www.missionaustralia.com.au,664202,"(-37.82895695, 144.96968174)" 22 | Sentia Early Learning,-37.81919421,144.95964143,03 9629 9860,http://www.sentia.net.au,641425,"(-37.81919421, 144.95964143)" 23 | Early Learning Centre,-37.83119287,144.98610383,03 9862 9200,http://www.mggs.vic.edu.au,109894,"(-37.83119287, 144.98610383)" 24 | Headstart Early Learning East Melbourne,-37.81778068,144.9789247,03 9639 9778,http://www.headstartelc.com.au,546191,"(-37.81778068, 144.9789247)" 25 | Wimble Street Child Care Co-Operative,-37.79499106,144.95595487,03 9347 9103,http://www.wimblest.com.au,110181,"(-37.79499106, 144.95595487)" 26 | Melbourne University Family Club Co-Operative,-37.79610216,144.96605617,03 9347 3518,http://www.familyclub.org.au,101626,"(-37.79610216, 144.96605617)" 27 | University Of Melbourne Child Care,-37.79625086,144.96487025,03 9035 4300,http://www.pb.unimelb.edu.au/ehs/,109351,"(-37.79625086, 144.96487025)" 
28 | Melbourne City Childcare And Kindergarten,-37.81834866,144.95615597,03 9629 4099,http://www.littleflyers.vic.edu.au,595596,"(-37.81834866, 144.95615597)" 29 | Little Stars @ Southern Cross,-37.81788857,144.95002508,03 9670 8278,http://www.ku.com.au,647066,"(-37.81788857, 144.95002508)" 30 | Fawkner Park Childrens Centre Co Operative,-37.83755884,144.9797901,9820 2758,http://www.fawknerpark.com,579282,"(-37.83755884, 144.9797901)" 31 | Kensington Community Children's Co-Operative,-37.79710602,144.92552634,03 9376 4565,http://www.kccc.org.au,563554,"(-37.79710602, 144.92552634)" 32 | MAMA Midwives and Mothers Australia,-37.79241689,144.91946965,03 9376 7474,http://www.midwivesandmothers.com.au,104202,"(-37.79241689, 144.91946965)" 33 | Queensberry Childrens Centre,-37.80416898,144.96092293,03 8344 9621,http://www.pb.unimelb.edu.au/ehs/,579072,"(-37.80416898, 144.96092293)" 34 | North Melbourne Maternal & Child Health Centre,-37.79436687,144.94682844,03 9320 4724,http://www.melbourne.vic.gov.au,100083,"(-37.79436687, 144.94682844)" 35 | Goodstart Childcare,-37.81023722,144.96241708,03 9663 2881,http://www.goodstart.org.au,601469,"(-37.81023722, 144.96241708)" 36 | Curzon Street Childrens Centre,-37.80328788,144.94790431,9328 3127,http://www.wesley.org.au,509120,"(-37.80328788, 144.94790431)" 37 | QV Children's Centre,-37.81042484,144.96662283,03 8616 0350,http://www.dexus.com.au,589843,"(-37.81042484, 144.96662283)" 38 | Yarra Park Children's Centre,-37.81862706,144.98854292,9428 0896,http://emcc.org.au/yarra.php,101064,"(-37.81862706, 144.98854292)" 39 | Alfred Child Care Centre,-37.84664583,144.98114769,03 9076 2000,http://www.ku.com.au,587207,"(-37.84664583, 144.98114769)" 40 | Jumbo Early Education,-37.82495079,144.94978342,03 9099 1360,http://www.jumboearlyeducation.com.au,626588,"(-37.82495079, 144.94978342)" 41 | St Catherines Early Learing Centre,-37.79622515,144.94433562,03 9320 2560,http://www.stcatherines-eec.com.au,504891,"(-37.79622515, 
144.94433562)" 42 | Penguin Childcare Parkville,-37.77819829,144.93969096,03 9380 4333,http://www.penguinchildcare.com.au,610471,"(-37.77819829, 144.93969096)" 43 | North Melbourne Children's Centre,-37.80458408,144.9540423,03 9328 1592,http://www.careforkids.com.au,105036,"(-37.80458408, 144.9540423)" 44 | Melbourne City Child Care Centre,-37.80932522,144.96026152,03 9329 9561,http://www.melbourne.vic.gov.au,100168,"(-37.80932522, 144.96026152)" 45 | -------------------------------------------------------------------------------- /childcare-centres/README.md: -------------------------------------------------------------------------------- 1 | # Childcare centres 2 | 3 | If you are using Docker, then simply run 4 | 5 | ```bash 6 | ./feed-index.sh 7 | ``` 8 | 9 | Now write a query to find child care centres within 1km of the Melbourne GPO 10 | 11 | - Lat: -37.8138932 12 | - Lon: 144.9611874 13 | 14 | 15 | NOTE: 16 | Childcare_Centres.csv is from https://data.melbourne.vic.gov.au/Assets-Infrastructure/Childcare-Centres/rdi4-j8zh 17 | 18 | ## If not using Docker 19 | 20 | You will need to have installed Logstash. 21 | 22 | If you already have a `childcare-centres` index then you will need to manually delete it. 23 | 24 | ``` 25 | DELETE /childcare-centres 26 | ``` 27 | 28 | Then, use logstash to ingest the data. 29 | 30 | Note the use of `--pipeline.workers 1` to work around a concurrency problem with the csv input. 31 | See https://discuss.elastic.co/t/event-dependent-configuration-broken-with-multiple-pipelines/141869 32 | 33 | ``` 34 | XPACK_MONITORING_ENABLED=false \ 35 | INDEX=childcare-centres \ 36 | ELASTICSEARCH_URL='http://localhost:9200' \ 37 | TEMPLATE_PATH="$PWD/mapping-template.json" \ 38 | logstash --pipeline.workers 1 -f ./logstash/logstash.config \ 39 | < Childcare_Centres.csv 40 | ``` 41 | 42 | NOTE: If you are not using a locally running Elasticsearch server (on port 9200), 43 | then you will need to set the `ELASTICSEARCH_URL`. 
44 | Make sure you include a port number, even if it is the default port 80 or 443 for `http` and `https` 45 | 46 | e.g. 47 | 48 | ``` 49 | ELASTICSEARCH_URL='https://search-elasticsearch-training-abcdef.ap-southeast-2.es.amazonaws.com:443' ./feed-index.sh 50 | ``` 51 | -------------------------------------------------------------------------------- /childcare-centres/feed-index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euv -o pipefail 4 | 5 | INDEX=${INDEX:-childcare-centres} 6 | 7 | SRC_DIR="$PWD/$(dirname $0)" 8 | MOUNT_DIR="/usr/share/logstash/childcare-centres" 9 | 10 | ELASTICSEARCH_URL=${ELASTICSEARCH_URL:-http://localhost:9200} 11 | 12 | echo "Delete the index if it already exists" 13 | curl -XDELETE "$ELASTICSEARCH_URL/$INDEX?pretty=true" ||: 14 | 15 | # Need to set pipeline.workers to work around bugs in the CSV filter :-( 16 | # See https://discuss.elastic.co/t/event-dependent-configuration-broken-with-multiple-pipelines/141869 17 | PIPELINE_WORKERS=1 18 | 19 | docker run \ 20 | -v "$SRC_DIR":"$MOUNT_DIR" \ 21 | -e XPACK_MONITORING_ENABLED=false \ 22 | -e ELASTICSEARCH_URL="$ELASTICSEARCH_URL" \ 23 | -e INDEX="$INDEX" \ 24 | -e TEMPLATE_PATH="$MOUNT_DIR/mapping-template.json" \ 25 | --rm -i \ 26 | --network="container:elasticsearch-hands-on-2" \ 27 | docker.elastic.co/logstash/logstash:7.17.5 \ 28 | --pipeline.workers $PIPELINE_WORKERS \ 29 | --path.config "$MOUNT_DIR/logstash" \ 30 | < "$SRC_DIR/Childcare_Centres.csv" 31 | -------------------------------------------------------------------------------- /childcare-centres/logstash/logstash.config: -------------------------------------------------------------------------------- 1 | # Logstash config for ingesting Childcare_Centres.csv 2 | 3 | input { stdin { } } 4 | 5 | filter { 6 | 7 | csv { 8 | autodetect_column_names => true 9 | } 10 | 11 | # name,lat,lon,contact_ph,url,ref,location, geocode{lat, lon} 12 | 13 | mutate { 14 | 
add_field => { "[geocode][lat]" => "%{[lat]}" } 15 | add_field => { "[geocode][lon]" => "%{[lon]}" } 16 | } 17 | } 18 | 19 | # output { stdout { codec => json } } 20 | 21 | output { 22 | elasticsearch { 23 | manage_template => true 24 | template_overwrite => true 25 | template => "${TEMPLATE_PATH}" 26 | hosts => ["${ELASTICSEARCH_URL}"] 27 | index => "${INDEX}" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /childcare-centres/mapping-template.json: -------------------------------------------------------------------------------- 1 | { 2 | "index_patterns": ["childcare-centres*"], 3 | "mappings": { 4 | "properties": { 5 | "geocode": { 6 | "type": "geo_point" 7 | } 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /docker-compose.multi-node.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | elasticsearch1: 5 | image: docker.elastic.co/elasticsearch/elasticsearch:7.17.5 6 | container_name: elasticsearch1 7 | environment: 8 | - cluster.name=docker-cluster 9 | - node.name=elasticsearch1 10 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 11 | - xpack.monitoring.collection.enabled=true 12 | - xpack.monitoring.exporters.remote.type=http 13 | - xpack.monitoring.exporters.remote.host=monitor 14 | - discovery.seed_hosts=elasticsearch1,elasticsearch2,elasticsearch3 15 | - cluster.initial_master_nodes=elasticsearch1,elasticsearch2,elasticsearch3 16 | - xpack.security.enabled=false 17 | ports: 18 | - "19200:9200" 19 | 20 | elasticsearch2: 21 | image: docker.elastic.co/elasticsearch/elasticsearch:7.17.5 22 | container_name: elasticsearch2 23 | environment: 24 | - cluster.name=docker-cluster 25 | - node.name=elasticsearch2 26 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 27 | - xpack.monitoring.collection.enabled=true 28 | - xpack.monitoring.exporters.remote.type=http 29 | - xpack.monitoring.exporters.remote.host=monitor 
30 | - discovery.seed_hosts=elasticsearch1,elasticsearch2,elasticsearch3 31 | - cluster.initial_master_nodes=elasticsearch1,elasticsearch2,elasticsearch3 32 | - xpack.security.enabled=false 33 | ports: 34 | - "29200:9200" 35 | 36 | elasticsearch3: 37 | image: docker.elastic.co/elasticsearch/elasticsearch:7.17.5 38 | container_name: elasticsearch3 39 | environment: 40 | - cluster.name=docker-cluster 41 | - node.name=elasticsearch3 42 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 43 | - xpack.monitoring.collection.enabled=true 44 | - xpack.monitoring.exporters.remote.type=http 45 | - xpack.monitoring.exporters.remote.host=monitor 46 | - discovery.seed_hosts=elasticsearch1,elasticsearch2,elasticsearch3 47 | - cluster.initial_master_nodes=elasticsearch1,elasticsearch2,elasticsearch3 48 | - xpack.security.enabled=false 49 | ports: 50 | - "39200:9200" 51 | 52 | monitor: 53 | image: docker.elastic.co/elasticsearch/elasticsearch:7.17.5 54 | container_name: monitor 55 | environment: 56 | - cluster.name=docker-monitor-cluster 57 | - node.name=monitor 58 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 59 | - xpack.monitoring.collection.enabled=false 60 | - discovery.seed_hosts=monitor 61 | - cluster.initial_master_nodes=monitor 62 | - xpack.security.enabled=false 63 | ports: 64 | - "9200:9200" 65 | 66 | kibana-devtools: 67 | image: docker.elastic.co/kibana/kibana:7.17.5 68 | container_name: kibana-devtools 69 | environment: 70 | ELASTICSEARCH_HOSTS: http://elasticsearch3:9200 71 | ports: 72 | - "15601:5601" 73 | 74 | kibana-dashboard: 75 | image: docker.elastic.co/kibana/kibana:7.17.5 76 | container_name: kibana-dashboard 77 | environment: 78 | ELASTICSEARCH_HOSTS: http://monitor:9200 79 | ports: 80 | - "25601:5601" 81 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | 3 | services: 4 | elasticsearch: 5 | image: 
docker.elastic.co/elasticsearch/elasticsearch:7.17.5 6 | container_name: elasticsearch-hands-on-2 7 | environment: 8 | - "ES_JAVA_OPTS=-Xms512m -Xmx512m" 9 | - "discovery.type=single-node" 10 | - xpack.monitoring.collection.enabled=true 11 | - cluster.routing.allocation.disk.threshold_enabled=false 12 | - xpack.security.enabled=false 13 | ports: 14 | - "9200:9200" 15 | 16 | kibana: 17 | image: docker.elastic.co/kibana/kibana:7.17.5 18 | environment: 19 | - "ELASTICSEARCH_HOSTS=http://elasticsearch-hands-on-2:9200" 20 | - xpack.security.enabled=false 21 | ports: 22 | - "5601:5601" 23 | depends_on: 24 | - elasticsearch 25 | 26 | -------------------------------------------------------------------------------- /scripts/destroy: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | # 3 | # Delete elasticsearch and kibana+sense along with all data 4 | 5 | cd "$(dirname $0)"/.. 6 | 7 | docker-compose -f docker-compose.multi-node.yml down --volumes --remove-orphans 8 | docker-compose down --volumes --remove-orphans 9 | -------------------------------------------------------------------------------- /scripts/down: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | # 3 | # Stop elasticsearch and kibana+sense but keep data 4 | 5 | cd "$(dirname $0)"/.. 
6 | 7 | docker-compose -f docker-compose.multi-node.yml down 8 | docker-compose down 9 | -------------------------------------------------------------------------------- /scripts/logstash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # USAGE: 4 | # LOGSTASH_CONFIG_DIR= ./logstash 5 | 6 | set -euv -o pipefail 7 | 8 | ELASTICSEARCH_URL=${ELASTICSEARCH_URL:-http://localhost:9200} 9 | 10 | # Need to set pipeline.workers to work around bugs in the CSV filter :-( 11 | # See https://discuss.elastic.co/t/event-dependent-configuration-broken-with-multiple-pipelines/141869 12 | PIPELINE_WORKERS=1 13 | 14 | docker run -e XPACK_MONITORING_ENABLED=false \ 15 | -e ELASTICSEARCH_URL="$ELASTICSEARCH_URL" \ 16 | --rm -i \ 17 | -v "${LOGSTASH_CONFIG_DIR}":/usr/share/logstash/pipeline/ \ 18 | --network="container:elasticsearch-hands-on-2" \ 19 | docker.elastic.co/logstash/logstash:7.17.5 \ 20 | --pipeline.workers $PIPELINE_WORKERS 21 | -------------------------------------------------------------------------------- /scripts/up: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | # 3 | # Run elasticsearch and kibana+sense 4 | 5 | cd "$(dirname $0)"/.. 6 | 7 | trap "docker-compose down --volumes --remove-orphans" 0 8 | 9 | docker-compose up 10 | -------------------------------------------------------------------------------- /scripts/up-multi-node: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | # 3 | # Run a multi-node cluster 4 | 5 | cd "$(dirname $0)"/.. 
6 | 7 | trap "docker-compose down --volumes --remove-orphans" 0 8 | 9 | #docker-compose -f docker-compose.multi-node.yml up -d 10 | docker-compose -f docker-compose.multi-node.yml up 11 | -------------------------------------------------------------------------------- /solutions/03-multi-fields-solutions.txt: -------------------------------------------------------------------------------- 1 | # Look up "match_phrase" and use a "match_phrase" query that only matches "1 Church St" 2 | # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query-phrase.html 3 | 4 | GET /listings/_search 5 | { 6 | "query": { 7 | "match_phrase": { 8 | "address": "1 Church St" 9 | } 10 | } 11 | } 12 | 13 | # Use a "bool" query to find listings that match "1 Church St" after English stemming, 14 | # but gives a higher score (by using "should") to those listings that match without stemming 15 | 16 | GET /listings/_search 17 | { 18 | "query": { 19 | "bool": { 20 | "filter": [ 21 | { 22 | "match_phrase": { 23 | "address.a": "1 Church St" 24 | } 25 | } 26 | ], 27 | "should": [ 28 | { 29 | "match_phrase": { 30 | "address": "1 Church St" 31 | } 32 | } 33 | ] 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /solutions/04-function-score-solutions.txt: -------------------------------------------------------------------------------- 1 | # Write a function_score query that gives a score of 2 | # 3 | # - 30.x to premier 4 | # - 20.x to highlight 5 | # - 10.x to standard 6 | # 7 | # Where x is reflects how close the property price is to 800. For example, 8 | # 9 | # - A premier property with a price of 820 should get a score something like 30.99 10 | # (i.e. close to, but below 31) 11 | # - A highlight property with a price of 460 should get a score something like 20.83 12 | # (i.e. between 20 and 21) 13 | # 14 | 15 | # Two approaches: 16 | 17 | # 1. 
just add it as another function 18 | 19 | GET /listings/_search 20 | { 21 | "query": { 22 | "function_score": { 23 | "score_mode": "sum", 24 | "functions": [ 25 | { 26 | "filter": {"match": { "product": "premier" }}, 27 | "weight": 30 28 | }, 29 | { 30 | "filter": {"match": { "product": "highlight" }}, 31 | "weight": 20 32 | }, 33 | { 34 | "filter": {"match": { "product": "standard" }}, 35 | "weight": 10 36 | }, 37 | { 38 | "gauss": { 39 | "price": { 40 | "origin": 800, 41 | "scale": 1000, 42 | "decay": 0.5 43 | } 44 | } 45 | } 46 | ] 47 | } 48 | } 49 | } 50 | 51 | # 2: Use a nested query and add the scores: 52 | 53 | GET /listings/_search 54 | { 55 | "query": { 56 | "function_score": { 57 | "boost_mode": "sum", 58 | "query": { 59 | "function_score": { 60 | "query": { 61 | "match_all": {} 62 | }, 63 | "functions": [ 64 | { 65 | "gauss": { 66 | "price": { 67 | "origin": 800, 68 | "scale": 1000, 69 | "decay": 0.5 70 | } 71 | } 72 | } 73 | ] 74 | } 75 | }, 76 | "functions": [ 77 | { 78 | "filter": { 79 | "match": { 80 | "product": "premier" 81 | } 82 | }, 83 | "weight": 30 84 | }, 85 | { 86 | "filter": { 87 | "match": { 88 | "product": "highlight" 89 | } 90 | }, 91 | "weight": 20 92 | }, 93 | { 94 | "filter": { 95 | "match": { 96 | "product": "standard" 97 | } 98 | }, 99 | "weight": 10 100 | } 101 | ] 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /solutions/05-aggregations-solutions.txt: -------------------------------------------------------------------------------- 1 | 2 | # Cities ordered by popularity 3 | GET /listings/_search 4 | { 5 | "size" : 0, 6 | "aggs" : { 7 | "popular_cities" : { 8 | "terms" : { 9 | "field" : "city" 10 | } 11 | } 12 | } 13 | } 14 | 15 | 16 | # Max price per city 17 | GET /listings/_search 18 | { 19 | "size": 0, 20 | "aggs": { 21 | "popular_cities": { 22 | "terms": { 23 | "field": "city" 24 | }, 25 | "aggs": { 26 | "max_price": { 27 | "max": { 28 | "field": "price" 29 | } 30 | 
} 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /solutions/06-nested-objects-solutions.txt: -------------------------------------------------------------------------------- 1 | DELETE /agencies 2 | 3 | PUT /agencies 4 | { 5 | "mappings": { 6 | "properties": { 7 | "name": {"type": "text"}, 8 | "agents": { 9 | "type": "nested", 10 | "properties": { 11 | "firstName": {"type": "text"}, 12 | "lastName": {"type": "text"} 13 | } 14 | } 15 | } 16 | } 17 | } 18 | 19 | PUT /agencies/_doc/1 20 | { 21 | "name": "Nelson Alexander", 22 | "agents": [ 23 | {"firstName": "Mary", "lastName": "Smith"}, 24 | {"firstName": "Joe", "lastName": "Blogs"} 25 | ] 26 | } 27 | 28 | 29 | PUT /agencies/_doc/2 30 | { 31 | "name": "MacGrath", 32 | "agents": [ 33 | {"firstName": "Mary", "lastName": "Blogs"}, 34 | {"firstName": "Joe", "lastName": "Smith"} 35 | ] 36 | } 37 | 38 | GET /agencies/_search 39 | { 40 | "query": { 41 | "nested": { 42 | "path": "agents", 43 | "query": { 44 | "bool": { 45 | "filter": [ 46 | { 47 | "match": { 48 | "agents.firstName": "Mary" 49 | } 50 | }, 51 | { 52 | "match": { 53 | "agents.lastName": "Blogs" 54 | } 55 | } 56 | ] 57 | } 58 | } 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /solutions/07-parent-child-solutions.txt: -------------------------------------------------------------------------------- 1 | # Re-create the "agencies" index from 06c-nested-objects-exercises.md 2 | # but using a parent/child relationship. 
3 | 4 | DELETE /agencies 5 | 6 | PUT /agencies 7 | { 8 | "mappings": { 9 | "properties": { 10 | "agencyName": {"type": "text"}, 11 | "firstName": {"type": "text"}, 12 | "lastName": {"type": "text"}, 13 | "join": { 14 | "type": "join", 15 | "relations": { 16 | "agency": "agent" 17 | } 18 | } 19 | } 20 | } 21 | } 22 | 23 | 24 | 25 | PUT /agencies/_doc/1?routing=1 26 | { 27 | "agencyName": "Nelson Alexander", 28 | "join": { 29 | "name": "agency" 30 | } 31 | } 32 | 33 | PUT /agencies/_doc/a?routing=1 34 | { 35 | "firstName": "Mary", 36 | "lastName": "Smith", 37 | "join": { 38 | "name": "agent", 39 | "parent": "1" 40 | } 41 | } 42 | 43 | PUT /agencies/_doc/b?routing=1 44 | { 45 | "firstName": "Joe", 46 | "lastName": "Blogs", 47 | "join": { 48 | "name": "agent", 49 | "parent": "1" 50 | } 51 | } 52 | 53 | PUT /agencies/_doc/2?routing=2 54 | { 55 | "agencyName": "MacGrath", 56 | "join": { 57 | "name": "agency" 58 | } 59 | } 60 | 61 | PUT /agencies/_doc/x?routing=2 62 | { 63 | "firstName": "Mary", 64 | "lastName": "Blogs", 65 | "join": { 66 | "name": "agent", 67 | "parent": "2" 68 | } 69 | } 70 | 71 | 72 | PUT /agencies/_doc/y?routing=2 73 | { 74 | "firstName": "Joe", 75 | "lastName": "Smith", 76 | "join": { 77 | "name": "agent", 78 | "parent": "2" 79 | } 80 | } 81 | 82 | # Find only those agencies that have an agent called "Mary Blogs". 83 | 84 | GET /agencies/_search 85 | { 86 | "query": { 87 | "has_child": { 88 | "type": "agent", 89 | "query": { 90 | "bool": { 91 | "filter": [ 92 | { 93 | "match": { 94 | "firstName": "Mary" 95 | } 96 | }, 97 | { 98 | "match": { 99 | "lastName": "Blogs" 100 | } 101 | } 102 | ] 103 | } 104 | } 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /solutions/08-collapse-solutions.txt: -------------------------------------------------------------------------------- 1 | Write a query to find those agents that have had 2 | more than 5 sales ordered by the number of sales descending. 
3 | 4 | Use field collapsing to ensure that only one agent is returned per agency. 5 | 6 | 7 | GET /agent/_search 8 | { 9 | "query": { 10 | "range": { 11 | "sales": { 12 | "gt": 5 13 | } 14 | } 15 | }, 16 | "sort": [ 17 | { 18 | "sales": { 19 | "order": "desc" 20 | } 21 | } 22 | ], 23 | "collapse": { 24 | "field" : "agencyId" 25 | } 26 | } 27 | 28 | 29 | -------------------------------------------------------------------------------- /solutions/09-geo-point-solutions.txt: -------------------------------------------------------------------------------- 1 | # write a query to find child care centres within 1km of the Melbourne GPO 2 | # - Lat: -37.8138932 3 | # - Lon: 144.9611874 4 | 5 | GET /childcare-centres/_search 6 | { 7 | "query": { 8 | "geo_distance": { 9 | "distance": "1km", 10 | "geocode": { 11 | "lat": -37.8138932, 12 | "lon": 144.9611874 13 | } 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /solutions/10-geo-shape-solutions.txt: -------------------------------------------------------------------------------- 1 | 2 | Use a geo-shape query to find child care centres within the following bounding box: 3 | 4 | Top Left: 5 | - Lat: -37.81 6 | - Lon: 144.96 7 | Bottom Right: 8 | - Lat: -37.82 9 | - Lon: 144.97 10 | 11 | GET childcare-centres/_search 12 | { 13 | "query": { 14 | "geo_shape": { 15 | "geocode": { 16 | "shape": { 17 | "type": "polygon", 18 | "coordinates" : [[ 19 | [144.96, -37.81], 20 | [144.97, -37.81], 21 | [144.97, -37.82], 22 | [144.96, -37.82], 23 | [144.96, -37.81] 24 | ]] 25 | }, 26 | "relation": "within" 27 | } 28 | } 29 | } 30 | } 31 | 32 | 33 | Note: The mutate filter could have added the lon and lat values as separate steps! 34 | 35 | 36 | mutate { 37 | add_field => { "[geocode][type]" => "point" } 38 | add_field => { "[geocode][coordinates]" => ["%{[lon]}", "%{[lat]}"] } 39 | } 40 | 41 | # This converts all the array elements from strings to floats. 
42 | mutate { 43 | convert => { "[geocode][coordinates]" => "float" } 44 | } 45 | -------------------------------------------------------------------------------- /solutions/11-term-suggesters-solutions.txt: -------------------------------------------------------------------------------- 1 | 2 | # Find matches & alternative spellings for terms in "Powlet Reserve Center" 3 | 4 | GET /childcare-centres/_search 5 | { 6 | "query": { 7 | "match": { 8 | "name": { 9 | "query": "Powlet Reserve Center", 10 | "operator": "and" 11 | } 12 | } 13 | }, 14 | "suggest": { 15 | "text": "Powlet Reserve Center", 16 | "name-suggestions": { 17 | "term": { 18 | "field": "name", 19 | "suggest_mode": "always", 20 | "max_term_freq": 0.99 21 | } 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /solutions/12-completion-suggesters-solutions.txt: -------------------------------------------------------------------------------- 1 | Replace the contents of ../childcare-centres/mapping-template.json with: 2 | 3 | { 4 | "index_patterns": ["childcare-centres"], 5 | "mappings": { 6 | "properties": { 7 | "suggest": { 8 | "type": "completion", 9 | "preserve_separators": false, 10 | "analyzer": "standard" 11 | } 12 | } 13 | } 14 | } 15 | 16 | 17 | Replace the mutate filter in ../childcare-centres/logstash/logstash.config with: 18 | 19 | mutate { 20 | add_field => { "[suggest]" => "%{[name]}" } 21 | } 22 | 23 | Re-run the ../childcare-centres/feed-index.sh script. 24 | 25 | Try the following search: 26 | 27 | GET /childcare-centres/_search 28 | { 29 | "suggest": { 30 | "my-suggest": { 31 | "text": "Litt", 32 | "completion": { 33 | "field": "suggest" 34 | } 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /solutions/13-percolator-solutions.txt: -------------------------------------------------------------------------------- 1 | DELETE /childcare-queries 2 | 3 | # Create a mapping that includes a percolator field.
4 | PUT /childcare-queries 5 | { 6 | "mappings": { 7 | "properties": { 8 | "saved_query": { 9 | "type": "percolator" 10 | }, 11 | "name": { 12 | "type": "text", 13 | "analyzer": "english" 14 | }, 15 | "contact_ph": { 16 | "type": "keyword" 17 | }, 18 | "geocode": { 19 | "type": "geo_point" 20 | } 21 | } 22 | } 23 | } 24 | 25 | # Childcare centres whose name includes the word "goat" 26 | PUT /childcare-queries/_doc/1 27 | { 28 | "saved_query": { 29 | "match": { 30 | "name": "goat" 31 | } 32 | } 33 | } 34 | 35 | # Childcare centres whose name includes the word "goodstart" 36 | PUT /childcare-queries/_doc/2 37 | { 38 | "saved_query": { 39 | "match": { 40 | "name": "goodstart" 41 | } 42 | } 43 | } 44 | 45 | # Childcare centres whose name includes the word "goodstart" 46 | # OR is within 40km of the location 38 South, 145 East 47 | PUT /childcare-queries/_doc/3 48 | { 49 | "saved_query": { 50 | "bool": { 51 | "should": [ 52 | { 53 | "match": { 54 | "name": "goodstart" 55 | } 56 | }, 57 | { 58 | "geo_distance": { 59 | "distance": "40km", 60 | "geocode": { 61 | "lat": -38, 62 | "lon": 145 63 | } 64 | } 65 | } 66 | ] 67 | } 68 | } 69 | } 70 | 71 | # Determine if this new document matches. 72 | GET /childcare-queries/_search 73 | { 74 | "query": { 75 | "percolate": { 76 | "field": "saved_query", 77 | "document": { 78 | "name": "Billy Goat's Child Care", 79 | "contact_ph": "03 9999 2222", 80 | "geocode": { 81 | "lat": -37.80932522, 82 | "lon": 144.96026152 83 | } 84 | } 85 | } 86 | } 87 | } 88 | 89 | --------------------------------------------------------------------------------