├── README.md ├── mapping.json └── populate.sh /README.md: -------------------------------------------------------------------------------- 1 | This repository contains the code examples from the book [*Elasticsearch in Action*][2]. 2 | 3 | When do I need them? 4 | -------------------- 5 | 6 | Whenever you want some sample data to run the searches from the 7 | book's listings. Throughout most chapters, there's an example use-case of 8 | running a get-together site, where you have people organizing themselves into 9 | groups and hosting events. Code samples provide you with some data and a script 10 | to index that data. 11 | 12 | How do I index the sample data? 13 | ------------------------------- 14 | 15 | # clone the repository 16 | git clone https://github.com/dakrone/elasticsearch-in-action.git 17 | 18 | # or, instead, clone the branch that matches your version. Master works with 1.x and 2.x 19 | # but we currently support 5.x, 6.x and 7.x as well: 20 | git clone https://github.com/dakrone/elasticsearch-in-action.git -b 7.x 21 | 22 | # index the sample data 23 | elasticsearch-in-action/populate.sh 24 | 25 | I have questions or suggestions for these samples 26 | ------------------------------------------------- 27 | 28 | You have feedback? We'd be glad to hear it from you on the book's forum. To get there, go to the [live book][1] and click on the discuss button on the top-right. You can also send pull requests or open issues on this repository. 29 | 30 | Happy reading and Elasticsearch-ing! 
31 | 32 | [1]: https://livebook.manning.com/book/elasticsearch-in-action/ 33 | [2]: http://manning.com/hinman 34 | -------------------------------------------------------------------------------- /mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings" : { 3 | "number_of_shards" : 2, 4 | "number_of_replicas" : 1, 5 | "index": { 6 | "analysis": { 7 | "analyzer": { 8 | "myCustomAnalyzer": { 9 | "type": "custom", 10 | "tokenizer": "myCustomTokenizer", 11 | "filter": ["myCustomFilter1", "myCustomFilter2"], 12 | "char_filter": ["myCustomCharFilter"] 13 | } 14 | }, 15 | "tokenizer": { 16 | "myCustomTokenizer": { 17 | "type": "letter" 18 | }, 19 | "myCustomNGramTokenizer": { 20 | "type" : "ngram", 21 | "min_gram" : 2, 22 | "max_gram" : 3 23 | } 24 | 25 | }, 26 | "filter": { 27 | "myCustomFilter1": { 28 | "type": "lowercase" 29 | }, 30 | "myCustomFilter2": { 31 | "type": "kstem" 32 | } 33 | }, 34 | "char_filter": { 35 | "myCustomCharFilter": { 36 | "type": "mapping", 37 | "mappings": ["ph=>f", " u => you ", "ES=>Elasticsearch"] 38 | } 39 | } 40 | } 41 | } 42 | }, 43 | "mappings" : { 44 | "group" : { 45 | "_source" : { 46 | "enabled" : true 47 | }, 48 | "_all" : { 49 | "enabled" : true 50 | }, 51 | "properties" : { 52 | "organizer" : { "type" : "string" }, 53 | "name" : { "type" : "string" }, 54 | "description" : { 55 | "type" : "string", 56 | "term_vector": "with_positions_offsets" 57 | }, 58 | "created_on" : { 59 | "type" : "date", 60 | "format" : "yyyy-MM-dd" 61 | }, 62 | "tags" : { 63 | "type" : "string", 64 | "index" : "analyzed", 65 | "fields": { 66 | "verbatim" : { 67 | "type" : "string", 68 | "index" : "not_analyzed" 69 | } 70 | } 71 | }, 72 | "members" : { "type" : "string" }, 73 | "location_group" : { "type" : "string" } 74 | } 75 | }, 76 | "event" : { 77 | "_source" : { 78 | "enabled" : true 79 | }, 80 | "_all" : { 81 | "enabled" : false 82 | }, 83 | "_parent" : { 84 | "type" : "group" 85 | }, 86 | 
"properties" : { 87 | "host" : { "type" : "string" }, 88 | "title" : { "type" : "string" }, 89 | "description" : { 90 | "type" : "string", 91 | "term_vector": "with_positions_offsets" 92 | }, 93 | "attendees" : { "type" : "string" }, 94 | "date" : { 95 | "type" : "date", 96 | "format" : "date_hour_minute" 97 | }, 98 | "reviews" : { 99 | "type" : "integer", 100 | "null_value" : 0 101 | }, 102 | "location_event": { 103 | "type" : "object", 104 | "properties" : { 105 | "name" : { "type" : "string" }, 106 | "geolocation" : { "type" : "geo_point" } 107 | } 108 | } 109 | } 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /populate.sh: --------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Recreates the 'get-together' index from mapping.json and fills it with the
# sample groups and events.
#
# Usage: populate.sh [host:port]
#   host:port  Elasticsearch HTTP address; defaults to localhost:9200.

# Fall back to the default node when no (or an empty) address is given.
ADDRESS=${1:-localhost:9200}

# Check that Elasticsearch is running. stdout is redirected first so that
# "2>&1" sends stderr to /dev/null as well.
if ! curl -s "http://$ADDRESS" > /dev/null 2>&1; then
    echo "Unable to contact Elasticsearch at $ADDRESS"
    echo "Please ensure Elasticsearch is running and can be reached at http://$ADDRESS/"
    # 255 is the status "exit -1" yields in bash, spelled portably.
    exit 255
fi

echo "WARNING, this script will delete the 'get-together' and the 'myindex' indices and re-index all data!"
echo "Press Control-C to cancel this operation."
echo
echo "Press [Enter] to continue."
read -r

# Delete the old index, swallow failures if it doesn't exist
curl -s -XDELETE "$ADDRESS/get-together" > /dev/null

# Create the next index using mapping.json, which is looked up next to this
# script. The path is quoted so a checkout directory with spaces still works.
echo "Creating 'get-together' index..."
curl -s -XPOST "$ADDRESS/get-together" -d "@$(dirname "$0")/mapping.json"

# Wait for index to become yellow. Per-index health is served by the cluster
# health API (/_cluster/health/<index>).
curl -s "$ADDRESS/_cluster/health/get-together?wait_for_status=yellow&timeout=10s" > /dev/null
echo
echo "Done creating 'get-together' index."

echo
echo "Indexing data..."

echo "Indexing groups..."
# --- Groups -----------------------------------------------------------------
# Each group is indexed under an explicit ID (1-5); the events below refer to
# these IDs through their ?parent= parameter.
curl -s -XPOST "$ADDRESS/get-together/group/1" -d'{
  "name": "Denver Clojure",
  "organizer": ["Daniel", "Lee"],
  "description": "Group of Clojure enthusiasts from Denver who want to hack on code together and learn more about Clojure",
  "created_on": "2012-06-15",
  "tags": ["clojure", "denver", "functional programming", "jvm", "java"],
  "members": ["Lee", "Daniel", "Mike"],
  "location_group": "Denver, Colorado, USA"
}'

echo
curl -s -XPOST "$ADDRESS/get-together/group/2" -d'{
  "name": "Elasticsearch Denver",
  "organizer": "Lee",
  "description": "Get together to learn more about using Elasticsearch, the applications and neat things you can do with ES!",
  "created_on": "2013-03-15",
  "tags": ["denver", "elasticsearch", "big data", "lucene", "solr"],
  "members": ["Lee", "Mike"],
  "location_group": "Denver, Colorado, USA"
}'

echo
curl -s -XPOST "$ADDRESS/get-together/group/3" -d'{
  "name": "Elasticsearch San Francisco",
  "organizer": "Mik",
  "description": "Elasticsearch group for ES users of all knowledge levels",
  "created_on": "2012-08-07",
  "tags": ["elasticsearch", "big data", "lucene", "open source"],
  "members": ["Lee", "Igor"],
  "location_group": "San Francisco, California, USA"
}'

echo
curl -s -XPOST "$ADDRESS/get-together/group/4" -d'{
  "name": "Boulder/Denver big data get-together",
  "organizer": "Andy",
  "description": "Come learn and share your experience with nosql & big data technologies, no experience required",
  "created_on": "2010-04-02",
  "tags": ["big data", "data visualization", "open source", "cloud computing", "hadoop"],
  "members": ["Greg", "Bill"],
  "location_group": "Boulder, Colorado, USA"
}'

echo
curl -s -XPOST "$ADDRESS/get-together/group/5" -d'{
  "name": "Enterprise search London get-together",
  "organizer": "Tyler",
  "description": "Enterprise search get-togethers are an opportunity to get together with other people doing search.",
  "created_on": "2009-11-25",
  "tags": ["enterprise search", "apache lucene", "solr", "open source", "text analytics"],
  "members": ["Clint", "James"],
  "location_group": "London, England, UK"
}'

echo
echo "Done indexing groups."

# --- Events -----------------------------------------------------------------
# Every event passes ?parent=<group id> so it is indexed as a child of the
# corresponding group document created above.
echo "Indexing events..."

curl -s -XPOST "$ADDRESS/get-together/event/100?parent=1" -d'{
  "host": ["Lee", "Troy"],
  "title": "Liberator and Immutant",
  "description": "We will discuss two different frameworks in Clojure for doing different things. Liberator is a ring-compatible web framework based on Erlang Webmachine. Immutant is an all-in-one enterprise application based on JBoss.",
  "attendees": ["Lee", "Troy", "Daniel", "Tom"],
  "date": "2013-09-05T18:00",
  "location_event": {
    "name": "Stoneys Full Steam Tavern",
    "geolocation": "39.752337,-105.00083"
  },
  "reviews": 4
}'
echo
curl -s -XPOST "$ADDRESS/get-together/event/101?parent=1" -d'{
  "host": "Sean",
  "title": "Sunday, Surly Sunday",
  "description": "Sort out any setup issues and work on Surlybird issues. We can use the EC2 node as a bounce point for pairing.",
  "attendees": ["Daniel", "Michael", "Sean"],
  "date": "2013-07-21T18:30",
  "location_event": {
    "name": "IRC, #denofclojure"
  },
  "reviews": 2
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/102?parent=1" -d'{
  "host": "Daniel",
  "title": "10 Clojure coding techniques you should know, and project openbike",
  "description": "What are ten Clojure coding techniques that you wish everyone knew? We will also check on the status of Project Openbike.",
  "attendees": ["Lee", "Tyler", "Daniel", "Stuart", "Lance"],
  "date": "2013-07-11T18:00",
  "location_event": {
    "name": "Stoneys Full Steam Tavern",
    "geolocation": "39.752337,-105.00083"
  },
  "reviews": 3
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/103?parent=2" -d'{
  "host": "Lee",
  "title": "Introduction to Elasticsearch",
  "description": "An introduction to ES and each other. We can meet and greet and I will present on some Elasticsearch basics and how we use it.",
  "attendees": ["Lee", "Martin", "Greg", "Mike"],
  "date": "2013-04-17T19:00",
  "location_event": {
    "name": "Stoneys Full Steam Tavern",
    "geolocation": "39.752337,-105.00083"
  },
  "reviews": 5
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/104?parent=2" -d'{
  "host": "Lee",
  "title": "Queries and Filters",
  "description": "A get together to talk about different ways to query Elasticsearch, what works best for different kinds of applications.",
  "attendees": ["Lee", "Greg", "Richard"],
  "date": "2013-06-17T18:00",
  "location_event": {
    "name": "Stoneys Full Steam Tavern",
    "geolocation": "39.752337,-105.00083"
  },
  "reviews": 1
}'

echo
# "reviews" is null here; mapping.json declares null_value 0 for that field.
curl -s -XPOST "$ADDRESS/get-together/event/105?parent=2" -d'{
  "host": "Lee",
  "title": "Elasticsearch and Logstash",
  "description": "We can get together and talk about Logstash - http://logstash.net with a sneak peek at Kibana",
  "attendees": ["Lee", "Greg", "Mike", "Delilah"],
  "date": "2013-07-17T18:30",
  "location_event": {
    "name": "Stoneys Full Steam Tavern",
    "geolocation": "39.752337,-105.00083"
  },
  "reviews": null
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/106?parent=3" -d'{
  "host": "Mik",
  "title": "Social management and monitoring tools",
  "description": "Shay Banon will be there to answer questions and we can talk about management tools.",
  "attendees": ["Shay", "Mik", "John", "Chris"],
  "date": "2013-03-06T18:00",
  "location_event": {
    "name": "Quid Inc",
    "geolocation": "37.798442,-122.399801"
  },
  "reviews": 5
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/107?parent=3" -d'{
  "host": "Mik",
  "title": "Logging and Elasticsearch",
  "description": "Get a deep dive for what Elasticsearch is and how it can be used for logging with Logstash as well as Kibana!",
  "attendees": ["Shay", "Rashid", "Erik", "Grant", "Mik"],
  "date": "2013-04-08T18:00",
  "location_event": {
    "name": "Salesforce headquarters",
    "geolocation": "37.793592,-122.397033"
  },
  "reviews": 3
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/108?parent=3" -d'{
  "host": "Elyse",
  "title": "Piggyback on Elasticsearch training in San Francisco",
  "description": "We can piggyback on training by Elasticsearch to have some Q&A time with the ES devs",
  "attendees": ["Shay", "Igor", "Uri", "Elyse"],
  "date": "2013-05-23T19:00",
  "location_event": {
    "name": "NoSQL Roadshow",
    "geolocation": "37.787742,-122.398964"
  },
  "reviews": 5
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/109?parent=4" -d'{
  "host": "Andy",
  "title": "Hortonworks, the future of Hadoop and big data",
  "description": "Presentation on the work that hortonworks is doing on Hadoop",
  "attendees": ["Andy", "Simon", "David", "Sam"],
  "date": "2013-06-19T18:00",
  "location_event": {
    "name": "SendGrid Denver office",
    "geolocation": "39.748477,-104.998852"
  },
  "reviews": 2
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/110?parent=4" -d'{
  "host": "Andy",
  "title": "Big Data and the cloud at Microsoft",
  "description": "Discussion about the Microsoft Azure cloud and HDInsight.",
  "attendees": ["Andy", "Michael", "Ben", "David"],
  "date": "2013-07-31T18:00",
  "location_event": {
    "name": "Bing Boulder office",
    "geolocation": "40.018528,-105.275806"
  },
  "reviews": 1
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/111?parent=4" -d'{
  "host": "Andy",
  "title": "Moving Hadoop to the mainstream",
  "description": "Come hear about how Hadoop is moving to the main stream",
  "attendees": ["Andy", "Matt", "Bill"],
  "date": "2013-07-21T18:00",
  "location_event": {
    "name": "Courtyard Boulder Louisville",
    "geolocation": "39.959409,-105.163497"
  },
  "reviews": 4
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/112?parent=5" -d'{
  "host": "Dave Nolan",
  "title": "real-time Elasticsearch",
  "description": "We will discuss using Elasticsearch to index data in real time",
  "attendees": ["Dave", "Shay", "John", "Harry"],
  "date": "2013-02-18T18:30",
  "location_event": {
    "name": "SkillsMatter Exchange",
    "geolocation": "51.524806,-0.099095"
  },
  "reviews": 3
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/113?parent=5" -d'{
  "host": "Dave",
  "title": "Elasticsearch at Rangespan and Exonar",
  "description": "Representatives from Rangespan and Exonar will come and discuss how they use Elasticsearch",
  "attendees": ["Dave", "Andrew", "David", "Clint"],
  "date": "2013-06-24T18:30",
  "location_event": {
    "name": "Alumni Theatre",
    "geolocation": "51.51558,-0.117699"
  },
  "reviews": 3
}'

echo
curl -s -XPOST "$ADDRESS/get-together/event/114?parent=5" -d'{
  "host": "Yann",
  "title": "Using Hadoop with Elasticsearch",
  "description": "We will walk through using Hadoop with Elasticsearch for big data crunching!",
  "attendees": ["Yann", "Bill", "James"],
  "date": "2013-09-09T18:30",
  "location_event": {
    "name": "SkillsMatter Exchange",
    "geolocation": "51.524806,-0.099095"
  },
  "reviews": 2
}'

echo
echo "Done indexing events."

# Refresh so data is available
curl -s -XPOST "$ADDRESS/get-together/_refresh"

echo
echo "Done indexing data."
echo

# --- Index templates ----------------------------------------------------------
# Two templates match logstash indices: the more specific "logstash-09-*" has
# order 1, the general "logstash-*" has order 0.
echo
echo "Creating Templates."
# The key must be "aliases" (plural) for the template to attach the
# "november" alias to matching indices.
curl -s -XPUT "http://$ADDRESS/_template/logging_index_all" -d'{
  "template" : "logstash-09-*",
  "order" : 1,
  "settings" : {
    "number_of_shards" : 2,
    "number_of_replicas" : 1
  },
  "mappings" : {
    "date" : { "store": false }
  },
  "aliases" : { "november" : {} }
}'

echo
curl -s -XPUT "http://$ADDRESS/_template/logging_index" -d '{
  "template" : "logstash-*",
  "order" : 0,
  "settings" : {
    "number_of_shards" : 2,
    "number_of_replicas" : 1
  },
  "mappings" : {
    "date" : { "store": true }
  }
}'
echo
echo "Done Creating Templates."
echo
echo "Adding Dynamic Mapping"
# Recreate 'myindex' with a dynamic template: string fields whose name ends in
# "_guid" are mapped as not_analyzed strings.
curl -s -XDELETE "http://$ADDRESS/myindex" > /dev/null
curl -s -XPUT "http://$ADDRESS/myindex" -d'
{
  "mappings" : {
    "my_type" : {
      "dynamic_templates" : [{
        "UUID" : {
          "match" : "*_guid",
          "match_mapping_type" : "string",
          "mapping" : {
            "type" : "string",
            "index" : "not_analyzed"
          }
        }
      }]
    }
  }
}'
echo
echo "Done Adding Dynamic Mapping"

echo
echo "Adding Aliases"
# Start from scratch: drop both invoice indices (errors for missing indices
# are discarded), then create them again.
curl -s -XDELETE "http://$ADDRESS/november_2014_invoices" > /dev/null
curl -s -XDELETE "http://$ADDRESS/december_2014_invoices" > /dev/null
curl -s -XPOST "http://$ADDRESS/november_2014_invoices" -d'{}'
echo
curl -s -XPOST "http://$ADDRESS/december_2014_invoices" -d'
{
  "mappings" :
  {
    "invoice" :
    {
      "properties" :
      {
        "revenue" : { "type" : "integer" }
      }
    }
  }
}'

echo

# One action per array element. Putting several "add"/"remove" keys into a
# single JSON object relies on duplicate keys, which parsers are free to
# collapse or reject.
# NOTE(review): the "remove" action targets an alias this script never creates
# on 'myindex'; it is kept from the original request - confirm it is still
# wanted on the Elasticsearch version in use.
curl -s -XPOST "http://$ADDRESS/_aliases" -d'
{
  "actions" : [
    {
      "add" :
      {
        "index" : "november_2014_invoices",
        "alias" : "2014_invoices"
      }
    },
    {
      "add" :
      {
        "index" : "december_2014_invoices",
        "alias" : "2014_invoices"
      }
    },
    {
      "remove" :
      {
        "index" : "myindex",
        "alias" : "december_2014_invoices"
      }
    }
  ]
}'
echo
echo "Done Adding Aliases"

echo "Adding Filter Alias"
# "bigmoney" exposes only the december invoices whose revenue is above 1000.
curl -s -XPOST "http://$ADDRESS/_aliases" -d '
{
  "actions" : [
    {
      "add" : {
        "index" : "december_2014_invoices",
        "alias" : "bigmoney",
        "filter" :
        {
          "range" :
          {
            "revenue" :
            {
              "gt" : 1000
            }

          }
        }
      }
    }
  ]
}'
echo
echo "Done Adding Filter Alias"

echo
echo "Adding Routing Alias"
# Re-adds the 2014_invoices alias on the december index with routing values:
# searches use "en,es", indexing uses "en".
curl -s -XPOST "http://$ADDRESS/_aliases" -d '
{
  "actions" : [
    {
      "add" : {
        "index" : "december_2014_invoices",
        "alias" : "2014_invoices",
        "search_routing" : "en,es",
        "index_routing" : "en"
      }
    }
  ]
}'
echo
echo "Done Adding Routing Alias"

echo

--------------------------------------------------------------------------------