├── .github └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── 00_Getting_started.asciidoc ├── 010_Intro.asciidoc ├── 010_Intro ├── 05_What_is_it.asciidoc ├── 10_Installing_ES.asciidoc ├── 15_API.asciidoc ├── 20_Document.asciidoc ├── 25_Tutorial_Indexing.asciidoc ├── 30_Tutorial_Search.asciidoc ├── 35_Tutorial_Aggregations.asciidoc ├── 40_Tutorial_Conclusion.asciidoc ├── 45_Distributed.asciidoc └── 50_Conclusion.asciidoc ├── 01_Search_in_depth.asciidoc ├── 020_Distributed_Cluster.asciidoc ├── 020_Distributed_Cluster ├── 00_Intro.asciidoc ├── 05_Empty_cluster.asciidoc ├── 10_Cluster_health.asciidoc ├── 15_Add_an_index.asciidoc ├── 20_Add_failover.asciidoc ├── 25_Scale_horizontally.asciidoc ├── 30_Scale_more.asciidoc └── 35_Coping_with_failure.asciidoc ├── 02_Dealing_with_language.asciidoc ├── 030_Data ├── 00_Intro.asciidoc ├── 05_Document.asciidoc ├── 10_Index.asciidoc ├── 15_Get.asciidoc ├── 20_Exists.asciidoc ├── 25_Update.asciidoc ├── 30_Create.asciidoc ├── 35_Delete.asciidoc ├── 40_Version_control.asciidoc ├── 45_Partial_update.asciidoc ├── 50_Mget.asciidoc └── 55_Bulk.asciidoc ├── 030_Data_In_Data_Out.asciidoc ├── 03_Aggregations.asciidoc ├── 040_Distributed_CRUD.asciidoc ├── 040_Distributed_CRUD ├── 00_Intro.asciidoc ├── 05_Routing.asciidoc ├── 10_Shard_interaction.asciidoc ├── 15_Create_index_delete.asciidoc ├── 20_Retrieving.asciidoc ├── 25_Partial_updates.asciidoc ├── 30_Bulk_requests.asciidoc └── 35_Bulk_format.asciidoc ├── 04_Geolocation.asciidoc ├── 050_Search.asciidoc ├── 050_Search ├── 00_Intro.asciidoc ├── 05_Empty_search.asciidoc ├── 10_Multi_index_multi_type.asciidoc ├── 15_Pagination.asciidoc └── 20_Query_string.asciidoc ├── 052_Mapping_Analysis.asciidoc ├── 052_Mapping_Analysis ├── 25_Data_type_differences.asciidoc ├── 30_Exact_vs_full_text.asciidoc ├── 35_Inverted_index.asciidoc ├── 40_Analysis.asciidoc ├── 45_Mapping.asciidoc └── 50_Complex_datatypes.asciidoc ├── 054_Query_DSL.asciidoc ├── 054_Query_DSL ├── 55_Request_body_search.asciidoc ├── 60_Query_DSL.asciidoc ├── 65_Queries_vs_filters.asciidoc ├── 70_Important_clauses.asciidoc ├── 75_Combining_queries_together.asciidoc └── 80_Validating_queries.asciidoc ├── 056_Sorting.asciidoc ├── 056_Sorting ├── 85_Sorting.asciidoc ├── 88_String_sorting.asciidoc ├── 90_What_is_relevance.asciidoc └── 95_Docvalues.asciidoc ├── 060_Distributed_Search.asciidoc ├── 060_Distributed_Search ├── 00_Intro.asciidoc ├── 05_Query_phase.asciidoc ├── 10_Fetch_phase.asciidoc ├── 15_Search_options.asciidoc └── 20_Scroll.asciidoc ├── 06_Modeling_your_data.asciidoc ├── 070_Index_Mgmt.asciidoc ├── 070_Index_Mgmt ├── 05_Create_Delete.asciidoc ├── 10_Settings.asciidoc ├── 15_Configure_Analyzer.asciidoc ├── 20_Custom_Analyzers.asciidoc ├── 25_Mappings.asciidoc ├── 30_Root_Object.asciidoc ├── 31_Metadata_source.asciidoc ├── 32_Metadata_all.asciidoc ├── 33_Metadata_ID.asciidoc ├── 35_Dynamic_Mapping.asciidoc ├── 40_Custom_Dynamic_Mapping.asciidoc ├── 45_Default_Mapping.asciidoc ├── 50_Reindexing.asciidoc └── 55_Aliases.asciidoc ├── 075_Inside_a_shard.asciidoc ├── 075_Inside_a_shard ├── 10_Intro.asciidoc ├── 20_Making_text_searchable.asciidoc ├── 30_Dynamic_indices.asciidoc ├── 40_Near_real_time.asciidoc ├── 50_Persistent_changes.asciidoc └── 60_Segment_merging.asciidoc ├── 07_Admin.asciidoc ├── 080_Structured_Search.asciidoc ├── 080_Structured_Search ├── 00_structuredsearch.asciidoc ├── 05_term.asciidoc ├── 10_compoundfilters.asciidoc ├── 15_terms.asciidoc ├── 20_contains.asciidoc ├── 25_ranges.asciidoc ├── 30_existsmissing.asciidoc └── 40_bitsets.asciidoc 
├── 100_Full_Text_Search.asciidoc ├── 100_Full_Text_Search ├── 00_Intro.asciidoc ├── 05_Match_query.asciidoc ├── 10_Multi_word_queries.asciidoc ├── 15_Combining_queries.asciidoc ├── 20_How_match_uses_bool.asciidoc ├── 25_Boosting_clauses.asciidoc ├── 30_Controlling_analysis.asciidoc └── 35_Relevance_is_broken.asciidoc ├── 110_Multi_Field_Search.asciidoc ├── 110_Multi_Field_Search ├── 00_Intro.asciidoc ├── 05_Multiple_query_strings.asciidoc ├── 10_Single_query_string.asciidoc ├── 15_Best_field.asciidoc ├── 20_Tuning_best_field_queries.asciidoc ├── 25_Multi_match_query.asciidoc ├── 30_Most_fields.asciidoc ├── 35_Entity_search.asciidoc ├── 40_Field_centric.asciidoc ├── 45_Custom_all.asciidoc ├── 50_Cross_field.asciidoc └── 55_Not_analyzed.asciidoc ├── 120_Proximity_Matching.asciidoc ├── 120_Proximity_Matching ├── 00_Intro.asciidoc ├── 05_Phrase_matching.asciidoc ├── 10_Slop.asciidoc ├── 15_Multi_value_fields.asciidoc ├── 20_Scoring.asciidoc ├── 25_Relevance.asciidoc ├── 30_Performance.asciidoc └── 35_Shingles.asciidoc ├── 130_Partial_Matching.asciidoc ├── 130_Partial_Matching ├── 00_Intro.asciidoc ├── 05_Postcodes.asciidoc ├── 10_Prefix_query.asciidoc ├── 15_WildcardRegexp.asciidoc ├── 20_Match_phrase_prefix.asciidoc ├── 25_Index_time.asciidoc ├── 30_Ngram_intro.asciidoc ├── 35_Search_as_you_type.asciidoc └── 40_Compound_words.asciidoc ├── 170_Relevance.asciidoc ├── 170_Relevance ├── 05_Intro.asciidoc ├── 10_Scoring_theory.asciidoc ├── 15_Practical_scoring.asciidoc ├── 20_Query_time_boosting.asciidoc ├── 25_Query_scoring.asciidoc ├── 30_Not_quite_not.asciidoc ├── 35_Ignoring_TFIDF.asciidoc ├── 40_Function_score_query.asciidoc ├── 45_Popularity.asciidoc ├── 50_Boosting_filtered_subsets.asciidoc ├── 55_Random_scoring.asciidoc ├── 60_Decay_functions.asciidoc ├── 65_Script_score.asciidoc ├── 70_Pluggable_similarities.asciidoc ├── 75_Changing_similarities.asciidoc └── 80_Conclusion.asciidoc ├── 200_Language_intro.asciidoc ├── 200_Language_intro ├── 00_Intro.asciidoc ├── 10_Using.asciidoc ├── 20_Configuring.asciidoc ├── 30_Language_pitfalls.asciidoc ├── 40_One_language_per_doc.asciidoc ├── 50_One_language_per_field.asciidoc └── 60_Mixed_language_fields.asciidoc ├── 210_Identifying_words.asciidoc ├── 210_Identifying_words ├── 00_Intro.asciidoc ├── 10_Standard_analyzer.asciidoc ├── 20_Standard_tokenizer.asciidoc ├── 30_ICU_plugin.asciidoc ├── 40_ICU_tokenizer.asciidoc └── 50_Tidying_text.asciidoc ├── 220_Token_normalization.asciidoc ├── 220_Token_normalization ├── 00_Intro.asciidoc ├── 10_Lowercasing.asciidoc ├── 20_Removing_diacritics.asciidoc ├── 30_Unicode_world.asciidoc ├── 40_Case_folding.asciidoc ├── 50_Character_folding.asciidoc └── 60_Sorting_and_collations.asciidoc ├── 230_Stemming.asciidoc ├── 230_Stemming ├── 00_Intro.asciidoc ├── 10_Algorithmic_stemmers.asciidoc ├── 20_Dictionary_stemmers.asciidoc ├── 30_Hunspell_stemmer.asciidoc ├── 40_Choosing_a_stemmer.asciidoc ├── 50_Controlling_stemming.asciidoc └── 60_Stemming_in_situ.asciidoc ├── 240_Stopwords.asciidoc ├── 240_Stopwords ├── 10_Intro.asciidoc ├── 20_Using_stopwords.asciidoc ├── 30_Stopwords_and_performance.asciidoc ├── 40_Divide_and_conquer.asciidoc ├── 50_Phrase_queries.asciidoc ├── 60_Common_grams.asciidoc └── 70_Relevance.asciidoc ├── 260_Synonyms.asciidoc ├── 260_Synonyms ├── 10_Intro.asciidoc ├── 20_Using_synonyms.asciidoc ├── 30_Synonym_formats.asciidoc ├── 40_Expand_contract.asciidoc ├── 50_Analysis_chain.asciidoc ├── 60_Multi_word_synonyms.asciidoc └── 70_Symbol_synonyms.asciidoc ├── 270_Fuzzy_matching.asciidoc ├── 
270_Fuzzy_matching ├── 10_Intro.asciidoc ├── 20_Fuzziness.asciidoc ├── 30_Fuzzy_query.asciidoc ├── 40_Fuzzy_match_query.asciidoc ├── 50_Scoring_fuzziness.asciidoc └── 60_Phonetic_matching.asciidoc ├── 300_Aggregations ├── 100_circuit_breaker_fd_settings.asciidoc ├── 105_filtering.asciidoc ├── 115_eager.asciidoc ├── 120_breadth_vs_depth.asciidoc ├── 125_Conclusion.asciidoc ├── 20_basic_example.asciidoc ├── 21_add_metric.asciidoc ├── 22_nested_bucket.asciidoc ├── 23_extra_metrics.asciidoc ├── 30_histogram.asciidoc ├── 35_date_histogram.asciidoc ├── 40_scope.asciidoc ├── 45_filtering.asciidoc ├── 50_sorting_ordering.asciidoc ├── 55_approx_intro.asciidoc ├── 60_cardinality.asciidoc ├── 65_percentiles.asciidoc ├── 70_sigterms_intro.asciidoc ├── 75_sigterms.asciidoc ├── 90_docvalues.asciidoc ├── 93_technical_docvalues.asciidoc └── 95_analyzed_vs_not.asciidoc ├── 301_Aggregation_Overview.asciidoc ├── 302_Example_Walkthrough.asciidoc ├── 303_Making_Graphs.asciidoc ├── 304_Approximate_Aggregations.asciidoc ├── 305_Significant_Terms.asciidoc ├── 306_Practical_Considerations.asciidoc ├── 310_Geopoints.asciidoc ├── 310_Geopoints ├── 20_Geopoints.asciidoc ├── 30_Filter_by_geopoint.asciidoc ├── 32_Bounding_box.asciidoc ├── 34_Geo_distance.asciidoc └── 50_Sorting_by_distance.asciidoc ├── 320_Geohashes.asciidoc ├── 320_Geohashes ├── 40_Geohashes.asciidoc ├── 50_Geohash_mapping.asciidoc └── 60_Geohash_cell_filter.asciidoc ├── 330_Geo_aggs.asciidoc ├── 330_Geo_aggs ├── 60_Geo_aggs.asciidoc ├── 62_Geo_distance_agg.asciidoc ├── 64_Geohash_grid_agg.asciidoc └── 66_Geo_bounds_agg.asciidoc ├── 340_Geoshapes.asciidoc ├── 340_Geoshapes ├── 70_Geoshapes.asciidoc ├── 72_Mapping_geo_shapes.asciidoc ├── 74_Indexing_geo_shapes.asciidoc ├── 76_Querying_geo_shapes.asciidoc └── 78_Indexed_geo_shapes.asciidoc ├── 400_Relationships.asciidoc ├── 400_Relationships ├── 10_Intro.asciidoc ├── 15_Application_joins.asciidoc ├── 20_Denormalization.asciidoc ├── 22_Top_hits.asciidoc ├── 25_Concurrency.asciidoc └── 26_Concurrency_solutions.asciidoc ├── 402_Nested.asciidoc ├── 402_Nested ├── 30_Nested_objects.asciidoc ├── 31_Nested_mapping.asciidoc ├── 32_Nested_query.asciidoc ├── 33_Nested_sorting.asciidoc └── 35_Nested_aggs.asciidoc ├── 404_Parent_Child.asciidoc ├── 404_Parent_Child ├── 40_Parent_child.asciidoc ├── 45_Indexing_parent_child.asciidoc ├── 50_Has_child.asciidoc ├── 55_Has_parent.asciidoc ├── 60_Children_agg.asciidoc ├── 65_Grandparents.asciidoc └── 70_Practical_considerations.asciidoc ├── 410_Scaling.asciidoc ├── 410_Scaling ├── 10_Intro.asciidoc ├── 15_Shard.asciidoc ├── 20_Overallocation.asciidoc ├── 25_Kagillion_shards.asciidoc ├── 30_Capacity_planning.asciidoc ├── 35_Replica_shards.asciidoc ├── 40_Multiple_indices.asciidoc ├── 45_Index_per_timeframe.asciidoc ├── 50_Index_templates.asciidoc ├── 55_Retiring_data.asciidoc ├── 60_Index_per_user.asciidoc ├── 65_Shared_index.asciidoc ├── 70_Faking_it.asciidoc ├── 75_One_big_user.asciidoc └── 80_Scale_is_not_infinite.asciidoc ├── 500_Cluster_Admin.asciidoc ├── 500_Cluster_Admin ├── 10_intro.asciidoc ├── 15_marvel.asciidoc ├── 20_health.asciidoc ├── 30_node_stats.asciidoc └── 40_other_stats.asciidoc ├── 510_Deployment.asciidoc ├── 510_Deployment ├── 10_intro.asciidoc ├── 20_hardware.asciidoc ├── 30_other.asciidoc ├── 40_config.asciidoc ├── 45_dont_touch.asciidoc ├── 50_heap.asciidoc ├── 60_file_descriptors.asciidoc ├── 70_conclusion.asciidoc └── 80_cluster_settings.asciidoc ├── 520_Post_Deployment.asciidoc ├── 520_Post_Deployment ├── 10_dynamic_settings.asciidoc ├── 
20_logging.asciidoc ├── 30_indexing_perf.asciidoc ├── 35_delayed_shard_allocation.asciidoc ├── 40_rolling_restart.asciidoc ├── 50_backup.asciidoc ├── 60_restore.asciidoc └── 70_conclusion.asciidoc ├── LICENSE.asciidoc ├── Preface.asciidoc ├── README.md ├── atlas.json ├── author_bio.html ├── book-docinfo.xml ├── book-extra-title-page.html ├── book.asciidoc ├── callouts ├── 1.pdf ├── 1.png ├── 10.pdf ├── 10.png ├── 11.pdf ├── 11.png ├── 2.pdf ├── 2.png ├── 3.pdf ├── 3.png ├── 4.pdf ├── 4.png ├── 5.pdf ├── 5.png ├── 6.pdf ├── 6.png ├── 7.pdf ├── 7.png ├── 8.pdf ├── 8.png ├── 9.pdf └── 9.png ├── colo.html ├── copyright.html ├── cover.html ├── foreword.asciidoc ├── images ├── 300_120_breadth_first_1.svg ├── 300_120_breadth_first_2.svg ├── 300_120_breadth_first_3.svg ├── 300_120_breadth_first_4.svg ├── 300_120_depth_first_1.svg ├── 300_120_depth_first_2.svg ├── 300_120_depth_first_3.svg ├── cover.png ├── elas_0201.png ├── elas_0202.png ├── elas_0203.png ├── elas_0204.png ├── elas_0205.png ├── elas_0206.png ├── elas_0301.png ├── elas_0401.png ├── elas_0402.png ├── elas_0403.png ├── elas_0404.png ├── elas_0405.png ├── elas_0406.png ├── elas_0901.png ├── elas_0902.png ├── elas_1101.png ├── elas_1102.png ├── elas_1103.png ├── elas_1104.png ├── elas_1105.png ├── elas_1106.png ├── elas_1107.png ├── elas_1108.png ├── elas_1109.png ├── elas_1110.png ├── elas_1111.png ├── elas_1701.png ├── elas_1702.png ├── elas_1703.png ├── elas_1704.png ├── elas_1705.png ├── elas_1706.png ├── elas_17in01.png ├── elas_17in02.png ├── elas_28in01.png ├── elas_28in02.png ├── elas_29in01.png ├── elas_29in02.png ├── elas_29in03.png ├── elas_33in01.png ├── elas_33in02.png ├── elas_4401.png ├── elas_4402.png ├── elas_4403.png └── elas_4404.png ├── ix.html ├── page_header.html ├── scripts ├── 300_Aggregations │ ├── README.md │ ├── generate.py │ ├── import.py │ └── mappings.json ├── search_docs.pl └── svg_to_png.pl ├── snippets ├── 010_Intro │ ├── 10_Info.json │ ├── 15_Count.json │ ├── 25_Index.json │ ├── 30_Get.json │ ├── 30_Query_DSL.json │ ├── 30_Simple_search.json │ └── 35_Aggregations.json ├── 020_Distributed_Cluster │ ├── 10_Cluster_health.json │ ├── 15_Add_index.json │ └── 30_Replicas.json ├── 030_Data │ ├── 10_Create_doc_123.json │ ├── 10_Create_doc_auto_ID.json │ ├── 15_Get_document.json │ ├── 25_Reindex_doc.json │ ├── 30_Create_doc.json │ ├── 35_Delete_doc.json │ ├── 40_Concurrency.json │ ├── 40_External_versions.json │ ├── 45_Partial_update.json │ ├── 45_Upsert.json │ ├── 50_Mget.json │ ├── 55_Bulk.json │ ├── 55_Bulk_defaults.json │ └── 55_Bulk_independent.json ├── 050_Search │ ├── 05_Empty_search.json │ ├── 15_Pagination.json │ ├── 20_All_field.json │ ├── 20_Query_string.json │ └── Test_data.json ├── 052_Mapping_Analysis │ ├── 25_Data_type_differences.json │ ├── 40_Analyze.json │ └── 45_Mapping.json ├── 054_Query_DSL │ ├── 60_Bool_query.json │ ├── 60_Empty_query.json │ ├── 60_Match_query.json │ ├── 70_Bool_filter.json │ ├── 70_Bool_query.json │ ├── 70_Exists_filter.json │ ├── 70_Match_all_query.json │ ├── 70_Match_query.json │ ├── 70_Multi_match_query.json │ ├── 70_Range_filter.json │ ├── 70_Term_filter.json │ ├── 70_Terms_filter.json │ ├── 75_Filtered_query.json │ ├── 80_Understanding_queries.json │ └── 80_Validate_query.json ├── 056_Sorting │ ├── 85_Multilevel_sort.json │ ├── 85_Sort_by_date.json │ ├── 88_Multifield.json │ ├── 90_Explain.json │ └── 90_Explain_API.json ├── 070_Index_Mgmt │ ├── 10_Settings.json │ ├── 15_Configure_Analyzer.json │ ├── 20_Custom_analyzer.json │ ├── 31_Source_field.json │ ├── 
35_Dynamic_mapping.json │ ├── 40_Custom_dynamic_mapping.json │ ├── 45_Default_mapping.json │ └── 55_Aliases.json ├── 080_Structured_Search │ ├── 05_Term_number.json │ ├── 05_Term_text.json │ ├── 10_Bool_filter.json │ ├── 15_Terms_filter.json │ ├── 20_Exact.json │ ├── 25_Range_filter.json │ └── 30_Exists_missing.json ├── 100_Full_Text_Search │ ├── 05_Match_query.json │ ├── 15_Bool_query.json │ ├── 25_Boost.json │ └── 30_Analysis.json ├── 110_Multi_Field_Search │ ├── 05_Multiple_query_strings.json │ ├── 15_Best_fields.json │ ├── 25_Best_fields.json │ ├── 30_Most_fields.json │ ├── 40_Bad_frequencies.json │ ├── 40_Entity_search_problems.json │ ├── 45_Custom_all.json │ ├── 50_Cross_field.json │ └── 55_Not_analyzed.json ├── 120_Proximity_Matching │ ├── 05_Match_phrase_query.json │ ├── 05_Term_positions.json │ ├── 10_Slop.json │ ├── 15_Multi_value_fields.json │ ├── 20_Scoring.json │ ├── 25_Relevance.json │ ├── 30_Performance.json │ └── 35_Shingles.json ├── 130_Partial_Matching │ ├── 10_Prefix_query.json │ ├── 15_Wildcard_regexp.json │ ├── 20_Match_phrase_prefix.json │ ├── 35_Postcodes.json │ ├── 35_Search_as_you_type.json │ └── 40_Compound_words.json └── 300_Aggregations │ ├── 20_basic_example.json │ ├── 30_histogram.json │ ├── 35_date_histogram.json │ ├── 40_scope.json │ ├── 45_filtering.json │ ├── 50_sorting_ordering.json │ ├── 60_cardinality.json │ └── 75_sigterms.json ├── stash ├── 35_null_value.asciidoc ├── 45_termslookup.asciidoc ├── 55_revisittermslookup.asciidoc ├── 60_Reindex_Optimizations.asciidoc ├── Arbitrary preference for search.asciidoc ├── Execution_mode_of_terms.asciidoc ├── Terminology.asciidoc ├── normalizingexact.asciidoc ├── omit_norms.asciidoc ├── persegmentbitsets.asciidoc ├── rewrite.asciidoc └── stopwords.asciidoc ├── svg ├── 02-01_cluster.svg ├── 02-02_one_node.svg ├── 02-03_two_nodes.svg ├── 02-04_three_nodes.svg ├── 02-05_replicas.svg ├── 02-06_node_failure.svg ├── 03-01_concurrency.svg ├── 04-01_index.svg ├── 04-02_write.svg ├── 04-03_get.svg ├── 04-04_update.svg ├── 04-05_mget.svg ├── 04-06_bulk.svg ├── 06-01_query.svg ├── 06-02_fetch.svg ├── 075_30_index.svg ├── 075_30_post_commit.svg ├── 075_30_pre_commit.svg ├── 075_40_post_refresh.svg ├── 075_40_pre_refresh.svg ├── 075_50_post_flush.svg ├── 075_50_post_refresh.svg ├── 075_50_pre_flush.svg ├── 075_50_pre_refresh.svg ├── 075_60_merge.svg ├── 075_60_post_merge.svg ├── 170_01_query.svg ├── 170_02_docs.svg ├── 170_03_linear_popularity.svg ├── 170_04_log_popularity.svg ├── 170_05_log_factor.svg ├── 170_06_log_sum.svg ├── 170_07_decay.svg ├── 170_08_term_saturation.svg ├── 410_15_one_shard.svg ├── 410_20_two_shards.svg ├── 410_35_four_nodes.svg └── 410_35_three_nodes.svg ├── test └── test.json ├── theme ├── epub │ └── layout.html ├── mobi │ └── layout.html └── pdf │ ├── pdf.css │ └── pdf.xsl ├── titlepage.html ├── toc.html └── tools ├── intakereport.txt └── oneoffs └── oneoff.css /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | html_docs 2 | styles.css 3 | book.html 4 | 5 | .project 6 | .settings 7 | 8 | .DS_Store 9 | 10 | .idea -------------------------------------------------------------------------------- /010_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | include::010_Intro/05_What_is_it.asciidoc[] 2 | 3 | 
include::010_Intro/10_Installing_ES.asciidoc[] 4 | 5 | include::010_Intro/15_API.asciidoc[] 6 | 7 | include::010_Intro/20_Document.asciidoc[] 8 | 9 | include::010_Intro/25_Tutorial_Indexing.asciidoc[] 10 | 11 | include::010_Intro/30_Tutorial_Search.asciidoc[] 12 | 13 | include::010_Intro/35_Tutorial_Aggregations.asciidoc[] 14 | 15 | include::010_Intro/40_Tutorial_Conclusion.asciidoc[] 16 | 17 | include::010_Intro/45_Distributed.asciidoc[] 18 | 19 | include::010_Intro/50_Conclusion.asciidoc[] -------------------------------------------------------------------------------- /010_Intro/40_Tutorial_Conclusion.asciidoc: -------------------------------------------------------------------------------- 1 | === Tutorial Conclusion 2 | 3 | Hopefully, this little tutorial was a good demonstration about what is possible 4 | in Elasticsearch. It is really just scratching the surface, and many features--such as suggestions, geolocation, percolation, fuzzy and partial matching--were omitted to keep the tutorial short. But it did highlight just how 5 | easy it is to start building advanced search functionality. No configuration 6 | was needed--just add data and start searching! 7 | 8 | It's likely that the syntax left you confused in places, and you may have questions 9 | about how to tweak and tune various aspects. That's fine! The rest of the 10 | book dives into each of these issues in detail, giving you a solid 11 | understanding of how Elasticsearch works. 12 | -------------------------------------------------------------------------------- /010_Intro/50_Conclusion.asciidoc: -------------------------------------------------------------------------------- 1 | === Next Steps 2 | 3 | By now you should have a taste of what you can do with Elasticsearch, and how 4 | easy it is to get started. Elasticsearch tries hard to work out of the box 5 | with minimal knowledge and configuration. The best way to learn Elasticsearch 6 | is by jumping in: just start indexing and searching! 7 | 8 | However, the more you know about Elasticsearch, the more productive you can 9 | become. The more you can tell Elasticsearch about the domain-specific 10 | elements of your application, the more you can fine-tune the output. 11 | 12 | The rest of this book will help you move from novice to expert. Each chapter explains the essentials, but also includes expert-level tips. If 13 | you're just getting started, these tips are probably not immediately relevant 14 | to you; Elasticsearch has sensible defaults and will generally do the right 15 | thing without any interference. You can always revisit these chapters later, 16 | when you are looking to improve performance by shaving off any wasted 17 | milliseconds. 18 | -------------------------------------------------------------------------------- /01_Search_in_depth.asciidoc: -------------------------------------------------------------------------------- 1 | ifndef::es_build[= placeholder1] 2 | 3 | [[search-in-depth]] 4 | = Search in Depth 5 | 6 | [partintro] 7 | -- 8 | 9 | In <> we covered the basic tools in just enough detail to 10 | allow you to start searching your data with Elasticsearch. ((("searching", "using Elasticsearch"))) It won't take 11 | long, though, before you find that you want more: more flexibility when matching 12 | user queries, more-accurate ranking of results, more-specific searches to 13 | cover different problem domains. 14 | 15 | To move to the next level, it is not enough to just use the `match` query. 
You 16 | need to understand your data and how you want to be able to search it. The 17 | chapters in this part explain how to index and query your data to allow 18 | you to take advantage of word proximity, partial matching, fuzzy matching, and 19 | language awareness. 20 | 21 | Understanding how each query contributes to the relevance `_score` will help 22 | you to tune your queries: to ensure that the documents you consider to be the 23 | best results appear on the first page, and to trim the ``long tail'' of barely 24 | relevant results. 25 | 26 | Search is not just about full-text search: a large portion of your data will 27 | be structured values like dates and numbers. We will start by explaining how 28 | to combine structured search((("structured search", "combining with full text search")))((("full text search", "combining with structured search"))) with full-text search in the most efficient way. 29 | 30 | -- 31 | 32 | include::080_Structured_Search.asciidoc[] 33 | 34 | include::100_Full_Text_Search.asciidoc[] 35 | 36 | include::110_Multi_Field_Search.asciidoc[] 37 | 38 | include::120_Proximity_Matching.asciidoc[] 39 | 40 | include::130_Partial_Matching.asciidoc[] 41 | 42 | include::170_Relevance.asciidoc[] 43 | 44 | 45 | -------------------------------------------------------------------------------- /020_Distributed_Cluster.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | include::020_Distributed_Cluster/00_Intro.asciidoc[] 3 | 4 | include::020_Distributed_Cluster/05_Empty_cluster.asciidoc[] 5 | 6 | include::020_Distributed_Cluster/10_Cluster_health.asciidoc[] 7 | 8 | include::020_Distributed_Cluster/15_Add_an_index.asciidoc[] 9 | 10 | include::020_Distributed_Cluster/20_Add_failover.asciidoc[] 11 | 12 | include::020_Distributed_Cluster/25_Scale_horizontally.asciidoc[] 13 | 14 | include::020_Distributed_Cluster/30_Scale_more.asciidoc[] 15 | 16 | include::020_Distributed_Cluster/35_Coping_with_failure.asciidoc[] 17 | 18 | -------------------------------------------------------------------------------- /020_Distributed_Cluster/05_Empty_cluster.asciidoc: -------------------------------------------------------------------------------- 1 | === An Empty Cluster 2 | 3 | If we start a single node, with no data and no ((("empty cluster")))((("clusters", "empty")))indices, our cluster looks like 4 | <>. 5 | 6 | [[img-cluster]] 7 | .A cluster with one empty node 8 | image::images/elas_0201.png["A cluster with one empty node"] 9 | 10 | A _node_ is a running instance of ((("nodes", "in clusters")))Elasticsearch, while a _cluster_ consists of 11 | one or more nodes with the same `cluster.name` that are working together to 12 | share their data and workload. As nodes are added to or removed from the 13 | cluster, the cluster reorganizes itself to spread the data evenly. 14 | 15 | One node in the cluster is elected to be the _master_ node, which((("master node"))) is in charge 16 | of managing cluster-wide changes like creating or deleting an index, or adding 17 | or removing a node from the cluster. The master node does not need to be 18 | involved in document-level changes or searches, which means that having just 19 | one master node will not become a bottleneck as traffic grows. Any node can 20 | become the master. Our example cluster has only one node, so it performs the 21 | master role. 22 | 23 | As users, we can talk to _any node in the cluster_, including the master node. 
24 | Every node knows where each document lives and can forward our request 25 | directly to the nodes that hold the data we are interested in. Whichever node 26 | we talk to manages the process of gathering the response from the node or 27 | nodes holding the data and returning the final response to the client. It is 28 | all managed transparently by Elasticsearch. 29 | 30 | -------------------------------------------------------------------------------- /020_Distributed_Cluster/10_Cluster_health.asciidoc: -------------------------------------------------------------------------------- 1 | [[cluster-health]] 2 | === Cluster Health 3 | 4 | Many statistics can be monitored in an Elasticsearch cluster, 5 | but the single most important((("cluster health"))) one is _cluster health_, which reports a 6 | `status` of either `green`, `yellow`, or `red`: 7 | 8 | [source,js] 9 | -------------------------------------------------- 10 | GET /_cluster/health 11 | -------------------------------------------------- 12 | // SENSE: 020_Distributed_Cluster/10_Cluster_health.json 13 | 14 | On an empty cluster with no indices, this will return something like the following: 15 | 16 | [source,js] 17 | -------------------------------------------------- 18 | { 19 | "cluster_name": "elasticsearch", 20 | "status": "green", <1> 21 | "timed_out": false, 22 | "number_of_nodes": 1, 23 | "number_of_data_nodes": 1, 24 | "active_primary_shards": 0, 25 | "active_shards": 0, 26 | "relocating_shards": 0, 27 | "initializing_shards": 0, 28 | "unassigned_shards": 0 29 | } 30 | -------------------------------------------------- 31 | <1> The `status` field is the one we're most interested in. 32 | 33 | The `status` field provides ((("status field")))an overall indication of how the cluster is 34 | functioning. The meanings of the three colors are provided here for reference: 35 | 36 | `green`:: 37 | All primary and replica shards are active. 38 | 39 | `yellow`:: 40 | All primary shards are active, but not all replica shards are active. 41 | 42 | `red`:: 43 | Not all primary shards are active. 44 | 45 | In the rest of this chapter, we explain what _primary_ and _replica_ shards are 46 | and explain the practical implications of each of the preceding colors. 47 | -------------------------------------------------------------------------------- /020_Distributed_Cluster/25_Scale_horizontally.asciidoc: -------------------------------------------------------------------------------- 1 | === Scale Horizontally 2 | 3 | What about scaling as the demand for our application grows?((("scaling", "horizontally")))((("clusters", "three-node cluster")))((("primary shards", "in three-node cluster"))) If we start a 4 | third node, our cluster reorganizes itself to look like 5 | <>. 6 | 7 | [[cluster-three-nodes]] 8 | .A three-node cluster--shards have been reallocated to spread the load 9 | image::images/elas_0204.png["A three-node cluster"] 10 | 11 | One shard each from `Node 1` and `Node 2` have moved to the new 12 | `Node 3`, and we have two shards per node, instead of three. 13 | This means that the hardware resources (CPU, RAM, I/O) of each node 14 | are being shared among fewer shards, allowing each shard to perform 15 | better. 16 | 17 | A shard is a fully fledged search engine in its own right, and is 18 | capable of using all of the resources of a single node. 
With our 19 | total of six shards (three primaries and three replicas), our index is capable 20 | of scaling out to a maximum of six nodes, with one shard on each node 21 | and each shard having access to 100% of its node's resources. 22 | 23 | -------------------------------------------------------------------------------- /030_Data/20_Exists.asciidoc: -------------------------------------------------------------------------------- 1 | [[doc-exists]] 2 | === Checking Whether a Document Exists 3 | 4 | If all you want to do is to check whether a ((("documents", "checking whether a document exists")))document exists--you're not 5 | interested in the content at all--then use((("HEAD method")))((("HTTP methods", "HEAD"))) the `HEAD` method instead 6 | of the `GET` method. `HEAD` requests don't return a body, just HTTP headers: 7 | 8 | [source,js] 9 | -------------------------------------------------- 10 | curl -i -XHEAD http://localhost:9200/website/blog/123 11 | -------------------------------------------------- 12 | 13 | Elasticsearch will return a `200 OK` status code if the document exists: 14 | 15 | [source,js] 16 | -------------------------------------------------- 17 | HTTP/1.1 200 OK 18 | Content-Type: text/plain; charset=UTF-8 19 | Content-Length: 0 20 | -------------------------------------------------- 21 | 22 | And a `404 Not Found` if it doesn't exist: 23 | 24 | [source,js] 25 | -------------------------------------------------- 26 | curl -i -XHEAD http://localhost:9200/website/blog/124 27 | -------------------------------------------------- 28 | 29 | [source,js] 30 | -------------------------------------------------- 31 | HTTP/1.1 404 Not Found 32 | Content-Type: text/plain; charset=UTF-8 33 | Content-Length: 0 34 | -------------------------------------------------- 35 | 36 | Of course, just because a document didn't exist when you checked it, doesn't 37 | mean that it won't exist a millisecond later: another process might create the 38 | document in the meantime. 
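If the goal of such a check is to avoid overwriting an existing document, a safer pattern than check-then-create is to let Elasticsearch enforce uniqueness atomically with the `_create` endpoint, which is covered later in this chapter. A minimal sketch, reusing the document ID from the example above (the request body is purely illustrative):

[source,js]
--------------------------------------------------
PUT /website/blog/124/_create
{
  "title": "My first blog entry",
  "text":  "Just trying this out..."
}
--------------------------------------------------

Elasticsearch returns `201 Created` if the document did not already exist, and `409 Conflict` if another process created it first, so there is no window for the race described above.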
39 | -------------------------------------------------------------------------------- /030_Data_In_Data_Out.asciidoc: -------------------------------------------------------------------------------- 1 | include::030_Data/00_Intro.asciidoc[] 2 | 3 | include::030_Data/05_Document.asciidoc[] 4 | 5 | include::030_Data/10_Index.asciidoc[] 6 | 7 | include::030_Data/15_Get.asciidoc[] 8 | 9 | include::030_Data/20_Exists.asciidoc[] 10 | 11 | include::030_Data/25_Update.asciidoc[] 12 | 13 | include::030_Data/30_Create.asciidoc[] 14 | 15 | include::030_Data/35_Delete.asciidoc[] 16 | 17 | include::030_Data/40_Version_control.asciidoc[] 18 | 19 | include::030_Data/45_Partial_update.asciidoc[] 20 | 21 | include::030_Data/50_Mget.asciidoc[] 22 | 23 | include::030_Data/55_Bulk.asciidoc[] 24 | 25 | -------------------------------------------------------------------------------- /040_Distributed_CRUD.asciidoc: -------------------------------------------------------------------------------- 1 | include::040_Distributed_CRUD/00_Intro.asciidoc[] 2 | 3 | include::040_Distributed_CRUD/05_Routing.asciidoc[] 4 | 5 | include::040_Distributed_CRUD/10_Shard_interaction.asciidoc[] 6 | 7 | include::040_Distributed_CRUD/15_Create_index_delete.asciidoc[] 8 | 9 | include::040_Distributed_CRUD/20_Retrieving.asciidoc[] 10 | 11 | include::040_Distributed_CRUD/25_Partial_updates.asciidoc[] 12 | 13 | include::040_Distributed_CRUD/30_Bulk_requests.asciidoc[] 14 | 15 | include::040_Distributed_CRUD/35_Bulk_format.asciidoc[] 16 | 17 | -------------------------------------------------------------------------------- /040_Distributed_CRUD/00_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[distributed-docs]] 2 | == Distributed Document Store 3 | 4 | In the preceding chapter, we looked at all the ways to put data into your index and 5 | then retrieve it. But we glossed over many technical details surrounding how 6 | the data is distributed and fetched from the cluster. This separation is done 7 | on purpose; you don't really need to know how data is distributed to work 8 | with Elasticsearch. It just works. 9 | 10 | In this chapter, we dive into those internal, technical details 11 | to help you understand how your data is stored in a distributed system. 12 | 13 | .Content Warning 14 | **** 15 | 16 | The information presented in this chapter is for your interest. You are not required to 17 | understand and remember all the detail in order to use Elasticsearch. The 18 | options that are discussed are for advanced users only. 19 | 20 | Read the section to gain a taste for how things work, and to know where the 21 | information is in case you need to refer to it in the future, but don't be 22 | overwhelmed by the details. 23 | 24 | **** 25 | 26 | -------------------------------------------------------------------------------- /040_Distributed_CRUD/10_Shard_interaction.asciidoc: -------------------------------------------------------------------------------- 1 | === How Primary and Replica Shards Interact 2 | 3 | For explanation purposes, let's((("shards", "interaction of primary and replica shards")))((("primary shards", "interaction with replica shards")))((("replica shards", "interaction with primary shards"))) imagine that we have a cluster 4 | consisting of three nodes. It contains one index called `blogs` that has 5 | two primary shards. Each primary shard has two replicas. 
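Such a layout would result from index settings along these lines (a minimal sketch; the values simply mirror the description above):

[source,js]
--------------------------------------------------
PUT /blogs
{
   "settings" : {
      "number_of_shards" :   2,
      "number_of_replicas" : 2
   }
}
--------------------------------------------------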
Copies of 6 | the same shard are never allocated to the same node, so our cluster 7 | looks something like <>. 8 | 9 | [[img-distrib]] 10 | .A cluster with three nodes and one index 11 | image::images/elas_0401.png["A cluster with three nodes and one index"] 12 | 13 | We can send our requests to any node in the cluster.((("nodes", "sending requests to"))) Every node is fully 14 | capable of serving any request. Every node knows the location of every 15 | document in the cluster and so can forward requests directly to the required 16 | node. In the following examples, we will send all of our requests to `Node 1`, 17 | which we will refer to as the _coordinating node_. 18 | 19 | TIP: When sending requests, it is good practice to round-robin through all the 20 | nodes in the cluster, in order to spread the load. 21 | -------------------------------------------------------------------------------- /040_Distributed_CRUD/20_Retrieving.asciidoc: -------------------------------------------------------------------------------- 1 | [[distrib-read]] 2 | === Retrieving a Document 3 | 4 | A document can be retrieved from a ((("documents", "retrieving")))primary shard or from any of its replicas, as shown in <>. 5 | 6 | [[img-distrib-read]] 7 | .Retrieving a single document 8 | image::images/elas_0403.png["Retrieving a single document"] 9 | 10 | Here is the sequence of steps to retrieve a document from either a 11 | primary or replica shard: 12 | 13 | 1. The client sends a get request to `Node 1`. 14 | 15 | 2. The node uses the document's `_id` to determine that the document 16 | belongs to shard `0`. Copies of shard `0` exist on all three nodes. 17 | On this occasion, it forwards the request to `Node 2`. 18 | 19 | 3. `Node 2` returns the document to `Node 1`, which returns the document 20 | to the client. 21 | 22 | For read requests, the coordinating node will choose a different shard copy on 23 | every request in order to balance the load; it round-robins through all 24 | shard copies. 25 | 26 | It is possible that, while a document is being indexed, the document will 27 | already be present on the primary shard but not yet copied to the replica 28 | shards. In this case, a replica might report that the document doesn't exist, 29 | while the primary would have returned the document successfully. Once the 30 | indexing request has returned success to the user, the document will be 31 | available on the primary and all replica shards. 
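The request that sets this sequence in motion is just the ordinary GET used earlier in the book, sent to whichever node we choose as the coordinating node. A minimal sketch, reusing the `website` index from previous examples:

[source,js]
--------------------------------------------------
GET /website/blog/123
--------------------------------------------------

The client does not pick the shard copy; the coordinating node does, using the round-robin strategy described above.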
32 | -------------------------------------------------------------------------------- /050_Search.asciidoc: -------------------------------------------------------------------------------- 1 | include::050_Search/00_Intro.asciidoc[] 2 | 3 | include::050_Search/05_Empty_search.asciidoc[] 4 | 5 | include::050_Search/10_Multi_index_multi_type.asciidoc[] 6 | 7 | include::050_Search/15_Pagination.asciidoc[] 8 | 9 | include::050_Search/20_Query_string.asciidoc[] 10 | -------------------------------------------------------------------------------- /052_Mapping_Analysis.asciidoc: -------------------------------------------------------------------------------- 1 | include::052_Mapping_Analysis/25_Data_type_differences.asciidoc[] 2 | 3 | include::052_Mapping_Analysis/30_Exact_vs_full_text.asciidoc[] 4 | 5 | include::052_Mapping_Analysis/35_Inverted_index.asciidoc[] 6 | 7 | include::052_Mapping_Analysis/40_Analysis.asciidoc[] 8 | 9 | include::052_Mapping_Analysis/45_Mapping.asciidoc[] 10 | 11 | include::052_Mapping_Analysis/50_Complex_datatypes.asciidoc[] 12 | 13 | -------------------------------------------------------------------------------- /054_Query_DSL.asciidoc: -------------------------------------------------------------------------------- 1 | include::054_Query_DSL/55_Request_body_search.asciidoc[] 2 | 3 | include::054_Query_DSL/60_Query_DSL.asciidoc[] 4 | 5 | include::054_Query_DSL/65_Queries_vs_filters.asciidoc[] 6 | 7 | include::054_Query_DSL/70_Important_clauses.asciidoc[] 8 | 9 | include::054_Query_DSL/75_Combining_queries_together.asciidoc[] 10 | 11 | include::054_Query_DSL/80_Validating_queries.asciidoc[] 12 | -------------------------------------------------------------------------------- /056_Sorting.asciidoc: -------------------------------------------------------------------------------- 1 | include::056_Sorting/85_Sorting.asciidoc[] 2 | 3 | include::056_Sorting/88_String_sorting.asciidoc[] 4 | 5 | include::056_Sorting/90_What_is_relevance.asciidoc[] 6 | 7 | include::056_Sorting/95_Docvalues.asciidoc[] 8 | -------------------------------------------------------------------------------- /060_Distributed_Search.asciidoc: -------------------------------------------------------------------------------- 1 | include::060_Distributed_Search/00_Intro.asciidoc[] 2 | 3 | include::060_Distributed_Search/05_Query_phase.asciidoc[] 4 | 5 | include::060_Distributed_Search/10_Fetch_phase.asciidoc[] 6 | 7 | include::060_Distributed_Search/15_Search_options.asciidoc[] 8 | 9 | include::060_Distributed_Search/20_Scroll.asciidoc[] 10 | 11 | -------------------------------------------------------------------------------- /060_Distributed_Search/00_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[distributed-search]] 2 | == Distributed Search Execution 3 | 4 | Before moving on, we are going to take a detour and talk about how search is 5 | executed in a distributed environment.((("distributed search execution"))) It is a bit more complicated than the 6 | basic _create-read-update-delete_ (CRUD) requests((("CRUD (create-read-update-delete) operations"))) that we discussed in 7 | <>. 8 | 9 | .Content Warning 10 | **** 11 | 12 | The information presented in this chapter is for your interest. You are not required to 13 | understand and remember all the detail in order to use Elasticsearch. 
14 | 15 | Read this chapter to gain a taste for how things work, and to know where the 16 | information is in case you need to refer to it in the future, but don't be 17 | overwhelmed by the detail. 18 | 19 | **** 20 | 21 | A CRUD operation deals with a single document that has a unique combination of 22 | `_index`, `_type`, and <> (which defaults to the 23 | document's `_id`). This means that we know exactly which shard in the cluster 24 | holds that document. 25 | 26 | Search requires a more complicated execution model because we don't know which 27 | documents will match the query: they could be on any shard in the cluster. A 28 | search request has to consult a copy of every shard in the index or indices 29 | we're interested in to see if they have any matching documents. 30 | 31 | But finding all matching documents is only half the story. Results from 32 | multiple shards must be combined into a single sorted list before the `search` 33 | API can return a ``page'' of results. For this reason, search is executed in a 34 | two-phase process called _query then fetch_. 35 | -------------------------------------------------------------------------------- /06_Modeling_your_data.asciidoc: -------------------------------------------------------------------------------- 1 | ifndef::es_build[= placeholder6] 2 | 3 | [[modeling-your-data]] 4 | 5 | = Modeling Your Data 6 | 7 | [partintro] 8 | -- 9 | 10 | Elasticsearch is a different kind of beast, especially if you come from the 11 | world of SQL.((("modeling your data"))) It comes with many benefits: performance, scale, near real-time 12 | search, and analytics across massive amounts of data. And it is easy to get 13 | going! Just download and start using it. 14 | 15 | But it is not magic. To get the most out of Elasticsearch, you need to 16 | understand how it works and how to make it work for your needs. 17 | 18 | Handling relationships between entities is not as obvious as it is with a 19 | dedicated relational store. The golden rule of a relational database--normalize your data--does not apply to Elasticsearch. In <>, 20 | <>, and <> we discuss the pros and cons of 21 | the available approaches. 22 | 23 | Then in <> we talk about the features that Elasticsearch offers 24 | that enable you to scale out quickly and flexibly. Scale is not one-size-fits-all. You need to think about how data flows through your system, and 25 | design your model accordingly. Time-based data like log events or social 26 | network streams require a very different approach than more static collections 27 | of documents. 28 | 29 | And finally, we talk about the one thing in Elasticsearch that doesn't scale. 30 | 31 | -- 32 | 33 | include::400_Relationships.asciidoc[] 34 | 35 | include::402_Nested.asciidoc[] 36 | 37 | include::404_Parent_Child.asciidoc[] 38 | 39 | include::410_Scaling.asciidoc[] 40 | 41 | 42 | -------------------------------------------------------------------------------- /070_Index_Mgmt.asciidoc: -------------------------------------------------------------------------------- 1 | [[index-management]] 2 | == Index Management 3 | 4 | We have seen how Elasticsearch makes it easy to start developing a new 5 | application without requiring any advance planning or setup. However, it 6 | doesn't take long before you start wanting to fine-tune the indexing and 7 | search process to better suit your particular use case. Almost all of these customizations relate to the index, and the types 8 | that it contains. 
In this chapter, we introduce the APIs 9 | for managing indices and type mappings, and the most important settings. 10 | 11 | include::070_Index_Mgmt/05_Create_Delete.asciidoc[] 12 | 13 | include::070_Index_Mgmt/10_Settings.asciidoc[] 14 | 15 | include::070_Index_Mgmt/15_Configure_Analyzer.asciidoc[] 16 | 17 | include::070_Index_Mgmt/20_Custom_Analyzers.asciidoc[] 18 | 19 | include::070_Index_Mgmt/25_Mappings.asciidoc[] 20 | 21 | include::070_Index_Mgmt/30_Root_Object.asciidoc[] 22 | 23 | include::070_Index_Mgmt/35_Dynamic_Mapping.asciidoc[] 24 | 25 | include::070_Index_Mgmt/40_Custom_Dynamic_Mapping.asciidoc[] 26 | 27 | include::070_Index_Mgmt/45_Default_Mapping.asciidoc[] 28 | 29 | include::070_Index_Mgmt/50_Reindexing.asciidoc[] 30 | 31 | include::070_Index_Mgmt/55_Aliases.asciidoc[] 32 | -------------------------------------------------------------------------------- /070_Index_Mgmt/30_Root_Object.asciidoc: -------------------------------------------------------------------------------- 1 | [[root-object]] 2 | === The Root Object 3 | 4 | The uppermost level of a mapping is known ((("mapping (types)", "root object")))((("root object")))as the _root object_. It may 5 | contain the following: 6 | 7 | * A _properties_ section, which lists the mapping for each field that a 8 | document may contain 9 | 10 | * Various metadata fields, all of which start with an underscore, such 11 | as `_type`, `_id`, and `_source` 12 | 13 | * Settings, which control how the dynamic detection of new fields 14 | is handled, such as `analyzer`, `dynamic_date_formats`, and 15 | `dynamic_templates` 16 | 17 | * Other settings, which can be applied both to the root object and to fields 18 | of type `object`, such as `enabled`, `dynamic`, and `include_in_all` 19 | 20 | ==== Properties 21 | 22 | We have already discussed the three most important settings for document 23 | fields or ((("root object", "properties")))((("properties", "important settings")))properties in <> and <>: 24 | 25 | `type`:: 26 | The datatype that the field contains, such as `string` or `date` 27 | 28 | `index`:: 29 | Whether a field should be searchable as full text (`analyzed`), searchable as an exact value (`not_analyzed`), or not searchable at all (`no`) 30 | 31 | `analyzer`:: 32 | Which `analyzer` to use for a full-text field, both at index time and at search time 33 | 34 | We will discuss other field types such as `ip`, `geo_point`, and `geo_shape` in 35 | the appropriate sections later in the book. 36 | 37 | include::31_Metadata_source.asciidoc[] 38 | 39 | include::32_Metadata_all.asciidoc[] 40 | 41 | include::33_Metadata_ID.asciidoc[] 42 | -------------------------------------------------------------------------------- /070_Index_Mgmt/33_Metadata_ID.asciidoc: -------------------------------------------------------------------------------- 1 | ==== Metadata: Document Identity 2 | 3 | There are four metadata fields ((("metadata, document", "identity")))associated with document identity: 4 | 5 | `_id`:: 6 | The string ID of the document 7 | 8 | `_type`:: 9 | The type name of the document 10 | 11 | `_index`:: 12 | The index where the document lives 13 | 14 | `_uid`:: 15 | The `_type` and `_id` concatenated together as `type#id` 16 | 17 | By default, the `_uid` field is((("id field"))) stored (can be retrieved) and 18 | indexed (searchable). 
The `_type` field((("type field")))((("index field")))((("uid field"))) is indexed but not stored, 19 | and the `_id` and `_index` fields are neither indexed nor stored, meaning 20 | they don't really exist. 21 | 22 | In spite of this, you can query the `_id` field as though it were a real 23 | field. Elasticsearch uses the `_uid` field to derive the `_id`. Although you 24 | can change the `index` and `store` settings for these fields, you almost 25 | never need to do so. 26 | -------------------------------------------------------------------------------- /070_Index_Mgmt/45_Default_Mapping.asciidoc: -------------------------------------------------------------------------------- 1 | [[default-mapping]] 2 | === Default Mapping 3 | 4 | Often, all types in an index share similar fields and settings. ((("mapping (types)", "default")))((("default mapping"))) It can be 5 | more convenient to specify these common settings in the `_default_` mapping, 6 | instead of having to repeat yourself every time you create a new type. The 7 | `_default_` mapping acts as a template for new types. All types created 8 | _after_ the `_default_` mapping will include all of these default settings, 9 | unless explicitly overridden in the type mapping itself. 10 | 11 | For instance, we can disable the `_all` field for all types,((("_all field", sortas="all field"))) using the 12 | `_default_` mapping, but enable it just for the `blog` type, as follows: 13 | 14 | [source,js] 15 | -------------------------------------------------- 16 | PUT /my_index 17 | { 18 | "mappings": { 19 | "_default_": { 20 | "_all": { "enabled": false } 21 | }, 22 | "blog": { 23 | "_all": { "enabled": true } 24 | } 25 | } 26 | } 27 | -------------------------------------------------- 28 | // SENSE: 070_Index_Mgmt/45_Default_mapping.json 29 | 30 | 31 | The `_default_` mapping can also be a good place to specify index-wide 32 | <>. 33 | -------------------------------------------------------------------------------- /075_Inside_a_shard.asciidoc: -------------------------------------------------------------------------------- 1 | include::075_Inside_a_shard/10_Intro.asciidoc[] 2 | 3 | include::075_Inside_a_shard/20_Making_text_searchable.asciidoc[] 4 | 5 | include::075_Inside_a_shard/30_Dynamic_indices.asciidoc[] 6 | 7 | include::075_Inside_a_shard/40_Near_real_time.asciidoc[] 8 | 9 | include::075_Inside_a_shard/50_Persistent_changes.asciidoc[] 10 | 11 | include::075_Inside_a_shard/60_Segment_merging.asciidoc[] 12 | 13 | -------------------------------------------------------------------------------- /075_Inside_a_shard/10_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[inside-a-shard]] 2 | == Inside a Shard 3 | 4 | In <>, we introduced the _shard_, and described((("shards"))) it as a 5 | low-level _worker unit_. But what exactly _is_ a shard and how does it work? 6 | In this chapter, we answer these questions: 7 | 8 | * Why is search _near_ real-time? 9 | * Why are document CRUD (create-read-update-delete) operations _real-time_? 10 | * How does Elasticsearch ensure that the changes you make are durable, that 11 | they won't be lost if there is a power failure? 12 | * Why does deleting documents not free up space immediately? 13 | * What do the `refresh`, `flush`, and `optimize` APIs do, and when should 14 | you use them? 15 | 16 | The easiest way to understand how a shard functions today is to start with a 17 | history lesson. 
We will look at the problems that needed to be solved in order 18 | to provide a distributed durable data store with near real-time search and 19 | analytics. 20 | 21 | .Content Warning 22 | **** 23 | 24 | The information presented in this chapter is for your interest. You are not required to 25 | understand and remember all the detail in order to use Elasticsearch. Read 26 | this chapter to gain a taste for how things work, and to know where the 27 | information is in case you need to refer to it in the future, but don't be 28 | overwhelmed by the detail. 29 | 30 | **** 31 | 32 | -------------------------------------------------------------------------------- /07_Admin.asciidoc: -------------------------------------------------------------------------------- 1 | ifndef::es_build[= placeholder7] 2 | 3 | 4 | [[administration]] 5 | = Administration, Monitoring, and Deployment 6 | 7 | [partintro] 8 | -- 9 | The majority of this book is aimed at building applications by using Elasticsearch 10 | as the backend. This section is a little different. Here, you will learn 11 | how to manage Elasticsearch itself. Elasticsearch is a complex piece of 12 | software, with many moving parts. Many APIs are designed 13 | to help you manage your Elasticsearch deployment. 14 | 15 | In this chapter, we cover three main topics: 16 | 17 | - Monitoring your cluster's vital statistics, understanding which behaviors are normal and which 18 | should be cause for alarm, and interpreting various stats provided by Elasticsearch 19 | - Deploying your cluster to production, including best practices and important 20 | configuration that should (or should not!) be changed 21 | - Performing post-deployment logistics, such as a rolling restart or backup of 22 | your cluster 23 | -- 24 | 25 | include::500_Cluster_Admin.asciidoc[] 26 | 27 | include::510_Deployment.asciidoc[] 28 | 29 | include::520_Post_Deployment.asciidoc[] 30 | 31 | 32 | -------------------------------------------------------------------------------- /080_Structured_Search.asciidoc: -------------------------------------------------------------------------------- 1 | include::080_Structured_Search/00_structuredsearch.asciidoc[] 2 | 3 | include::080_Structured_Search/05_term.asciidoc[] 4 | 5 | include::080_Structured_Search/10_compoundfilters.asciidoc[] 6 | 7 | include::080_Structured_Search/15_terms.asciidoc[] 8 | 9 | include::080_Structured_Search/20_contains.asciidoc[] 10 | 11 | include::080_Structured_Search/25_ranges.asciidoc[] 12 | 13 | include::080_Structured_Search/30_existsmissing.asciidoc[] 14 | 15 | include::080_Structured_Search/40_bitsets.asciidoc[] 16 | -------------------------------------------------------------------------------- /080_Structured_Search/00_structuredsearch.asciidoc: -------------------------------------------------------------------------------- 1 | [[structured-search]] 2 | == Structured Search 3 | 4 | _Structured search_ is about interrogating ((("structured search")))data that has inherent structure. 5 | Dates, times, and numbers are all structured: they have a precise format 6 | that you can perform logical operations on. Common operations include 7 | comparing ranges of numbers or dates, or determining which of two values is 8 | larger. 9 | 10 | Text can be structured too. A box of crayons has a discrete set of colors: 11 | `red`, `green`, `blue`. A blog post may be tagged with keywords 12 | `distributed` and `search`. 
Products in an ecommerce store have Universal 13 | Product Codes (UPCs) or some other identifier that requires strict and 14 | structured formatting. 15 | 16 | With structured search, the answer to your question is _always_ a yes or no; 17 | something either belongs in the set or it does not. Structured search does 18 | not worry about document relevance or scoring; it simply includes or 19 | excludes documents. 20 | 21 | This should make sense logically. A number can't be _more_ in a range than 22 | any other number that falls in the same range. It is either in the range--or it isn't. Similarly, for structured text, a value is either equal or it 23 | isn't. There is no concept of _more similar_. 24 | -------------------------------------------------------------------------------- /100_Full_Text_Search.asciidoc: -------------------------------------------------------------------------------- 1 | include::100_Full_Text_Search/00_Intro.asciidoc[] 2 | 3 | include::100_Full_Text_Search/05_Match_query.asciidoc[] 4 | 5 | include::100_Full_Text_Search/10_Multi_word_queries.asciidoc[] 6 | 7 | include::100_Full_Text_Search/15_Combining_queries.asciidoc[] 8 | 9 | include::100_Full_Text_Search/20_How_match_uses_bool.asciidoc[] 10 | 11 | include::100_Full_Text_Search/25_Boosting_clauses.asciidoc[] 12 | 13 | include::100_Full_Text_Search/30_Controlling_analysis.asciidoc[] 14 | 15 | include::100_Full_Text_Search/35_Relevance_is_broken.asciidoc[] 16 | 17 | -------------------------------------------------------------------------------- /110_Multi_Field_Search.asciidoc: -------------------------------------------------------------------------------- 1 | include::110_Multi_Field_Search/00_Intro.asciidoc[] 2 | 3 | include::110_Multi_Field_Search/05_Multiple_query_strings.asciidoc[] 4 | 5 | include::110_Multi_Field_Search/10_Single_query_string.asciidoc[] 6 | 7 | include::110_Multi_Field_Search/15_Best_field.asciidoc[] 8 | 9 | include::110_Multi_Field_Search/20_Tuning_best_field_queries.asciidoc[] 10 | 11 | include::110_Multi_Field_Search/25_Multi_match_query.asciidoc[] 12 | 13 | include::110_Multi_Field_Search/30_Most_fields.asciidoc[] 14 | 15 | include::110_Multi_Field_Search/35_Entity_search.asciidoc[] 16 | 17 | include::110_Multi_Field_Search/40_Field_centric.asciidoc[] 18 | 19 | include::110_Multi_Field_Search/45_Custom_all.asciidoc[] 20 | 21 | include::110_Multi_Field_Search/50_Cross_field.asciidoc[] 22 | 23 | include::110_Multi_Field_Search/55_Not_analyzed.asciidoc[] 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /110_Multi_Field_Search/00_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[multi-field-search]] 2 | == Multifield Search 3 | 4 | Queries are seldom simple one-clause `match` queries. ((("multifield search"))) We frequently need to 5 | search for the same or different query strings in one or more fields, which 6 | means that we need to be able to combine multiple query clauses and their 7 | relevance scores in a way that makes sense. 8 | 9 | Perhaps we're looking for a book called _War and Peace_ by an author called 10 | Leo Tolstoy. Perhaps we're searching the Elasticsearch documentation 11 | for ``minimum should match,'' which might be in the title or the body of a 12 | page. Or perhaps we're searching for users with first name John and last 13 | name Smith. 
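As a taste of what is to come, the last of these examples can be expressed with the `multi_match` query introduced later in this chapter. This is a minimal sketch; the `first_name` and `last_name` fields are assumed to exist in your mapping:

[source,js]
--------------------------------------------------
GET /_search
{
    "query": {
        "multi_match": {
            "query":  "John Smith",
            "fields": [ "first_name", "last_name" ]
        }
    }
}
--------------------------------------------------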
14 | 15 | In this chapter, we present the available tools for constructing multiclause 16 | searches and how to figure out which solution you should apply to your 17 | particular use case. 18 | -------------------------------------------------------------------------------- /110_Multi_Field_Search/55_Not_analyzed.asciidoc: -------------------------------------------------------------------------------- 1 | === Exact-Value Fields 2 | 3 | The final topic that we should touch on before leaving multifield queries is 4 | that of exact-value `not_analyzed` fields. ((("not_analyzed fields", "exact value, in multi-field queries")))((("multifield search", "exact value fields")))((("exact values", "exact value not_analyzed fields in multifield search")))((("analyzed fields", "avoiding mixing with not analyzed fields in multi_match queries"))) It is not useful to mix 5 | `not_analyzed` fields with `analyzed` fields in `multi_match` queries. 6 | 7 | The reason for this can be demonstrated easily by looking at a query 8 | explanation. Imagine that we have set the `title` field to be `not_analyzed`: 9 | 10 | [source,js] 11 | -------------------------------------------------- 12 | GET /_validate/query?explain 13 | { 14 | "query": { 15 | "multi_match": { 16 | "query": "peter smith", 17 | "type": "cross_fields", 18 | "fields": [ "title", "first_name", "last_name" ] 19 | } 20 | } 21 | } 22 | -------------------------------------------------- 23 | // SENSE: 110_Multi_Field_Search/55_Not_analyzed.json 24 | 25 | Because the `title` field is not analyzed, it searches that field for a single 26 | term consisting of the whole query string! 27 | 28 | title:peter smith 29 | ( 30 | blended("peter", fields: [first_name, last_name]) 31 | blended("smith", fields: [first_name, last_name]) 32 | ) 33 | 34 | That term clearly does not exist in the inverted index of the `title` field, 35 | and can never be found. Avoid using `not_analyzed` fields in `multi_match` 36 | queries. 37 | -------------------------------------------------------------------------------- /120_Proximity_Matching.asciidoc: -------------------------------------------------------------------------------- 1 | include::120_Proximity_Matching/00_Intro.asciidoc[] 2 | 3 | include::120_Proximity_Matching/05_Phrase_matching.asciidoc[] 4 | 5 | include::120_Proximity_Matching/10_Slop.asciidoc[] 6 | 7 | include::120_Proximity_Matching/15_Multi_value_fields.asciidoc[] 8 | 9 | include::120_Proximity_Matching/20_Scoring.asciidoc[] 10 | 11 | include::120_Proximity_Matching/25_Relevance.asciidoc[] 12 | 13 | include::120_Proximity_Matching/30_Performance.asciidoc[] 14 | 15 | include::120_Proximity_Matching/35_Shingles.asciidoc[] 16 | 17 | 18 | -------------------------------------------------------------------------------- /120_Proximity_Matching/00_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[proximity-matching]] 2 | == Proximity Matching 3 | 4 | Standard full-text search with TF/IDF treats documents, or at least each field 5 | within a document, as a big _bag of words_.((("proximity matching"))) The `match` query can tell us whether 6 | that bag contains our search terms, but that is only part of the story. 7 | It can't tell us anything about the relationship between words. 8 | 9 | Consider the difference between these sentences: 10 | 11 | * Sue ate the alligator. 12 | * The alligator ate Sue. 13 | * Sue never goes anywhere without her alligator-skin purse. 
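For illustration, assume these sentences are indexed into the `title` field of `my_index` (both names are assumptions). A plain `match` query for the two words would look like this:

[source,js]
--------------------------------------------------
GET /my_index/my_type/_search
{
    "query": {
        "match": {
            "title": "sue alligator"
        }
    }
}
--------------------------------------------------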
14 | 15 | A `match` query for `sue alligator` would match all three documents, but it 16 | doesn't tell us whether the two words form part of the same idea, or even the same 17 | paragraph. 18 | 19 | Understanding how words relate to each other is a complicated problem, and 20 | we can't solve it by just using another type of query, 21 | but we can at least find words that appear to be related because they appear 22 | near each other or even right next to each other. 23 | 24 | Each document may be much longer than the examples we have presented: `Sue` 25 | and `alligator` may be separated by paragraphs of other text. Perhaps we still 26 | want to return these documents in which the words are widely separated, but we 27 | want to give documents in which the words are close together a higher relevance 28 | score. 29 | 30 | This is the province of _phrase matching_, or _proximity matching_. 31 | 32 | [TIP] 33 | ================================================== 34 | 35 | In this chapter, we are using the same example documents that we used for 36 | the <>. 37 | 38 | ================================================== 39 | -------------------------------------------------------------------------------- /130_Partial_Matching.asciidoc: -------------------------------------------------------------------------------- 1 | include::130_Partial_Matching/00_Intro.asciidoc[] 2 | 3 | include::130_Partial_Matching/05_Postcodes.asciidoc[] 4 | 5 | include::130_Partial_Matching/10_Prefix_query.asciidoc[] 6 | 7 | include::130_Partial_Matching/15_WildcardRegexp.asciidoc[] 8 | 9 | include::130_Partial_Matching/20_Match_phrase_prefix.asciidoc[] 10 | 11 | include::130_Partial_Matching/25_Index_time.asciidoc[] 12 | 13 | include::130_Partial_Matching/30_Ngram_intro.asciidoc[] 14 | 15 | include::130_Partial_Matching/35_Search_as_you_type.asciidoc[] 16 | 17 | include::130_Partial_Matching/40_Compound_words.asciidoc[] 18 | 19 | -------------------------------------------------------------------------------- /130_Partial_Matching/25_Index_time.asciidoc: -------------------------------------------------------------------------------- 1 | === Index-Time Optimizations 2 | 3 | All of the solutions we've talked about so far are implemented at 4 | _query time_. ((("index time optimizations")))((("partial matching", "index time optimizations")))They don't require any special mappings or indexing patterns; 5 | they simply work with the data that you've already indexed. 6 | 7 | The flexibility of query-time operations comes at a cost: search performance. 8 | Sometimes it may make sense to move the cost away from the query. In a real- 9 | time web application, an additional 100ms may be too much latency to tolerate. 10 | 11 | By preparing your data at index time, you can make your searches more flexible 12 | and improve performance. You still pay a price: increased index size and 13 | slightly slower indexing throughput, but it is a price you pay once at index 14 | time, instead of paying it on every query. 15 | 16 | Your users will thank you. 
17 | -------------------------------------------------------------------------------- /130_Partial_Matching/30_Ngram_intro.asciidoc: -------------------------------------------------------------------------------- 1 | === Ngrams for Partial Matching 2 | 3 | As we have said before, ``You can find only terms that exist in the inverted 4 | index.'' Although the `prefix`, `wildcard`, and `regexp` queries demonstrated that 5 | that is not strictly true, it _is_ true that doing a single-term lookup is 6 | much faster than iterating through the terms list to find matching terms on 7 | the fly.((("partial matching", "index time optimizations", "n-grams"))) Preparing your data for partial matching ahead of time will increase 8 | your search performance. 9 | 10 | Preparing your data at index time means choosing the right analysis chain, and 11 | the tool that we use for partial matching is the _n-gram_.((("n-grams"))) An n-gram can be 12 | best thought of as a _moving window on a word_. The _n_ stands for a length. 13 | If we were to n-gram the word `quick`, the results would depend on the length 14 | we have chosen: 15 | 16 | [horizontal] 17 | * Length 1 (unigram): [ `q`, `u`, `i`, `c`, `k` ] 18 | * Length 2 (bigram): [ `qu`, `ui`, `ic`, `ck` ] 19 | * Length 3 (trigram): [ `qui`, `uic`, `ick` ] 20 | * Length 4 (four-gram): [ `quic`, `uick` ] 21 | * Length 5 (five-gram): [ `quick` ] 22 | 23 | Plain n-grams are useful for matching _somewhere within a word_, a technique 24 | that we will use in <>. However, for search-as-you-type, 25 | we use a specialized form of n-grams called _edge n-grams_. ((("edge n-grams"))) Edge 26 | n-grams are anchored to the beginning of the word. Edge n-gramming the word 27 | `quick` would result in this: 28 | 29 | * `q` 30 | * `qu` 31 | * `qui` 32 | * `quic` 33 | * `quick` 34 | 35 | You may notice that this conforms exactly to the letters that a user searching for ``quick'' would type. In other words, these are the 36 | perfect terms to use for instant search! 
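To give a concrete sense of how this is wired up, here is a minimal sketch of an
index whose analyzer applies an `edge_ngram` token filter at index time. The
index name, filter name, and gram lengths are illustrative only:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "settings": {
    "analysis": {
      "filter": {
        "autocomplete_filter": {
          "type":     "edge_ngram",
          "min_gram": 1,
          "max_gram": 20
        }
      },
      "analyzer": {
        "autocomplete": {
          "type":      "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "autocomplete_filter" <1>
          ]
        }
      }
    }
  }
}
--------------------------------------------------
<1> The `edge_ngram` filter runs after `lowercase`, so the word `Quick` would be
indexed as the terms `q`, `qu`, `qui`, `quic`, and `quick`.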
37 | -------------------------------------------------------------------------------- /170_Relevance.asciidoc: -------------------------------------------------------------------------------- 1 | include::170_Relevance/05_Intro.asciidoc[] 2 | 3 | include::170_Relevance/10_Scoring_theory.asciidoc[] 4 | 5 | include::170_Relevance/15_Practical_scoring.asciidoc[] 6 | 7 | include::170_Relevance/20_Query_time_boosting.asciidoc[] 8 | 9 | include::170_Relevance/25_Query_scoring.asciidoc[] 10 | 11 | include::170_Relevance/30_Not_quite_not.asciidoc[] 12 | 13 | include::170_Relevance/35_Ignoring_TFIDF.asciidoc[] 14 | 15 | include::170_Relevance/40_Function_score_query.asciidoc[] 16 | 17 | include::170_Relevance/45_Popularity.asciidoc[] 18 | 19 | include::170_Relevance/50_Boosting_filtered_subsets.asciidoc[] 20 | 21 | include::170_Relevance/55_Random_scoring.asciidoc[] 22 | 23 | include::170_Relevance/60_Decay_functions.asciidoc[] 24 | 25 | include::170_Relevance/65_Script_score.asciidoc[] 26 | 27 | include::170_Relevance/70_Pluggable_similarities.asciidoc[] 28 | 29 | include::170_Relevance/75_Changing_similarities.asciidoc[] 30 | 31 | include::170_Relevance/80_Conclusion.asciidoc[] 32 | -------------------------------------------------------------------------------- /170_Relevance/05_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[controlling-relevance]] 2 | == Controlling Relevance 3 | 4 | Databases that deal purely in structured data (such as dates, numbers, and 5 | string enums) have it easy: they((("relevance", "controlling"))) just have to check whether a document (or a 6 | row, in a relational database) matches the query. 7 | 8 | While Boolean yes/no matches are an essential part of full-text search, they 9 | are not enough by themselves. Instead, we also need to know how relevant each 10 | document is to the query. Full-text search engines have to not only find the 11 | matching documents, but also sort them by relevance. 12 | 13 | Full-text relevance ((("similarity algorithms")))formulae, or _similarity algorithms_, combine several 14 | factors to produce a single relevance `_score` for each document. In this 15 | chapter, we examine the various moving parts and discuss how they can be 16 | controlled. 17 | 18 | Of course, relevance is not just about full-text queries; it may need to 19 | take structured data into account as well. Perhaps we are looking for a 20 | vacation home with particular features (air-conditioning, sea view, free 21 | WiFi). The more features that a property has, the more relevant it is. Or 22 | perhaps we want to factor in sliding scales like recency, price, popularity, or 23 | distance, while still taking the relevance of a full-text query into account. 24 | 25 | All of this is possible thanks to the powerful scoring infrastructure 26 | available in Elasticsearch. 27 | 28 | We will start by looking at the theoretical side of how Lucene calculates 29 | relevance, and then move on to practical examples of how you can control the 30 | process. 
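As a small taste of what is to come, the vacation-home example might be
expressed as a handful of optional `should` clauses, each matching feature
adding to a property's score. This is a sketch only, with made-up field values:

[source,js]
--------------------------------------------------
GET /_search
{
  "query": {
    "bool": {
      "should": [
        { "match": { "features": "air-conditioning" }},
        { "match": { "features": "sea view"         }},
        { "match": { "features": "free wifi"        }}
      ]
    }
  }
}
--------------------------------------------------

The more clauses a property matches, the higher its `_score`.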
31 | -------------------------------------------------------------------------------- /200_Language_intro.asciidoc: -------------------------------------------------------------------------------- 1 | include::200_Language_intro/00_Intro.asciidoc[] 2 | 3 | include::200_Language_intro/10_Using.asciidoc[] 4 | 5 | include::200_Language_intro/20_Configuring.asciidoc[] 6 | 7 | include::200_Language_intro/30_Language_pitfalls.asciidoc[] 8 | 9 | include::200_Language_intro/40_One_language_per_doc.asciidoc[] 10 | 11 | include::200_Language_intro/50_One_language_per_field.asciidoc[] 12 | 13 | include::200_Language_intro/60_Mixed_language_fields.asciidoc[] 14 | 15 | -------------------------------------------------------------------------------- /200_Language_intro/00_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[language-intro]] 2 | == Getting Started with Languages 3 | 4 | Elasticsearch ships with a collection of language analyzers that provide 5 | good, basic, {ref}/analysis-lang-analyzer.html[out-of-the-box support] 6 | for many of the world's most common languages. 7 | 8 | These analyzers typically perform four roles: 9 | 10 | * Tokenize text into individual words: 11 | + 12 | `The quick brown foxes` -> [`The`, `quick`, `brown`, `foxes`] 13 | 14 | * Lowercase tokens: 15 | + 16 | `The` -> `the` 17 | 18 | * Remove common _stopwords_: 19 | + 20 | [`The`, `quick`, `brown`, `foxes`] -> [`quick`, `brown`, `foxes`] 21 | 22 | * Stem tokens to their root form: 23 | + 24 | `foxes` -> `fox` 25 | 26 | Each analyzer may also apply other transformations specific to its language in 27 | order to make words from that language more searchable: 28 | 29 | * The `english` analyzer removes the possessive `'s`: 30 | + 31 | `John's` -> `john` 32 | 33 | * The `french` analyzer removes _elisions_ like `l'` and `qu'` and 34 | _diacritics_ like `¨` or `^`: 35 | + 36 | `l'église` -> `eglis` 37 | 38 | * The `german` analyzer normalizes terms, replacing `ä` and `ae` with `a`, or 39 | `ß` with `ss`, among others: 40 | + 41 | `äußerst` -> `ausserst` 42 | -------------------------------------------------------------------------------- /210_Identifying_words.asciidoc: -------------------------------------------------------------------------------- 1 | include::210_Identifying_words/00_Intro.asciidoc[] 2 | 3 | include::210_Identifying_words/10_Standard_analyzer.asciidoc[] 4 | 5 | include::210_Identifying_words/20_Standard_tokenizer.asciidoc[] 6 | 7 | include::210_Identifying_words/30_ICU_plugin.asciidoc[] 8 | 9 | include::210_Identifying_words/40_ICU_tokenizer.asciidoc[] 10 | 11 | include::210_Identifying_words/50_Tidying_text.asciidoc[] 12 | 13 | ////////////////// 14 | 15 | Compound words 16 | 17 | language specific 18 | - kuromoji 19 | - chinese 20 | 21 | ////////////////// 22 | -------------------------------------------------------------------------------- /210_Identifying_words/00_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[identifying-words]] 2 | == Identifying Words 3 | 4 | A word in English is relatively simple to spot: words are separated by 5 | whitespace or (some) punctuation.((("languages", "identifyig words")))((("words", "identifying"))) Even in English, though, there can be 6 | controversy: is _you're_ one word or two? What about _o'clock_, 7 | _cooperate_, _half-baked_, or _eyewitness_? 
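The `analyze` API is a handy way to see how a particular tokenizer resolves
these edge cases. A quick experiment, with some illustrative text:

[source,js]
--------------------------------------------------
GET /_analyze?tokenizer=standard
You're a half-baked eyewitness
--------------------------------------------------

With the `standard` tokenizer, `You're` and `eyewitness` each come through as a
single token, while `half-baked` is split into `half` and `baked`.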
8 | 9 | Languages like German or Dutch combine individual words to create longer 10 | compound words like _Weißkopfseeadler_ (white-headed sea eagle), but in order 11 | to be able to return `Weißkopfseeadler` as a result for the query `Adler` 12 | (eagle), we need to understand how to break up compound words into their 13 | constituent parts. 14 | 15 | Asian languages are even more complex: some have no whitespace between words, 16 | sentences, or even paragraphs.((("Asian languages", "identifying words"))) Some words can be represented by a single 17 | character, but the same single character, when placed next to other 18 | characters, can form just one part of a longer word with a quite different 19 | meaning. 20 | 21 | It should be obvious that there is no silver-bullet analyzer that will 22 | miraculously deal with all human languages. Elasticsearch ships with dedicated 23 | analyzers for many languages, and more language-specific analyzers are 24 | available as plug-ins. 25 | 26 | However, not all languages have dedicated analyzers, and sometimes you won't 27 | even be sure which language(s) you are dealing with. For these situations, we 28 | need good standard tools that do a reasonable job regardless of language. 29 | -------------------------------------------------------------------------------- /210_Identifying_words/10_Standard_analyzer.asciidoc: -------------------------------------------------------------------------------- 1 | [[standard-analyzer]] 2 | === standard Analyzer 3 | 4 | The `standard` analyzer is used by default for any full-text `analyzed` string 5 | field. ((("standard analyzer"))) If we were to reimplement the `standard` analyzer as a 6 | <>, it would be defined as follows: 7 | 8 | [role="pagebreak-before"] 9 | [source,js] 10 | -------------------------------------------------- 11 | { 12 | "type": "custom", 13 | "tokenizer": "standard", 14 | "filter": [ "lowercase", "stop" ] 15 | } 16 | -------------------------------------------------- 17 | 18 | In <> and <>, we talk about the 19 | `lowercase`, and `stop` _token filters_, but for the moment, let's focus on 20 | the `standard` _tokenizer_. 21 | 22 | -------------------------------------------------------------------------------- /220_Token_normalization.asciidoc: -------------------------------------------------------------------------------- 1 | include::220_Token_normalization/00_Intro.asciidoc[] 2 | 3 | include::220_Token_normalization/10_Lowercasing.asciidoc[] 4 | 5 | include::220_Token_normalization/20_Removing_diacritics.asciidoc[] 6 | 7 | include::220_Token_normalization/30_Unicode_world.asciidoc[] 8 | 9 | include::220_Token_normalization/40_Case_folding.asciidoc[] 10 | 11 | include::220_Token_normalization/50_Character_folding.asciidoc[] 12 | 13 | // TODO: Add normalization character filter with ngram tokenizer for decompounding german 14 | // German ngrams should be 4, not 3 15 | 16 | include::220_Token_normalization/60_Sorting_and_collations.asciidoc[] 17 | 18 | -------------------------------------------------------------------------------- /220_Token_normalization/00_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[token-normalization]] 2 | == Normalizing Tokens 3 | 4 | Breaking text into tokens is ((("normalization", "of tokens")))((("tokens", "normalizing")))only half the job. 
To make those 5 | tokens more easily searchable, they need to go through a _normalization_ 6 | process to remove insignificant differences between otherwise identical words, 7 | such as uppercase versus lowercase. Perhaps we also need to remove significant 8 | differences, to make `esta`, `ésta`, and `está` all searchable as the same 9 | word. Would you search for `déjà vu`, or just for `deja vu`? 10 | 11 | This is the job of the token filters, which((("token filters"))) receive a stream of tokens from 12 | the tokenizer. You can have multiple token filters, each doing its particular 13 | job. Each receives the new token stream as output by the token filter before 14 | it. 15 | 16 | -------------------------------------------------------------------------------- /220_Token_normalization/10_Lowercasing.asciidoc: -------------------------------------------------------------------------------- 1 | [[lowercase-token-filter]] 2 | === In That Case 3 | 4 | The most frequently used token filter is the `lowercase` filter, which does 5 | exactly what you would expect; it transforms ((("tokens", "normalizing", "lowercase filter")))((("lowercase token filter")))each token into its lowercase 6 | form: 7 | 8 | [source,js] 9 | -------------------------------------------------- 10 | GET /_analyze?tokenizer=standard&filters=lowercase 11 | The QUICK Brown FOX! <1> 12 | -------------------------------------------------- 13 | <1> Emits tokens `the`, `quick`, `brown`, `fox` 14 | 15 | It doesn't matter whether users search for `fox` or `FOX`, as long as the same 16 | analysis process is applied at query time and at index time. The `lowercase` 17 | filter will transform a query for `FOX` into a query for `fox`, which is the 18 | same token that we have stored in our inverted index. 19 | 20 | To use token filters as part of the analysis process, we ((("analyzers", "using token filters")))((("token filters", "using with analyzers")))can create a `custom` 21 | analyzer: 22 | 23 | [source,js] 24 | -------------------------------------------------- 25 | PUT /my_index 26 | { 27 | "settings": { 28 | "analysis": { 29 | "analyzer": { 30 | "my_lowercaser": { 31 | "tokenizer": "standard", 32 | "filter": [ "lowercase" ] 33 | } 34 | } 35 | } 36 | } 37 | } 38 | -------------------------------------------------- 39 | 40 | And we can test it out with the `analyze` API: 41 | 42 | [source,js] 43 | -------------------------------------------------- 44 | GET /my_index/_analyze?analyzer=my_lowercaser 45 | The QUICK Brown FOX!
<1> 46 | -------------------------------------------------- 47 | <1> Emits tokens `the`, `quick`, `brown`, `fox` 48 | 49 | -------------------------------------------------------------------------------- /230_Stemming.asciidoc: -------------------------------------------------------------------------------- 1 | include::230_Stemming/00_Intro.asciidoc[] 2 | 3 | include::230_Stemming/10_Algorithmic_stemmers.asciidoc[] 4 | 5 | include::230_Stemming/20_Dictionary_stemmers.asciidoc[] 6 | 7 | include::230_Stemming/30_Hunspell_stemmer.asciidoc[] 8 | 9 | include::230_Stemming/40_Choosing_a_stemmer.asciidoc[] 10 | 11 | include::230_Stemming/50_Controlling_stemming.asciidoc[] 12 | 13 | include::230_Stemming/60_Stemming_in_situ.asciidoc[] 14 | -------------------------------------------------------------------------------- /240_Stopwords.asciidoc: -------------------------------------------------------------------------------- 1 | include::240_Stopwords/10_Intro.asciidoc[] 2 | 3 | include::240_Stopwords/20_Using_stopwords.asciidoc[] 4 | 5 | include::240_Stopwords/30_Stopwords_and_performance.asciidoc[] 6 | 7 | include::240_Stopwords/40_Divide_and_conquer.asciidoc[] 8 | 9 | include::240_Stopwords/50_Phrase_queries.asciidoc[] 10 | 11 | include::240_Stopwords/60_Common_grams.asciidoc[] 12 | 13 | include::240_Stopwords/70_Relevance.asciidoc[] 14 | 15 | -------------------------------------------------------------------------------- /240_Stopwords/70_Relevance.asciidoc: -------------------------------------------------------------------------------- 1 | [[stopwords-relavance]] 2 | === Stopwords and Relevance 3 | 4 | The last topic to cover before moving on from stopwords((("stopwords", "relevance and")))((("relevance", "stopwords and"))) is that of relevance. 5 | Leaving stopwords in your index could make the relevance calculation 6 | less accurate, especially if your documents are very long. 7 | 8 | As we have already discussed in <>, the((("BM25", "term frequency saturation"))) reason for this is 9 | that <> doesn't impose an 10 | upper limit on the impact of term frequency.((("Term Frequency/Inverse Document Frequency (TF/IDF) similarity algorithm", "stopwords and"))) Very common words may have a low 11 | weight because of inverse document frequency but, in long documents, the sheer 12 | number of occurrences of stopwords in a single document may lead to their 13 | weight being artificially boosted. 14 | 15 | You may want to consider using the <> similarity on long 16 | fields that include stopwords instead of the default Lucene similarity. 
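Switching similarity is done in the field mapping. Here is a minimal sketch, in
which the index, type, and field names are placeholders:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "mappings": {
    "doc": {
      "properties": {
        "body": {
          "type":       "string",
          "similarity": "BM25" <1>
        }
      }
    }
  }
}
--------------------------------------------------
<1> Use the `BM25` similarity for this field instead of the default TF/IDF-based
similarity.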
17 | 18 | -------------------------------------------------------------------------------- /260_Synonyms.asciidoc: -------------------------------------------------------------------------------- 1 | include::260_Synonyms/10_Intro.asciidoc[] 2 | 3 | include::260_Synonyms/20_Using_synonyms.asciidoc[] 4 | 5 | include::260_Synonyms/30_Synonym_formats.asciidoc[] 6 | 7 | include::260_Synonyms/40_Expand_contract.asciidoc[] 8 | 9 | include::260_Synonyms/50_Analysis_chain.asciidoc[] 10 | 11 | include::260_Synonyms/60_Multi_word_synonyms.asciidoc[] 12 | 13 | include::260_Synonyms/70_Symbol_synonyms.asciidoc[] 14 | 15 | -------------------------------------------------------------------------------- /260_Synonyms/30_Synonym_formats.asciidoc: -------------------------------------------------------------------------------- 1 | [[synonym-formats]] 2 | === Formatting Synonyms 3 | 4 | In their simplest form, synonyms are((("synonyms", "formatting"))) listed as comma-separated values: 5 | 6 | "jump,leap,hop" 7 | 8 | If any of these terms is encountered, it is replaced by all of the listed 9 | synonyms. For instance: 10 | 11 | [role="pagebreak-before"] 12 | [source,text] 13 | -------------------------- 14 | Original terms: Replaced by: 15 | ──────────────────────────────── 16 | jump → (jump,leap,hop) 17 | leap → (jump,leap,hop) 18 | hop → (jump,leap,hop) 19 | -------------------------- 20 | 21 | Alternatively, with the `=>` syntax, it is possible to specify a list of terms 22 | to match (on the left side), and a list of one or more replacements (on 23 | the right side): 24 | 25 | "u s a,united states,united states of america => usa" 26 | "g b,gb,great britain => britain,england,scotland,wales" 27 | 28 | [source,text] 29 | -------------------------- 30 | Original terms: Replaced by: 31 | ──────────────────────────────── 32 | u s a → (usa) 33 | united states → (usa) 34 | great britain → (britain,england,scotland,wales) 35 | -------------------------- 36 | 37 | If multiple rules for the same synonyms are specified, they are merged 38 | together. The order of rules is not respected. Instead, the longest matching 39 | rule wins. Take the following rules as an example: 40 | 41 | "united states => usa", 42 | "united states of america => usa" 43 | 44 | If these rules conflicted, Elasticsearch would turn `United States of 45 | America` into the terms `(usa),(of),(america)`. Instead, the longest 46 | sequence wins, and we end up with just the term `(usa)`. 47 | 48 | -------------------------------------------------------------------------------- /270_Fuzzy_matching.asciidoc: -------------------------------------------------------------------------------- 1 | include::270_Fuzzy_matching/10_Intro.asciidoc[] 2 | 3 | include::270_Fuzzy_matching/20_Fuzziness.asciidoc[] 4 | 5 | include::270_Fuzzy_matching/30_Fuzzy_query.asciidoc[] 6 | 7 | include::270_Fuzzy_matching/40_Fuzzy_match_query.asciidoc[] 8 | 9 | include::270_Fuzzy_matching/50_Scoring_fuzziness.asciidoc[] 10 | 11 | include::270_Fuzzy_matching/60_Phonetic_matching.asciidoc[] 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /270_Fuzzy_matching/10_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[fuzzy-matching]] 2 | == Typoes and Mispelings 3 | 4 | We expect a query on structured data like dates and prices to return only 5 | documents that match exactly. 
((("typoes and misspellings", "fuzzy matching")))((("fuzzy matching"))) However, good full-text search shouldn't have the 6 | same restriction. Instead, we can widen the net to include words that _may_ 7 | match, but use the relevance score to push the better matches to the top 8 | of the result set. 9 | 10 | In fact, full-text search ((("full text search", "fuzzy matching")))that only matches exactly will probably frustrate 11 | your users. Wouldn't you expect a search for ``quick brown fox'' to match a 12 | document containing ``fast brown foxes,'' ``Johnny Walker'' to match 13 | ``Johnnie Walker,'' or ``Arnold Shcwarzenneger'' to match ``Arnold 14 | Schwarzenegger''? 15 | 16 | If documents exist that _do_ contain exactly what the user has queried, 17 | they should appear at the top of the result set, but weaker matches can be 18 | included further down the list. If no documents match exactly, at least we 19 | can show the user potential matches; they may even be what the user 20 | originally intended! 21 | 22 | We have already looked at diacritic-free matching in <>, 23 | word stemming in <>, and synonyms in <>, but all of those 24 | approaches presuppose that words are spelled correctly, or that there is only 25 | one way to spell each word. 26 | 27 | Fuzzy matching allows for query-time matching of misspelled words, while 28 | phonetic token filters at index time can be used for _sounds-like_ matching. 29 | 30 | -------------------------------------------------------------------------------- /270_Fuzzy_matching/40_Fuzzy_match_query.asciidoc: -------------------------------------------------------------------------------- 1 | [[fuzzy-match-query]] 2 | === Fuzzy match Query 3 | 4 | The `match` query supports ((("typoes and misspellings", "fuzzy match query")))((("match query", "fuzzy matching")))((("fuzzy matching", "match query")))fuzzy matching out of the box: 5 | 6 | [source,json] 7 | ----------------------------------- 8 | GET /my_index/my_type/_search 9 | { 10 | "query": { 11 | "match": { 12 | "text": { 13 | "query": "SURPRIZE ME!", 14 | "fuzziness": "AUTO", 15 | "operator": "and" 16 | } 17 | } 18 | } 19 | } 20 | ----------------------------------- 21 | 22 | The query string is first analyzed, to produce the terms `[surprize, me]`, and 23 | then each term is fuzzified using the specified `fuzziness`. 24 | 25 | Similarly, the `multi_match` query also ((("multi_match queries", "fuzziness support")))supports `fuzziness`, but only when 26 | executing with type `best_fields` or `most_fields`: 27 | 28 | [source,json] 29 | ----------------------------------- 30 | GET /my_index/my_type/_search 31 | { 32 | "query": { 33 | "multi_match": { 34 | "fields": [ "text", "title" ], 35 | "query": "SURPRIZE ME!", 36 | "fuzziness": "AUTO" 37 | } 38 | } 39 | } 40 | ----------------------------------- 41 | 42 | Both the `match` and `multi_match` queries also support the `prefix_length` 43 | and `max_expansions` parameters. 44 | 45 | TIP: Fuzziness works only with the basic `match` and `multi_match` queries. It 46 | doesn't work with phrase matching, common terms, or `cross_fields` matches. 47 | 48 | -------------------------------------------------------------------------------- /270_Fuzzy_matching/50_Scoring_fuzziness.asciidoc: -------------------------------------------------------------------------------- 1 | [[fuzzy-scoring]] 2 | === Scoring Fuzziness 3 | 4 | Users love fuzzy queries. 
They assume that these queries will somehow magically find 5 | the right combination of proper spellings.((("fuzzy queries", "scoring fuzziness")))((("typoes and misspellings", "scoring fuzziness")))((("relevance scores", "fuzziness and"))) Unfortunately, the truth is 6 | somewhat more prosaic. 7 | 8 | Imagine that we have 1,000 documents containing ``Schwarzenegger,'' and just 9 | one document with the misspelling ``Schwarzeneger.'' According to the theory 10 | of <>, the misspelling is 11 | much more relevant than the correct spelling, because it appears in far fewer 12 | documents! 13 | 14 | In other words, if we were to treat fuzzy matches((("match query", "fuzzy match query"))) like any other match, we 15 | would favor misspellings over correct spellings, which would make for grumpy 16 | users. 17 | 18 | TIP: Fuzzy matching should not be used for scoring purposes--only to widen 19 | the net of matching terms in case there are misspellings. 20 | 21 | By default, the `match` query gives all fuzzy matches the constant score of 1. 22 | This is sufficient to add potential matches onto the end of the result list, 23 | without interfering with the relevance scoring of nonfuzzy queries. 24 | 25 | [TIP] 26 | ================================================== 27 | 28 | Fuzzy queries alone are much less useful than they initially appear. They are 29 | better used as part of a ``bigger'' feature, such as the _search-as-you-type_ 30 | {ref}/search-suggesters-completion.html[`completion` suggester] or the 31 | _did-you-mean_ {ref}/search-suggesters-phrase.html[`phrase` suggester]. 32 | 33 | ================================================== 34 | -------------------------------------------------------------------------------- /302_Example_Walkthrough.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | include::300_Aggregations/20_basic_example.asciidoc[] 3 | 4 | include::300_Aggregations/21_add_metric.asciidoc[] 5 | 6 | include::300_Aggregations/22_nested_bucket.asciidoc[] 7 | 8 | include::300_Aggregations/23_extra_metrics.asciidoc[] -------------------------------------------------------------------------------- /303_Making_Graphs.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | 3 | include::300_Aggregations/30_histogram.asciidoc[] 4 | 5 | include::300_Aggregations/35_date_histogram.asciidoc[] 6 | 7 | include::300_Aggregations/40_scope.asciidoc[] 8 | 9 | include::300_Aggregations/45_filtering.asciidoc[] 10 | 11 | include::300_Aggregations/50_sorting_ordering.asciidoc[] -------------------------------------------------------------------------------- /304_Approximate_Aggregations.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | include::300_Aggregations/55_approx_intro.asciidoc[] 5 | 6 | include::300_Aggregations/60_cardinality.asciidoc[] 7 | 8 | include::300_Aggregations/65_percentiles.asciidoc[] -------------------------------------------------------------------------------- /305_Significant_Terms.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | include::300_Aggregations/70_sigterms_intro.asciidoc[] 3 | 4 | include::300_Aggregations/75_sigterms.asciidoc[] 5 | -------------------------------------------------------------------------------- /306_Practical_Considerations.asciidoc: -------------------------------------------------------------------------------- 1 | [[docvalues-and-fielddata]] 2 | == 
Doc Values and Fielddata 3 | 4 | include::300_Aggregations/90_docvalues.asciidoc[] 5 | 6 | include::300_Aggregations/93_technical_docvalues.asciidoc[] 7 | 8 | include::300_Aggregations/95_analyzed_vs_not.asciidoc[] 9 | 10 | include::300_Aggregations/100_circuit_breaker_fd_settings.asciidoc[] 11 | 12 | include::300_Aggregations/105_filtering.asciidoc[] 13 | 14 | include::300_Aggregations/115_eager.asciidoc[] 15 | 16 | include::300_Aggregations/120_breadth_vs_depth.asciidoc[] 17 | 18 | include::300_Aggregations/125_Conclusion.asciidoc[] 19 | -------------------------------------------------------------------------------- /310_Geopoints.asciidoc: -------------------------------------------------------------------------------- 1 | include::310_Geopoints/20_Geopoints.asciidoc[] 2 | 3 | include::310_Geopoints/30_Filter_by_geopoint.asciidoc[] 4 | 5 | include::310_Geopoints/32_Bounding_box.asciidoc[] 6 | 7 | include::310_Geopoints/34_Geo_distance.asciidoc[] 8 | 9 | include::310_Geopoints/50_Sorting_by_distance.asciidoc[] 10 | -------------------------------------------------------------------------------- /320_Geohashes.asciidoc: -------------------------------------------------------------------------------- 1 | include::320_Geohashes/40_Geohashes.asciidoc[] 2 | 3 | include::320_Geohashes/50_Geohash_mapping.asciidoc[] 4 | 5 | include::320_Geohashes/60_Geohash_cell_filter.asciidoc[] 6 | 7 | -------------------------------------------------------------------------------- /320_Geohashes/50_Geohash_mapping.asciidoc: -------------------------------------------------------------------------------- 1 | [[geohash-mapping]] 2 | === Mapping Geohashes 3 | 4 | The first step is to decide just how much precision you need.((("geohashes", "mapping")))((("mapping (types)", "geohashes"))) Although you could 5 | index all geo-points with the default full 12 levels of precision, do you 6 | really need to be accurate to within a few centimeters? You can save yourself 7 | a lot of space in the index by reducing your precision requirements to 8 | something more realistic, such as `1km`:((("geohash_precision parameter")))((("geohash_prefix parameter"))) 9 | 10 | [source,json] 11 | ---------------------------- 12 | PUT /attractions 13 | { 14 | "mappings": { 15 | "restaurant": { 16 | "properties": { 17 | "name": { 18 | "type": "string" 19 | }, 20 | "location": { 21 | "type": "geo_point", 22 | "geohash_prefix": true, <1> 23 | "geohash_precision": "1km" <2> 24 | } 25 | } 26 | } 27 | } 28 | } 29 | ---------------------------- 30 | <1> Setting `geohash_prefix` to `true` tells Elasticsearch to index 31 | all geohash prefixes, up to the specified precision. 32 | <2> The precision can be specified as an absolute number, representing the 33 | length of the geohash, or as a distance. A precision of `1km` corresponds 34 | to a geohash of length `7`. 35 | 36 | With this mapping in place, geohash prefixes of lengths 1 to 7 will be indexed, 37 | providing geohashes accurate to about 150 meters. 
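As a rough sketch of how this mapping might be used: a `geohash_cell` filter
translates a point into a geohash of the requested precision and matches any
document that has that geohash among its indexed prefixes. The coordinates below
are arbitrary, and the `precision` matches the `1km` precision configured above:

[source,js]
--------------------------------------------------
GET /attractions/restaurant/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "geohash_cell": {
          "location": {
            "lat":  40.718,
            "lon": -73.983
          },
          "precision": "1km"
        }
      }
    }
  }
}
--------------------------------------------------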
38 | 39 | -------------------------------------------------------------------------------- /330_Geo_aggs.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | include::330_Geo_aggs/60_Geo_aggs.asciidoc[] 3 | 4 | include::330_Geo_aggs/62_Geo_distance_agg.asciidoc[] 5 | 6 | include::330_Geo_aggs/64_Geohash_grid_agg.asciidoc[] 7 | 8 | include::330_Geo_aggs/66_Geo_bounds_agg.asciidoc[] 9 | 10 | -------------------------------------------------------------------------------- /330_Geo_aggs/60_Geo_aggs.asciidoc: -------------------------------------------------------------------------------- 1 | [[geo-aggs]] 2 | == Geo Aggregations 3 | 4 | Although filtering or scoring results by geolocation is useful,((("geo-aggregations")))((("aggregations", "geo"))) it is often more 5 | useful to be able to present information to the user on a map. A search may 6 | return way too many results to be able to display each geo-point individually, 7 | but geo-aggregations can be used to cluster geo-points into more manageable 8 | buckets. 9 | 10 | Three aggregations work with fields of type `geo_point`: 11 | 12 | <>:: 13 | 14 | Groups documents into concentric circles around a central point. 15 | 16 | <>:: 17 | 18 | Groups documents by geohash cell, for display on a map. 19 | 20 | <>:: 21 | 22 | Returns the `lat/lon` coordinates of a bounding box that would 23 | encompass all of the geo-points. This is useful for choosing 24 | the correct zoom level when displaying a map. 25 | 26 | -------------------------------------------------------------------------------- /340_Geoshapes.asciidoc: -------------------------------------------------------------------------------- 1 | include::340_Geoshapes/70_Geoshapes.asciidoc[] 2 | 3 | include::340_Geoshapes/72_Mapping_geo_shapes.asciidoc[] 4 | 5 | include::340_Geoshapes/74_Indexing_geo_shapes.asciidoc[] 6 | 7 | include::340_Geoshapes/76_Querying_geo_shapes.asciidoc[] 8 | 9 | include::340_Geoshapes/78_Indexed_geo_shapes.asciidoc[] 10 | -------------------------------------------------------------------------------- /340_Geoshapes/70_Geoshapes.asciidoc: -------------------------------------------------------------------------------- 1 | [[geo-shapes]] 2 | == Geo Shapes 3 | 4 | Geo-shapes use a completely different approach than geo-points.((("geo-shapes"))) A circle on a 5 | computer screen does not consist of a perfect continuous line. Instead it is 6 | drawn by coloring adjacent pixels as an approximation of a circle. Geo-shapes 7 | work in much the same way. 8 | 9 | Complex shapes--such as points, lines, polygons, multipolygons, and polygons with 10 | holes,--are ``painted'' onto a grid of geohash cells, and the shape is 11 | converted into a list of the ((("geohashes", "in geo-shapes")))geohashes of all the cells that it touches. 12 | 13 | [NOTE] 14 | ==== 15 | Actually, two types of grids can be used with geo-shapes: 16 | geohashes, which we have already discussed and which are the default encoding, 17 | and _quad trees_. ((("quad trees")))Quad trees are similar to geohashes except that there are 18 | only four cells at each level, instead of 32. The difference comes down to a 19 | choice of encoding. 20 | ==== 21 | 22 | All of the geohashes that compose a shape are indexed as if they were terms. 23 | With this information in the index, it is easy to determine whether one shape 24 | intersects with another, as they will share the same geohash terms. 
25 | 26 | That is the extent of what you can do with geo-shapes: determine the 27 | relationship between a query shape and a shape in the index. The `relation` 28 | can be ((("relation parameter (geo-shapes)")))one of the following: 29 | 30 | `intersects`:: 31 | 32 | The query shape overlaps with the indexed shape (default). 33 | 34 | `disjoint`:: 35 | 36 | The query shape does _not_ overlap at all with the indexed shape. 37 | 38 | `within`:: 39 | 40 | The indexed shape is entirely within the query shape. 41 | 42 | Geo-shapes cannot be used to calculate distance, cannot be used for 43 | sorting or scoring, and cannot be used in aggregations. 44 | 45 | -------------------------------------------------------------------------------- /400_Relationships.asciidoc: -------------------------------------------------------------------------------- 1 | include::400_Relationships/10_Intro.asciidoc[] 2 | 3 | include::400_Relationships/15_Application_joins.asciidoc[] 4 | 5 | include::400_Relationships/20_Denormalization.asciidoc[] 6 | 7 | include::400_Relationships/22_Top_hits.asciidoc[] 8 | 9 | include::400_Relationships/25_Concurrency.asciidoc[] 10 | 11 | include::400_Relationships/26_Concurrency_solutions.asciidoc[] 12 | 13 | -------------------------------------------------------------------------------- /400_Relationships/20_Denormalization.asciidoc: -------------------------------------------------------------------------------- 1 | [[denormalization]] 2 | === Denormalizing Your Data 3 | 4 | The way to get the best search performance out of Elasticsearch is to use it 5 | as it is intended, by((("relationships", "denormalizing your data")))((("denormalization", "denormalizing data at index time"))) 6 | http://en.wikipedia.org/wiki/Denormalization[denormalizing] your data at index 7 | time. Having redundant copies of data in each document that requires access to 8 | it removes the need for joins. 9 | 10 | If we want to be able to find a blog post by the name of the user who wrote it, 11 | include the user's name in the blog-post document itself: 12 | 13 | 14 | [source,json] 15 | -------------------------------- 16 | PUT /my_index/user/1 17 | { 18 | "name": "John Smith", 19 | "email": "john@smith.com", 20 | "dob": "1970/10/24" 21 | } 22 | 23 | PUT /my_index/blogpost/2 24 | { 25 | "title": "Relationships", 26 | "body": "It's complicated...", 27 | "user": { 28 | "id": 1, 29 | "name": "John Smith" <1> 30 | } 31 | } 32 | -------------------------------- 33 | <1> Part of the user's data has been denormalized into the `blogpost` document. 34 | 35 | Now, we can find blog posts about `relationships` by users called `John` 36 | with a single query: 37 | 38 | [source,json] 39 | -------------------------------- 40 | GET /my_index/blogpost/_search 41 | { 42 | "query": { 43 | "bool": { 44 | "must": [ 45 | { "match": { "title": "relationships" }}, 46 | { "match": { "user.name": "John" }} 47 | ] 48 | } 49 | } 50 | } 51 | -------------------------------- 52 | 53 | The advantage of data denormalization is speed. Because each document 54 | contains all of the information that is required to determine whether it 55 | matches the query, there is no need for expensive joins.
56 | 57 | -------------------------------------------------------------------------------- /402_Nested.asciidoc: -------------------------------------------------------------------------------- 1 | include::402_Nested/30_Nested_objects.asciidoc[] 2 | 3 | include::402_Nested/31_Nested_mapping.asciidoc[] 4 | 5 | include::402_Nested/32_Nested_query.asciidoc[] 6 | 7 | include::402_Nested/33_Nested_sorting.asciidoc[] 8 | 9 | include::402_Nested/35_Nested_aggs.asciidoc[] 10 | 11 | -------------------------------------------------------------------------------- /402_Nested/31_Nested_mapping.asciidoc: -------------------------------------------------------------------------------- 1 | [[nested-mapping]] 2 | === Nested Object Mapping 3 | 4 | Setting up a `nested` field is simple--where ((("mapping (types)", "nested object")))((("nested object mapping")))you would normally specify type 5 | `object`, make it type `nested` instead: 6 | 7 | [source,json] 8 | -------------------------- 9 | PUT /my_index 10 | { 11 | "mappings": { 12 | "blogpost": { 13 | "properties": { 14 | "comments": { 15 | "type": "nested", <1> 16 | "properties": { 17 | "name": { "type": "string" }, 18 | "comment": { "type": "string" }, 19 | "age": { "type": "short" }, 20 | "stars": { "type": "short" }, 21 | "date": { "type": "date" } 22 | } 23 | } 24 | } 25 | } 26 | } 27 | } 28 | -------------------------- 29 | <1> A `nested` field accepts the same parameters as a field of type `object`. 30 | 31 | That's all that is required. Any `comments` objects would now be indexed as 32 | separate nested documents. See the 33 | {ref}/nested.html[`nested` type reference docs] for more. 34 | 35 | -------------------------------------------------------------------------------- /404_Parent_Child.asciidoc: -------------------------------------------------------------------------------- 1 | include::404_Parent_Child/40_Parent_child.asciidoc[] 2 | 3 | include::404_Parent_Child/45_Indexing_parent_child.asciidoc[] 4 | 5 | include::404_Parent_Child/50_Has_child.asciidoc[] 6 | 7 | include::404_Parent_Child/55_Has_parent.asciidoc[] 8 | 9 | include::404_Parent_Child/60_Children_agg.asciidoc[] 10 | 11 | include::404_Parent_Child/65_Grandparents.asciidoc[] 12 | 13 | include::404_Parent_Child/70_Practical_considerations.asciidoc[] 14 | 15 | -------------------------------------------------------------------------------- /404_Parent_Child/55_Has_parent.asciidoc: -------------------------------------------------------------------------------- 1 | [[has-parent]] 2 | === Finding Children by Their Parents 3 | 4 | While a `nested` query can always ((("parent-child relationship", "finding children by their parents")))return only the root document as a result, 5 | parent and child documents are independent and each can be queried 6 | independently. The `has_child` query allows us to return parents based on 7 | data in their children, and the `has_parent` query returns children based on 8 | data in their parents.((("has_parent query and filter", "query"))) 9 | 10 | It looks very similar to the `has_child` query. 
This example returns 11 | employees who work in the UK: 12 | 13 | [source,json] 14 | ------------------------- 15 | GET /company/employee/_search 16 | { 17 | "query": { 18 | "has_parent": { 19 | "type": "branch", <1> 20 | "query": { 21 | "match": { 22 | "country": "UK" 23 | } 24 | } 25 | } 26 | } 27 | } 28 | ------------------------- 29 | <1> Returns children who have parents of type `branch` 30 | 31 | The `has_parent` query also supports the `score_mode`,((("score_mode parameter"))) but it accepts only two 32 | settings: `none` (the default) and `score`. Each child can have only one 33 | parent, so there is no need to reduce multiple scores into a single score for 34 | the child. The choice is simply between using the score (`score`) or not 35 | (`none`). 36 | 37 | .Non-scoring has_parent Query 38 | ************************** 39 | 40 | When used in non-scoring mode (e.g. inside a `filter` clause), the `has_parent` 41 | query no longer supports the `score_mode` parameter. Because it is merely 42 | including/excluding documents and not scoring, the `score_mode` parameter 43 | no longer applies. 44 | ************************** 45 | -------------------------------------------------------------------------------- /404_Parent_Child/60_Children_agg.asciidoc: -------------------------------------------------------------------------------- 1 | [[children-agg]] 2 | === Children Aggregation 3 | 4 | Parent-child supports a 5 | {ref}/search-aggregations-bucket-children-aggregation.html[`children` aggregation] as ((("aggregations", "children aggregation")))((("children aggregation")))((("parent-child relationship", "children aggregation")))a direct analog to the `nested` aggregation discussed in 6 | <>. A parent aggregation (the equivalent of 7 | `reverse_nested`) is not supported. 8 | 9 | This example demonstrates how we could determine the favorite hobbies of our 10 | employees by country: 11 | 12 | [source,json] 13 | ------------------------- 14 | GET /company/branch/_search 15 | { 16 | "size" : 0, 17 | "aggs": { 18 | "country": { 19 | "terms": { <1> 20 | "field": "country" 21 | }, 22 | "aggs": { 23 | "employees": { 24 | "children": { <2> 25 | "type": "employee" 26 | }, 27 | "aggs": { 28 | "hobby": { 29 | "terms": { <3> 30 | "field": "hobby" 31 | } 32 | } 33 | } 34 | } 35 | } 36 | } 37 | } 38 | } 39 | ------------------------- 40 | <1> The `country` field in the `branch` documents. 41 | <2> The `children` aggregation joins the parent documents with 42 | their associated children of type `employee`. 43 | <3> The `hobby` field from the `employee` child documents. 
44 | -------------------------------------------------------------------------------- /410_Scaling.asciidoc: -------------------------------------------------------------------------------- 1 | include::410_Scaling/10_Intro.asciidoc[] 2 | 3 | include::410_Scaling/15_Shard.asciidoc[] 4 | 5 | include::410_Scaling/20_Overallocation.asciidoc[] 6 | 7 | include::410_Scaling/25_Kagillion_shards.asciidoc[] 8 | 9 | include::410_Scaling/30_Capacity_planning.asciidoc[] 10 | 11 | include::410_Scaling/35_Replica_shards.asciidoc[] 12 | 13 | include::410_Scaling/40_Multiple_indices.asciidoc[] 14 | 15 | include::410_Scaling/45_Index_per_timeframe.asciidoc[] 16 | 17 | include::410_Scaling/50_Index_templates.asciidoc[] 18 | 19 | include::410_Scaling/55_Retiring_data.asciidoc[] 20 | 21 | include::410_Scaling/60_Index_per_user.asciidoc[] 22 | 23 | include::410_Scaling/65_Shared_index.asciidoc[] 24 | 25 | include::410_Scaling/70_Faking_it.asciidoc[] 26 | 27 | include::410_Scaling/75_One_big_user.asciidoc[] 28 | 29 | include::410_Scaling/80_Scale_is_not_infinite.asciidoc[] 30 | -------------------------------------------------------------------------------- /410_Scaling/10_Intro.asciidoc: -------------------------------------------------------------------------------- 1 | [[scale]] 2 | == Designing for Scale 3 | 4 | Elasticsearch is used by some companies to index ((("scaling", "designing for scale")))and search petabytes of data 5 | every day, but most of us start out with something a little more humble in 6 | size. Even if we aspire to be the next Facebook, it is unlikely that our bank 7 | balance matches our aspirations. We need to build for what we have today, but 8 | in a way that will allow us to scale out flexibly and rapidly. 9 | 10 | Elasticsearch is built to scale. It will run very happily on your laptop or 11 | in a cluster containing hundreds of nodes, and the experience is almost 12 | identical. Growing from a small cluster to a large cluster is almost entirely 13 | automatic and painless. Growing from a large cluster to a very large cluster 14 | requires a bit more planning and design, but it is still relatively painless. 15 | 16 | Of course, it is not magic. Elasticsearch has its limitations too. If you 17 | are aware of those limitations and work with them, the growing process will be 18 | pleasant. If you treat Elasticsearch badly, you could be in for a world of 19 | pain. 20 | 21 | The default settings in Elasticsearch will take you a long way, but to get the 22 | most bang for your buck, you need to think about how data flows through your 23 | system. We will talk about two common data flows: time-based data (such as log 24 | events or social network streams, where relevance is driven by recency), and 25 | user-based data (where a large document collection can be subdivided by user or 26 | customer). 27 | 28 | This chapter will help you make the right decisions up front, to avoid 29 | nasty surprises later. 
30 | -------------------------------------------------------------------------------- /500_Cluster_Admin.asciidoc: -------------------------------------------------------------------------------- 1 | [[cluster-admin]] 2 | == Monitoring 3 | 4 | include::500_Cluster_Admin/10_intro.asciidoc[] 5 | 6 | include::500_Cluster_Admin/15_marvel.asciidoc[] 7 | 8 | include::500_Cluster_Admin/20_health.asciidoc[] 9 | 10 | include::500_Cluster_Admin/30_node_stats.asciidoc[] 11 | 12 | include::500_Cluster_Admin/40_other_stats.asciidoc[] 13 | -------------------------------------------------------------------------------- /500_Cluster_Admin/10_intro.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | Elasticsearch is often deployed as a cluster of nodes. A variety of APIs let you 3 | manage and monitor the cluster itself, rather than interact with the data stored 4 | within the cluster. 5 | 6 | As with most functionality in Elasticsearch, there is an overarching design goal 7 | that tasks should be performed through an API rather than by modifying static 8 | configuration files. This becomes especially important as your cluster scales. 9 | Even with a provisioning system (such as Puppet, Chef, and Ansible), a single 10 | HTTP API call is often simpler than pushing new configurations to hundreds of 11 | physical machines. 12 | 13 | To that end, this chapter presents the various APIs that allow you to 14 | dynamically tweak, tune, and configure your cluster. It also covers a host of 15 | APIs that provide statistics about the cluster itself so you can monitor for 16 | health and performance. 17 | -------------------------------------------------------------------------------- /500_Cluster_Admin/15_marvel.asciidoc: -------------------------------------------------------------------------------- 1 | [[marvel]] 2 | === Marvel for Monitoring 3 | 4 | https://www.elastic.co/guide/en/marvel/current/index.html[Marvel] enables 5 | you to easily monitor Elasticsearch through Kibana. You can view your 6 | cluster’s health and performance in real time as well as analyze 7 | past cluster, index, and node metrics. 8 | 9 | While you can access a large number of statistics through the APIs described 10 | in this chapter, they only show you what's going on at a single point in time. 11 | Knowing memory usage at this instant is helpful, but knowing 12 | memory usage _over time_ is much more useful. Marvel queries and aggregates 13 | the metrics so you can visualize your cluster's 14 | behavior over time, which makes it easy to spot trends. 15 | 16 | As your cluster grows, the output from the stats APIs can get truly hairy. 17 | Once you have a dozen nodes, let alone a hundred, reading through stacks of JSON 18 | becomes very tedious. Marvel lets you explore the data interactively and 19 | makes it easy to zero in on what's going on with particular nodes or indices. 20 | 21 | Marvel uses the same stats APIs that are available to you--it does not expose 22 | any statistics that you can't access through the APIs. However, Marvel greatly 23 | simplifies the collection and visualization of those statistics. 24 | 25 | Marvel is free to use (even in production!), so you should definitely try it out! 26 | For installation instructions, see 27 | https://www.elastic.co/guide/en/marvel/current/getting-started.html[Getting Started with Marvel]. 
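Those stats APIs are all plain HTTP endpoints, so it is easy to poke at them
directly. The simplest of them, covered in the following sections, is cluster
health:

[source,js]
--------------------------------------------------
GET /_cluster/health
--------------------------------------------------

It returns a short JSON summary of the cluster, including the familiar
`green`/`yellow`/`red` status.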
28 | -------------------------------------------------------------------------------- /510_Deployment.asciidoc: -------------------------------------------------------------------------------- 1 | [[deploy]] 2 | == Production Deployment 3 | 4 | include::510_Deployment/10_intro.asciidoc[] 5 | 6 | include::510_Deployment/20_hardware.asciidoc[] 7 | 8 | include::510_Deployment/30_other.asciidoc[] 9 | 10 | include::510_Deployment/40_config.asciidoc[] 11 | 12 | include::510_Deployment/45_dont_touch.asciidoc[] 13 | 14 | include::510_Deployment/50_heap.asciidoc[] 15 | 16 | include::510_Deployment/60_file_descriptors.asciidoc[] 17 | 18 | include::510_Deployment/70_conclusion.asciidoc[] 19 | 20 | -------------------------------------------------------------------------------- /510_Deployment/10_intro.asciidoc: -------------------------------------------------------------------------------- 1 | If you have made it this far in the book, hopefully you've learned a thing or 2 | two about Elasticsearch and are ready to deploy your cluster to production. This 3 | chapter is not meant to be an exhaustive guide to running your cluster in 4 | production, but it covers the key things to consider before putting your cluster 5 | live. 6 | 7 | Three main areas are covered: 8 | 9 | - Logistical considerations, such as hardware recommendations and deployment 10 | strategies 11 | - Configuration changes that are more suited to a production environment 12 | - Post-deployment considerations, such as security, maximizing indexing 13 | performance, and backups 14 | -------------------------------------------------------------------------------- /510_Deployment/70_conclusion.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | === Revisit This List Before Production 3 | 4 | You are likely reading this section before you go into production. The details 5 | covered in this chapter are good to be generally aware of, but it is critical to 6 | revisit this entire list right before deploying to production. 7 | 8 | Some of the topics will simply stop you cold (such as too few available file 9 | descriptors). These are easy enough to debug because they are quickly apparent. 10 | Other issues, such as split brains and memory settings, are visible only after 11 | something bad happens. At that point, the resolution is often messy and tedious. 12 | 13 | It is much better to proactively prevent these situations from occurring by 14 | configuring your cluster appropriately _before_ disaster strikes. So if you are 15 | going to dog-ear (or bookmark) one section from the entire book, this chapter 16 | would be a good candidate. The week before deploying to production, simply flip 17 | through the list presented here and check off all the recommendations. 18 | -------------------------------------------------------------------------------- /510_Deployment/80_cluster_settings.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | === -------------------------------------------------------------------------------- /520_Post_Deployment.asciidoc: -------------------------------------------------------------------------------- 1 | [[post_deploy]] 2 | == Post-Deployment 3 | 4 | Once you have deployed your cluster in production, there are some tools and 5 | best practices to keep your cluster running in top shape. 
In this short 6 | chapter, we talk about configuring settings dynamically, tweaking 7 | logging levels, improving indexing performance, and backing up your cluster. 8 | 9 | include::520_Post_Deployment/10_dynamic_settings.asciidoc[] 10 | 11 | include::520_Post_Deployment/20_logging.asciidoc[] 12 | 13 | include::520_Post_Deployment/30_indexing_perf.asciidoc[] 14 | 15 | include::520_Post_Deployment/35_delayed_shard_allocation.asciidoc[] 16 | 17 | include::520_Post_Deployment/40_rolling_restart.asciidoc[] 18 | 19 | include::520_Post_Deployment/50_backup.asciidoc[] 20 | 21 | include::520_Post_Deployment/60_restore.asciidoc[] 22 | 23 | include::520_Post_Deployment/70_conclusion.asciidoc[] 24 | -------------------------------------------------------------------------------- /520_Post_Deployment/10_dynamic_settings.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | === Changing Settings Dynamically 3 | 4 | Many settings in Elasticsearch are dynamic and can be modified through the API. 5 | Configuration changes that force a node (or cluster) restart are strenuously 6 | avoided. And while it's possible to make the changes through the static configs, 7 | we recommend that you use the API instead. 8 | 9 | The `cluster-update` API operates((("Cluster Update API"))) in two modes: 10 | 11 | Transient:: 12 | These changes are in effect until the cluster restarts. Once a full cluster 13 | restart takes place, these settings are erased. 14 | 15 | Persistent:: 16 | These changes are permanently in place unless explicitly changed. They will 17 | survive full cluster restarts and override the static configuration files. 18 | 19 | Transient versus persistent settings are supplied in the JSON body: 20 | 21 | [source,js] 22 | ---- 23 | PUT /_cluster/settings 24 | { 25 | "persistent" : { 26 | "discovery.zen.minimum_master_nodes" : 2 <1> 27 | }, 28 | "transient" : { 29 | "indices.store.throttle.max_bytes_per_sec" : "50mb" <2> 30 | } 31 | } 32 | ---- 33 | <1> This persistent setting will survive full cluster restarts. 34 | <2> This transient setting will be removed after the first full cluster restart. 35 | 36 | A complete list of settings that can be updated dynamically can be found in the 37 | {ref}/cluster-update-settings.html[online reference docs]. 38 | -------------------------------------------------------------------------------- /520_Post_Deployment/70_conclusion.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | === Clusters Are Living, Breathing Creatures 3 | 4 | Once you get a cluster into production, you'll find that it takes on a life of its 5 | own. Elasticsearch works hard to make clusters self-sufficient and _just work_. 6 | But a cluster still requires routine care and feeding, such as routine backups 7 | and upgrades. 8 | 9 | Elasticsearch releases new versions with bug fixes and performance enhancements 10 | at a very fast pace, and it is always a good idea to keep your cluster current. 11 | Similarly, Lucene continues to find new and exciting bugs in the JVM itself, 12 | which means you should always try to keep your JVM up-to-date. 13 | 14 | This means it is a good idea to have a standardized, routine way to perform 15 | rolling restarts and upgrades in your cluster. Upgrading should be a routine 16 | process, rather than a once-yearly fiasco that requires countless hours of 17 | precise planning. 18 | 19 | Similarly, it is important to have disaster recovery plans in place. 
Take 20 | frequent snapshots of your cluster--and periodically _test_ those snapshots by 21 | performing a real recovery! It is all too common for organizations to make 22 | routine backups but never test their recovery strategy. Often you'll find a 23 | glaring deficiency the first time you perform a real recovery (such as users 24 | being unaware of which drive to mount). It's better to work these bugs out of 25 | your process with routine testing, rather than at 3 a.m. when there is a crisis. 26 | -------------------------------------------------------------------------------- /LICENSE.asciidoc: -------------------------------------------------------------------------------- 1 | This work is licensed under a Creative Commons Attribution-NonCommercial-NoDerivs 3.0 Unported License. 2 | 3 | See http://creativecommons.org/licenses/by-nc-nd/3.0/ for the full text of the License. 4 | 5 | -------------------------------------------------------------------------------- /atlas.json: -------------------------------------------------------------------------------- 1 | { 2 | "branch": "master", 3 | "files": [ 4 | "cover.html", 5 | "titlepage.html", 6 | "copyright.html", 7 | "toc.html", 8 | "foreword.asciidoc", 9 | "Preface.asciidoc", 10 | "00_Getting_started.asciidoc", 11 | "01_Search_in_depth.asciidoc", 12 | "02_Dealing_with_language.asciidoc", 13 | "03_Aggregations.asciidoc", 14 | "04_Geolocation.asciidoc", 15 | "06_Modeling_your_data.asciidoc", 16 | "07_Admin.asciidoc", 17 | "ix.html", 18 | "author_bio.html", 19 | "colo.html" 20 | ], 21 | "formats": { 22 | "pdf": { 23 | "version": "web", 24 | "index": true, 25 | "toc": true, 26 | "syntaxhighlighting": true, 27 | "show_comments": false, 28 | "antennahouse_version": "AHFormatterV62_64-MR4" 29 | }, 30 | "epub": { 31 | "index": false, 32 | "toc": true, 33 | "epubcheck": true, 34 | "syntaxhighlighting": false, 35 | "show_comments": false 36 | }, 37 | "mobi": { 38 | "index": false, 39 | "toc": true, 40 | "syntaxhighlighting": false, 41 | "show_comments": false 42 | }, 43 | "html": { 44 | "index": false, 45 | "toc": false, 46 | "syntaxhighlighting": false, 47 | "show_comments": false, 48 | "consolidated": true 49 | } 50 | }, 51 | "theme": "oreillymedia/animal_theme", 52 | "title": "Elasticsearch: The Definitive Guide" 53 | } -------------------------------------------------------------------------------- /author_bio.html: -------------------------------------------------------------------------------- 1 |
2 |

About the Authors

3 |

Clinton Gormley was the first user of Elasticsearch and wrote the Perl API back in 2010. When Elasticsearch formed a company in 2012, he joined as a developer and the maintainer of the Perl modules. Now Clinton spends a lot of his time designing the user interfaces and speaking and writing about Elasticsearch. He studied medicine at the University of Cape Town and lives in Barcelona.

4 |

Zachary Tong has been working with Elasticsearch since 2011. During that time, he has written a number of tutorials to help beginners start using Elasticsearch. Zach is now a developer at Elasticsearch and maintains the PHP client, gives training courses, and helps customers manage clusters in production. He studied biology at Rensselaer Polytechnic Institute and now lives in South Carolina.

5 |
6 | -------------------------------------------------------------------------------- /book-extra-title-page.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

4 | 5 | Clinton 6 | 7 | 8 | Gormley 9 | 10 |

11 |
12 |
13 |

14 | 15 | Zachary 16 | 17 | 18 | Tong 19 | 20 |

21 |
22 |
23 | 24 |
25 | 28 |
29 |
30 |
31 | 32 | 33 |

34 | This work is licensed under a 35 | 36 | Creative Commons Attribution-NonCommercial-NoDerivs 3.0 Unported License 37 | 38 | . 39 |

40 |
41 |
42 |
43 |
44 |

45 | 46 | Abstract 47 | 48 |

49 |

50 | If you would like to purchase an eBook or printed version of this book, you can do so from O'Reilly Media: 51 | 52 | Buy this book from O'Reilly Media 53 | 54 |

55 |
56 | -------------------------------------------------------------------------------- /book.asciidoc: -------------------------------------------------------------------------------- 1 | :title-separator: | 2 | :bookseries: animal 3 | :es_build: 1 4 | :ref: https://www.elastic.co/guide/en/elasticsearch/reference/2.4 5 | 6 | = Elasticsearch: The Definitive Guide 7 | 8 | include::foreword.asciidoc[] 9 | 10 | include::Preface.asciidoc[] 11 | 12 | include::00_Getting_started.asciidoc[] 13 | 14 | include::01_Search_in_depth.asciidoc[] 15 | 16 | include::02_Dealing_with_language.asciidoc[] 17 | 18 | include::03_Aggregations.asciidoc[] 19 | 20 | include::04_Geolocation.asciidoc[] 21 | 22 | include::06_Modeling_your_data.asciidoc[] 23 | 24 | include::07_Admin.asciidoc[] 25 | 26 | -------------------------------------------------------------------------------- /callouts/1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/1.pdf -------------------------------------------------------------------------------- /callouts/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/1.png -------------------------------------------------------------------------------- /callouts/10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/10.pdf -------------------------------------------------------------------------------- /callouts/10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/10.png -------------------------------------------------------------------------------- /callouts/11.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/11.pdf -------------------------------------------------------------------------------- /callouts/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/11.png -------------------------------------------------------------------------------- /callouts/2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/2.pdf -------------------------------------------------------------------------------- /callouts/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/2.png -------------------------------------------------------------------------------- /callouts/3.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/3.pdf -------------------------------------------------------------------------------- /callouts/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/3.png -------------------------------------------------------------------------------- /callouts/4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/4.pdf -------------------------------------------------------------------------------- /callouts/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/4.png -------------------------------------------------------------------------------- /callouts/5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/5.pdf -------------------------------------------------------------------------------- /callouts/5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/5.png -------------------------------------------------------------------------------- /callouts/6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/6.pdf -------------------------------------------------------------------------------- /callouts/6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/6.png -------------------------------------------------------------------------------- /callouts/7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/7.pdf -------------------------------------------------------------------------------- /callouts/7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/7.png -------------------------------------------------------------------------------- /callouts/8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/8.pdf -------------------------------------------------------------------------------- /callouts/8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/8.png 
-------------------------------------------------------------------------------- /callouts/9.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/9.pdf -------------------------------------------------------------------------------- /callouts/9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/callouts/9.png -------------------------------------------------------------------------------- /colo.html: -------------------------------------------------------------------------------- 1 |
2 |

Colophon

3 | 4 |

The animal on the cover of Elasticsearch: The Definitive Guide is the elegant snail-eater (Dipsas elegans). This snake is native to Ecuador, on the Pacific slopes of the Andes. As the name suggests, its diet consists primarily of snails and slugs, which it finds by slowly navigating the forest floor or low-lying shrubs.

5 | 6 |

Males of this snake species range between 636 and 919 mm in length, while females range between 560 and 782 mm. The body displays various brown hues, with alternating dark and light vertical bars throughout.

7 | 8 |

Elegant snail-eaters are non-venomous and very docile. During the day they prefer moist hiding places, such as under leaves or in rotting logs, and they come out to forage at night. They lay an average of seven eggs per clutch. The moist habitat in which these snakes thrive is shrinking because of human encroachment and destruction, which may lead to their extinction.

9 | 10 |

Many of the animals on O'Reilly covers are endangered; all of them are important to the world. To learn more about how you can help, go to animals.oreilly.com.

11 | 12 |

The cover image is from Johnson's Natural History. The cover fonts are URW Typewriter and Guardian Sans. The text font is Adobe Minion Pro; the heading font is Adobe Myriad Condensed; and the code font is Dalton Maag's Ubuntu Mono.

13 | 14 |
15 | -------------------------------------------------------------------------------- /cover.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | -------------------------------------------------------------------------------- /images/cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/cover.png -------------------------------------------------------------------------------- /images/elas_0201.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0201.png -------------------------------------------------------------------------------- /images/elas_0202.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0202.png -------------------------------------------------------------------------------- /images/elas_0203.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0203.png -------------------------------------------------------------------------------- /images/elas_0204.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0204.png -------------------------------------------------------------------------------- /images/elas_0205.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0205.png -------------------------------------------------------------------------------- /images/elas_0206.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0206.png -------------------------------------------------------------------------------- /images/elas_0301.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0301.png -------------------------------------------------------------------------------- /images/elas_0401.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0401.png -------------------------------------------------------------------------------- /images/elas_0402.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0402.png -------------------------------------------------------------------------------- /images/elas_0403.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0403.png 
-------------------------------------------------------------------------------- /images/elas_0404.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0404.png -------------------------------------------------------------------------------- /images/elas_0405.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0405.png -------------------------------------------------------------------------------- /images/elas_0406.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0406.png -------------------------------------------------------------------------------- /images/elas_0901.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0901.png -------------------------------------------------------------------------------- /images/elas_0902.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_0902.png -------------------------------------------------------------------------------- /images/elas_1101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1101.png -------------------------------------------------------------------------------- /images/elas_1102.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1102.png -------------------------------------------------------------------------------- /images/elas_1103.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1103.png -------------------------------------------------------------------------------- /images/elas_1104.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1104.png -------------------------------------------------------------------------------- /images/elas_1105.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1105.png -------------------------------------------------------------------------------- /images/elas_1106.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1106.png 
-------------------------------------------------------------------------------- /images/elas_1107.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1107.png -------------------------------------------------------------------------------- /images/elas_1108.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1108.png -------------------------------------------------------------------------------- /images/elas_1109.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1109.png -------------------------------------------------------------------------------- /images/elas_1110.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1110.png -------------------------------------------------------------------------------- /images/elas_1111.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1111.png -------------------------------------------------------------------------------- /images/elas_1701.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1701.png -------------------------------------------------------------------------------- /images/elas_1702.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1702.png -------------------------------------------------------------------------------- /images/elas_1703.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1703.png -------------------------------------------------------------------------------- /images/elas_1704.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1704.png -------------------------------------------------------------------------------- /images/elas_1705.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1705.png -------------------------------------------------------------------------------- /images/elas_1706.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_1706.png 
-------------------------------------------------------------------------------- /images/elas_17in01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_17in01.png -------------------------------------------------------------------------------- /images/elas_17in02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_17in02.png -------------------------------------------------------------------------------- /images/elas_28in01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_28in01.png -------------------------------------------------------------------------------- /images/elas_28in02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_28in02.png -------------------------------------------------------------------------------- /images/elas_29in01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_29in01.png -------------------------------------------------------------------------------- /images/elas_29in02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_29in02.png -------------------------------------------------------------------------------- /images/elas_29in03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_29in03.png -------------------------------------------------------------------------------- /images/elas_33in01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_33in01.png -------------------------------------------------------------------------------- /images/elas_33in02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_33in02.png -------------------------------------------------------------------------------- /images/elas_4401.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_4401.png -------------------------------------------------------------------------------- /images/elas_4402.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_4402.png -------------------------------------------------------------------------------- /images/elas_4403.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_4403.png -------------------------------------------------------------------------------- /images/elas_4404.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/elasticsearch-definitive-guide/a10d667828ceb127d6b092bb3674f833dba98964/images/elas_4404.png -------------------------------------------------------------------------------- /ix.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | -------------------------------------------------------------------------------- /page_header.html: -------------------------------------------------------------------------------- 1 |

2 | WARNING: This documentation covers Elasticsearch 2.x. The 2.x 3 | versions of Elasticsearch have passed their 4 | EOL dates. If you are running 5 | a 2.x version, we strongly advise you to upgrade. 6 |

7 |

8 | This documentation is no longer maintained and may be removed. For the latest 9 | information, see the current 11 | Elasticsearch documentation. 12 |

13 | -------------------------------------------------------------------------------- /scripts/300_Aggregations/README.md: -------------------------------------------------------------------------------- 1 | This directory contains two scripts that can be used to generate a taxi example data set. They require Python 3 and for `import.py` you must also have the Elasticsearch Python client installed (`pip3 install elasticsearch`). 2 | 3 | Run `./generate.py 100 > documents.json` to generate 100 random taxi rides. You can import them into a local Elasticsearch cluster (5.x or 6.0) by running `./import.py mappings.json documents.json`. 4 | -------------------------------------------------------------------------------- /scripts/300_Aggregations/import.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import elasticsearch 4 | import elasticsearch.helpers 5 | import json 6 | import logging 7 | import sys 8 | import itertools 9 | 10 | logger = logging.getLogger("import") 11 | 12 | index_name = "taxis" 13 | type_name = "rides" 14 | 15 | 16 | def create_index(client, mapping_file): 17 | if client.indices.exists(index=index_name): 18 | logger.info("Index [%s] already exists. Deleting it." % index_name) 19 | client.indices.delete(index=index_name) 20 | logger.info("Creating index [%s]" % index_name) 21 | client.indices.create(index=index_name, body='{"index.number_of_replicas": 0}') 22 | with open(mapping_file, "rt") as f: 23 | mappings = f.read() 24 | client.indices.put_mapping(index=index_name, 25 | doc_type=type_name, 26 | body=json.loads(mappings)) 27 | 28 | 29 | def import_data(client, data_file): 30 | meta_data = '{"_op_type": "index", "_index": "%s", "_type": "%s"}' % (index_name, type_name) 31 | with open(data_file, "rt") as f: 32 | elasticsearch.helpers.bulk(client, f, index=index_name, doc_type=type_name) 33 | 34 | 35 | def main(): 36 | if len(sys.argv) != 3: 37 | print("usage %s mapping_file_path data_file_path" % sys.argv[0]) 38 | exit(1) 39 | 40 | es = elasticsearch.Elasticsearch() 41 | mapping_file = sys.argv[1] 42 | data_file = sys.argv[2] 43 | 44 | create_index(es, mapping_file) 45 | import_data(es, data_file) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /scripts/300_Aggregations/mappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "rides": { 3 | "properties": { 4 | "vendor": { 5 | "type": "keyword" 6 | }, 7 | "pickup_datetime": { 8 | "type": "date", 9 | "format": "yyyy-MM-dd HH:mm:ss" 10 | }, 11 | "dropoff_datetime": { 12 | "type": "date", 13 | "format": "yyyy-MM-dd HH:mm:ss" 14 | }, 15 | "passenger_count": { 16 | "type": "integer" 17 | }, 18 | "trip_distance": { 19 | "scaling_factor": 100, 20 | "type": "scaled_float" 21 | }, 22 | "pickup_zone": { 23 | "type": "keyword" 24 | }, 25 | "dropoff_zone": { 26 | "type": "keyword" 27 | }, 28 | "payment_type": { 29 | "type": "keyword" 30 | }, 31 | "fare_amount": { 32 | "scaling_factor": 100, 33 | "type": "scaled_float" 34 | }, 35 | "tip_amount": { 36 | "scaling_factor": 100, 37 | "type": "scaled_float" 38 | }, 39 | "total_amount": { 40 | "scaling_factor": 100, 41 | "type": "scaled_float" 42 | } 43 | }, 44 | "dynamic": "strict" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /scripts/svg_to_png.pl: -------------------------------------------------------------------------------- 
1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | use Path::Class; 6 | use Capture::Tiny qw(capture); 7 | my $inkscape = 'inkscape'; 8 | 9 | my $width = shift @ARGV or die "USAGE: $0 [width]\n"; 10 | 11 | my $src = dir('svg'); 12 | my $out = dir('images_temp'); 13 | 14 | $out->rmtree; 15 | $out->mkpath; 16 | 17 | while ( my $file = $src->next ) { 18 | next unless -f $file; 19 | my $name = $file->basename; 20 | if ( $name =~ s/\.svg$// ) { 21 | print "$name.svg -> $name.png\n"; 22 | my ( $stdout, $stderr, $exit ) = capture { 23 | system( $inkscape, '--export-png', $out->file("$name.png"), 24 | '-w', $width, $file ); 25 | }; 26 | if ($exit) { 27 | die "Couldn't convert $file: $stderr\n"; 28 | } 29 | } 30 | else { 31 | print "Skipping: $name\n"; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /snippets/010_Intro/10_Info.json: -------------------------------------------------------------------------------- 1 | # Check that Elasticsearch is running 2 | GET / -------------------------------------------------------------------------------- /snippets/010_Intro/15_Count.json: -------------------------------------------------------------------------------- 1 | # Count all documents in the cluster 2 | GET /_count 3 | { 4 | "query": { 5 | "match_all": {} 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /snippets/010_Intro/25_Index.json: -------------------------------------------------------------------------------- 1 | # Delete the `megacorp` index in case it already exists 2 | DELETE /megacorp 3 | 4 | # Index document 1, type "employee", in the 5 | # "megacorp" index 6 | PUT /megacorp/employee/1 7 | { 8 | "first_name" : "John", 9 | "last_name" : "Smith", 10 | "age" : 25, 11 | "about" : "I love to go rock climbing", 12 | "interests": [ "sports", "music" ] 13 | } 14 | 15 | # Index two more documents 16 | PUT /megacorp/employee/2 17 | { 18 | "first_name" : "Jane", 19 | "last_name" : "Smith", 20 | "age" : 32, 21 | "about" : "I like to collect rock albums", 22 | "interests": [ "music" ] 23 | } 24 | 25 | PUT /megacorp/employee/3 26 | { 27 | "first_name" : "Douglas", 28 | "last_name" : "Fir", 29 | "age" : 35, 30 | "about": "I like to build cabinets", 31 | "interests": [ "forestry" ] 32 | } 33 | -------------------------------------------------------------------------------- /snippets/010_Intro/30_Get.json: -------------------------------------------------------------------------------- 1 | # Delete the `megacorp` index in case it already exists 2 | DELETE /megacorp 3 | 4 | # Index an example document 5 | PUT /megacorp/employee/1 6 | { 7 | "first_name" : "John", 8 | "last_name" : "Smith", 9 | "age" : 25, 10 | "about" : "I love to go rock climbing", 11 | "interests": [ "sports", "music" ] 12 | } 13 | 14 | # Retrieve the document 15 | GET /megacorp/employee/1 16 | 17 | # Update the document, by PUTing it again 18 | PUT /megacorp/employee/1 19 | { 20 | "first_name" : "John", 21 | "last_name" : "Smith", 22 | "age" : 26, 23 | "about" : "I love to go rock climbing", 24 | "interests": [ "sports", "music", "movies" ] 25 | } 26 | 27 | # Retrieve the updated document 28 | GET /megacorp/employee/1 29 | 30 | # Delete the document 31 | DELETE /megacorp/employee/1 32 | 33 | # Note: HEAD/exists requests do not work in Sense 34 | # because they only return HTTP headers, not 35 | # a JSON body -------------------------------------------------------------------------------- 
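Because a HEAD/exists request returns only HTTP headers and no JSON body (as the note at the end of the snippet above explains), it is easiest to try from the command line rather than from Sense. The following is a minimal sketch, not part of the original snippet set, assuming a local node listening on the default port 9200 and the `megacorp` example documents indexed by the snippets above:

# Prints "HTTP/1.1 200 OK" if document 1 exists, or "HTTP/1.1 404 Not Found" if it does not
curl -i -XHEAD 'http://localhost:9200/megacorp/employee/1'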
/snippets/010_Intro/30_Simple_search.json: -------------------------------------------------------------------------------- 1 | # Delete the `megacorp` index in case it already exists 2 | DELETE /megacorp 3 | 4 | # Index our example documents 5 | PUT /megacorp/employee/1 6 | { 7 | "first_name" : "John", 8 | "last_name" : "Smith", 9 | "age" : 25, 10 | "about" : "I love to go rock climbing", 11 | "interests": [ "sports", "music" ] 12 | } 13 | 14 | PUT /megacorp/employee/2 15 | { 16 | "first_name" : "Jane", 17 | "last_name" : "Smith", 18 | "age" : 32, 19 | "about" : "I like to collect rock albums", 20 | "interests": [ "music" ] 21 | } 22 | 23 | PUT /megacorp/employee/3 24 | { 25 | "first_name" : "Douglas", 26 | "last_name" : "Fir", 27 | "age" : 35, 28 | "about": "I like to build cabinets", 29 | "interests": [ "forestry" ] 30 | } 31 | 32 | # Search for all employees in the megacorp index: 33 | GET /megacorp/employee/_search 34 | 35 | # Search for all employees in the megacorp index 36 | # who have "Smith" in the last_name field 37 | GET /megacorp/employee/_search?q=last_name:Smith 38 | 39 | # Same query as above, but using the Query DSL 40 | GET /megacorp/employee/_search 41 | { 42 | "query": { 43 | "match": { 44 | "last_name": "smith" 45 | } 46 | } 47 | } 48 | 49 | -------------------------------------------------------------------------------- /snippets/010_Intro/35_Aggregations.json: -------------------------------------------------------------------------------- 1 | # Delete the `megacorp` index in case it already exists 2 | DELETE /megacorp 3 | 4 | # Index our example documents 5 | PUT /megacorp/employee/1 6 | { 7 | "first_name" : "John", 8 | "last_name" : "Smith", 9 | "age" : 25, 10 | "about" : "I love to go rock climbing", 11 | "interests": [ "sports", "music" ] 12 | } 13 | 14 | PUT /megacorp/employee/2 15 | { 16 | "first_name" : "Jane", 17 | "last_name" : "Smith", 18 | "age" : 32, 19 | "about" : "I like to collect rock albums", 20 | "interests": [ "music" ] 21 | } 22 | 23 | PUT /megacorp/employee/3 24 | { 25 | "first_name" : "Douglas", 26 | "last_name" : "Fir", 27 | "age" : 35, 28 | "about": "I like to build cabinets", 29 | "interests": [ "forestry" ] 30 | } 31 | 32 | # Calculate the most popular interests for all employees 33 | GET /megacorp/employee/_search 34 | { 35 | "aggs": { 36 | "all_interests": { 37 | "terms": { 38 | "field": "interests.keyword" 39 | } 40 | } 41 | } 42 | } 43 | 44 | # Calculate the most popular interests for 45 | # employees named "Smith" 46 | GET /megacorp/employee/_search 47 | { 48 | "query": { 49 | "match": { 50 | "last_name": "smith" 51 | } 52 | }, 53 | "aggs": { 54 | "all_interests": { 55 | "terms": { 56 | "field": "interests.keyword" 57 | } 58 | } 59 | } 60 | } 61 | 62 | # Calculate the average age of employee per interest 63 | GET /megacorp/employee/_search 64 | { 65 | "aggs" : { 66 | "all_interests" : { 67 | "terms" : { 68 | "field" : "interests.keyword" 69 | }, 70 | "aggs" : { 71 | "avg_age" : { 72 | "avg" : { "field" : "age" } 73 | } 74 | } 75 | } 76 | } 77 | } -------------------------------------------------------------------------------- /snippets/020_Distributed_Cluster/10_Cluster_health.json: -------------------------------------------------------------------------------- 1 | # Delete ALL DATA IN THE CLUSTER! 
2 | DELETE /_all 3 | 4 | # Retrieve the cluster health 5 | GET /_cluster/health 6 | -------------------------------------------------------------------------------- /snippets/020_Distributed_Cluster/15_Add_index.json: -------------------------------------------------------------------------------- 1 | # Delete ALL DATA IN THE CLUSTER! 2 | DELETE /_all 3 | 4 | # Create an index with 3 primary shards with 1 replica each 5 | PUT /blogs 6 | { 7 | "settings" : { 8 | "number_of_shards" : 3, 9 | "number_of_replicas" : 1 10 | } 11 | } 12 | 13 | # Retrieve the cluster health 14 | GET /_cluster/health 15 | -------------------------------------------------------------------------------- /snippets/020_Distributed_Cluster/30_Replicas.json: -------------------------------------------------------------------------------- 1 | # Delete ALL DATA IN THE CLUSTER! 2 | DELETE /_all 3 | 4 | # Create an index with 3 primary shards with 1 replica each 5 | PUT /blogs 6 | { 7 | "settings" : { 8 | "number_of_shards" : 3, 9 | "number_of_replicas" : 1 10 | } 11 | } 12 | 13 | # Increae number of replicas to 2 14 | PUT /blogs/_settings 15 | { 16 | "number_of_replicas" : 2 17 | } 18 | 19 | # Retrieve the cluster health 20 | GET /_cluster/health 21 | -------------------------------------------------------------------------------- /snippets/030_Data/10_Create_doc_123.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Create a document with ID 123 5 | PUT /website/blog/123 6 | { 7 | "title": "My first blog entry", 8 | "text": "Just trying this out...", 9 | "date": "2014/01/01" 10 | } 11 | -------------------------------------------------------------------------------- /snippets/030_Data/10_Create_doc_auto_ID.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Create a document with an auto-generated ID 5 | POST /website/blog/ 6 | { 7 | "title": "My second blog entry", 8 | "text": "Still trying this out...", 9 | "date": "2014/01/01" 10 | } 11 | -------------------------------------------------------------------------------- /snippets/030_Data/15_Get_document.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Create a document with ID 123 5 | PUT /website/blog/123 6 | { 7 | "title": "My first blog entry", 8 | "text": "Just trying this out...", 9 | "date": "2014/01/01" 10 | } 11 | 12 | # Retrieve document with ID 123 13 | GET /website/blog/123 14 | 15 | # Retrieve non-existent document 16 | GET /website/blog/124 17 | 18 | # Retrieve part of a document 19 | GET /website/blog/123?_source=title,text 20 | 21 | # Retrieve just the _source field 22 | GET /website/blog/123/_source 23 | -------------------------------------------------------------------------------- /snippets/030_Data/25_Reindex_doc.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Create a document with ID 123 5 | PUT /website/blog/123 6 | { 7 | "title": "My first blog entry", 8 | "text": "Just trying this out...", 9 | "date": "2014/01/01" 10 | } 11 | 12 | # Reindex a document 13 | PUT /website/blog/123 14 | { 15 | "title": "My first blog entry", 16 | "text": "I am starting to get the hang of this...", 17 | "date": "2014/01/02" 18 | 
} 19 | 20 | # Retrieve the document 21 | GET /website/blog/123 22 | -------------------------------------------------------------------------------- /snippets/030_Data/30_Create_doc.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Create a document with ID 123 if the doc doesn't already exist 5 | PUT /website/blog/123?op_type=create 6 | { 7 | "title": "My first blog entry", 8 | "text": "Just trying this out...", 9 | "date": "2014/01/01" 10 | } 11 | 12 | # Conflict when trying to create a document which already exists 13 | PUT /website/blog/123?op_type=create 14 | { 15 | "title": "My first blog entry", 16 | "text": "I am starting to get the hang of this...", 17 | "date": "2014/01/02" 18 | } 19 | 20 | -------------------------------------------------------------------------------- /snippets/030_Data/35_Delete_doc.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Create a document 5 | PUT /website/blog/123 6 | { 7 | "title": "My first blog entry", 8 | "text": "I am starting to get the hang of this...", 9 | "date": "2014/01/02" 10 | } 11 | 12 | # Delete an existing document 13 | DELETE /website/blog/123 14 | 15 | # Delete an non-existent document 16 | DELETE /website/blog/123 17 | 18 | -------------------------------------------------------------------------------- /snippets/030_Data/40_Concurrency.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Create a new document 5 | PUT /website/blog/1/_create 6 | { 7 | "title": "My first blog entry", 8 | "text": "Just trying this out..." 9 | } 10 | 11 | # Retrieve the document 12 | GET /website/blog/1 13 | 14 | # Reindex the document only if it is still version 1 15 | PUT /website/blog/1?version=1 16 | { 17 | "title": "My first blog entry", 18 | "text": "Starting to get the hang of this..." 19 | } 20 | 21 | # Fail with a conflict error because the document is now version 2 22 | PUT /website/blog/1?version=1 23 | { 24 | "title": "My first blog entry", 25 | "text": "Starting to get the hang of this..." 26 | } 27 | 28 | -------------------------------------------------------------------------------- /snippets/030_Data/40_External_versions.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Set an external version number 5 | PUT /website/blog/2?version=5&version_type=external 6 | { 7 | "title": "My first external blog entry", 8 | "text": "Starting to get the hang of this..." 9 | } 10 | 11 | # Reindex the document with a new external version 12 | PUT /website/blog/2?version=10&version_type=external 13 | { 14 | "title": "My first external blog entry", 15 | "text": "This is a piece of cake..." 16 | } 17 | 18 | # Fail with a conflict error with an external version number 19 | PUT /website/blog/2?version=10&version_type=external 20 | { 21 | "title": "My first external blog entry", 22 | "text": "This is a piece of cake..." 
23 | } 24 | -------------------------------------------------------------------------------- /snippets/030_Data/45_Partial_update.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Index a document 5 | PUT /website/blog/1 6 | { 7 | "title": "My first blog entry", 8 | "text": "I am starting to get the hang of this..." 9 | } 10 | 11 | # Partial update with doc 12 | POST /website/blog/1/_update 13 | { 14 | "doc" : { 15 | "tags" : [ "testing" ], 16 | "views": 0 17 | } 18 | } 19 | 20 | # Retrieve the updated doc 21 | GET /website/blog/1 22 | 23 | # Partial update with a script 24 | POST /website/blog/1/_update 25 | { 26 | "script" : "ctx._source.views+=1" 27 | } 28 | 29 | # Retrieve the updated doc 30 | GET /website/blog/1 31 | 32 | # Partial update with a script and params 33 | POST /website/blog/1/_update 34 | { 35 | "script" : "ctx._source.tags+=new_tag", 36 | "params" : { 37 | "new_tag" : "search" 38 | } 39 | } 40 | 41 | # Retrieve the updated doc 42 | GET /website/blog/1 43 | 44 | # Delete the document with the update API 45 | POST /website/blog/1/_update 46 | { 47 | "script" : "ctx.op = ctx._source.views == count ? 'delete' : 'none'", 48 | "params" : { 49 | "count": 1 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /snippets/030_Data/45_Upsert.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Insert the `upsert` doc because the doc doesn't exist 5 | POST /website/pageviews/1/_update 6 | { 7 | "script" : "ctx._source.views+=1", 8 | "upsert": { 9 | "views": 1 10 | } 11 | } 12 | 13 | # Retrieve the doc 14 | GET /website/pageviews/1 15 | 16 | # Then run the script 17 | POST /website/pageviews/1/_update 18 | { 19 | "script" : "ctx._source.views+=1", 20 | "upsert": { 21 | "views": 1 22 | } 23 | } 24 | 25 | # Retrieve the doc 26 | GET /website/pageviews/1 27 | 28 | # Retry the update request up to 5 times 29 | POST /website/pageviews/1/_update?retry_on_conflict=5 30 | { 31 | "script" : "ctx._source.views+=1", 32 | "upsert": { 33 | "views": 1 34 | } 35 | } 36 | 37 | -------------------------------------------------------------------------------- /snippets/030_Data/50_Mget.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Insert two documents 5 | PUT /website/blog/2 6 | { 7 | "title": "My first external blog entry", 8 | "text": "This is a piece of cake..." 
9 | } 10 | 11 | PUT /website/pageviews/1 12 | { 13 | "views": 2 14 | } 15 | 16 | # Retrieve multiple documents 17 | GET /_mget 18 | { 19 | "docs" : [ 20 | { 21 | "_index" : "website", 22 | "_type" : "blog", 23 | "_id" : 2 24 | }, 25 | { 26 | "_index" : "website", 27 | "_type" : "pageviews", 28 | "_id" : 1, 29 | "_source": "views" 30 | } 31 | ] 32 | } 33 | 34 | # Retrieve multiple docs, with a default index and type 35 | GET /website/blog/_mget 36 | { 37 | "docs" : [ 38 | { "_id" : 2 }, 39 | { "_type" : "pageviews", "_id" : 1 } 40 | ] 41 | } 42 | 43 | # Retrieve multiple docs with a default index and type, short form 44 | GET /website/blog/_mget 45 | { 46 | "ids" : [ "2", "1" ] 47 | } 48 | 49 | -------------------------------------------------------------------------------- /snippets/030_Data/55_Bulk.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Create a document 5 | PUT /website/blog/123 6 | { 7 | "title": "My first blog entry", 8 | "text": "I am starting to get the hang of this...", 9 | "date": "2014/01/02" 10 | } 11 | 12 | # Create, index, update and delete multiple documents 13 | POST /_bulk 14 | { "delete": { "_index": "website", "_type": "blog", "_id": "123" }} 15 | { "create": { "_index": "website", "_type": "blog", "_id": "123" }} 16 | { "title": "My first blog post" } 17 | { "index": { "_index": "website", "_type": "blog" }} 18 | { "title": "My second blog post" } 19 | { "update": { "_index": "website", "_type": "blog", "_id": "123", "_retry_on_conflict" : 3} } 20 | { "doc" : {"title" : "My updated blog post"} } 21 | 22 | -------------------------------------------------------------------------------- /snippets/030_Data/55_Bulk_defaults.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Bulk API with default index 5 | POST /website/_bulk 6 | { "index": { "_type": "log" }} 7 | { "event": "User logged in" } 8 | 9 | 10 | # Bulk API with default index and type 11 | POST /website/log/_bulk 12 | { "index": {}} 13 | { "event": "User logged in" } 14 | { "index": { "_type": "blog" }} 15 | { "title": "Overriding the default type" } 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /snippets/030_Data/55_Bulk_independent.json: -------------------------------------------------------------------------------- 1 | # Delete all data in the `website` index 2 | DELETE /website 3 | 4 | # Create a document 5 | PUT /website/blog/123 6 | { 7 | "title": "My first blog entry", 8 | "text": "I am starting to get the hang of this...", 9 | "date": "2014/01/02" 10 | } 11 | 12 | # Bulk API with errors 13 | POST /_bulk 14 | { "create": { "_index": "website", "_type": "blog", "_id": "123" }} 15 | { "title": "Cannot create - it already exists" } 16 | { "index": { "_index": "website", "_type": "blog", "_id": "123" }} 17 | { "title": "But we can update it" } 18 | 19 | -------------------------------------------------------------------------------- /snippets/050_Search/05_Empty_search.json: -------------------------------------------------------------------------------- 1 | # First load the test data 2 | # https://gist.github.com/clintongormley/8579281 3 | 4 | # Empty search 5 | GET /_search 6 | -------------------------------------------------------------------------------- /snippets/050_Search/15_Pagination.json: 
-------------------------------------------------------------------------------- 1 | # First load the test data 2 | # https://gist.github.com/clintongormley/8579281 3 | 4 | # Results 0..4 5 | GET /_search?size=5 6 | 7 | # Results 5..9 8 | GET /_search?size=5&from=5 9 | 10 | # Results 10..14 11 | GET /_search?size=5&from=10 12 | -------------------------------------------------------------------------------- /snippets/050_Search/20_All_field.json: -------------------------------------------------------------------------------- 1 | # First load the test data 2 | # https://gist.github.com/clintongormley/8579281 3 | 4 | # Find `mary` in the `_all` field 5 | GET /_search?q=mary 6 | 7 | # Find documents where: 8 | # `name` must contain `mary` or `john` 9 | # and `date` must be greater than `2014-09-10` 10 | # and `_all` must contain `aggregations` or `geo` 11 | GET /_search?q=%2Bname%3A(mary+john)+%2Bdate%3A%3E2014-09-10+%2B(aggregations+geo) -------------------------------------------------------------------------------- /snippets/050_Search/20_Query_string.json: -------------------------------------------------------------------------------- 1 | # First load the test data 2 | # https://gist.github.com/clintongormley/8579281 3 | 4 | # Find `elasticsearch` in the `tweet` field 5 | GET /_all/tweet/_search?q=tweet:elasticsearch 6 | 7 | # Find `john` in the `name` field 8 | # and `mary` in the `tweet` field 9 | GET /_all/tweet/_search?q=%2Bname%3Ajohn+%2Btweet%3Amary 10 | 11 | -------------------------------------------------------------------------------- /snippets/052_Mapping_Analysis/25_Data_type_differences.json: -------------------------------------------------------------------------------- 1 | # First load the test data 2 | # https://gist.github.com/clintongormley/8579281 3 | 4 | # Find `2014` in the `_all` field: 12 results 5 | GET /_search?q=2014 6 | 7 | # Find `2014-09-15` in the `_all` field: 12 results 8 | GET /_search?q=2014-09-15 9 | 10 | # Find `2014-09-15` in the `date` field: 1 result 11 | GET /_search?q=date:2014-09-15 12 | 13 | # Find `2014` in the `date` field: 0 results 14 | GET /_search?q=date:2014 15 | 16 | # Retrieve the mapping for index:gb, type: tweet 17 | GET /gb/_mapping/tweet 18 | -------------------------------------------------------------------------------- /snippets/052_Mapping_Analysis/40_Analyze.json: -------------------------------------------------------------------------------- 1 | # Analyze the `text` with the `standard` analyzer 2 | GET /_analyze 3 | { 4 | "analyzer": "standard", 5 | "text": "Text to analyze" 6 | } 7 | -------------------------------------------------------------------------------- /snippets/052_Mapping_Analysis/45_Mapping.json: -------------------------------------------------------------------------------- 1 | # Delete the `gb` index 2 | DELETE /gb 3 | 4 | # Specify mappings when creating the index 5 | PUT /gb 6 | { 7 | "mappings": { 8 | "tweet" : { 9 | "properties" : { 10 | "tweet" : { 11 | "type" : "string", 12 | "analyzer": "english" 13 | }, 14 | "date" : { 15 | "type" : "date" 16 | }, 17 | "name" : { 18 | "type" : "string" 19 | }, 20 | "user_id" : { 21 | "type" : "long" 22 | } 23 | } 24 | } 25 | } 26 | } 27 | 28 | # Update mappings on an existing index 29 | PUT /gb/_mapping/tweet 30 | { 31 | "properties" : { 32 | "tag" : { 33 | "type" : "string", 34 | "index": "not_analyzed" 35 | } 36 | } 37 | } 38 | 39 | # Check new mapping 40 | GET /gb/_mapping/tweet 41 | 42 | # Test the analyzer for the `tweet` field 43 | GET /gb/_analyze 44 | { 45 
| "field": "tweet", 46 | "text": "Black-cats" 47 | } 48 | 49 | # Test the analyzer for the `tag` field 50 | GET /gb/_analyze 51 | { 52 | "field": "tag", 53 | "text": "Black-cats" 54 | } 55 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/60_Bool_query.json: -------------------------------------------------------------------------------- 1 | # First load the test data 2 | # https://gist.github.com/clintongormley/8579281 3 | 4 | # Bool query 5 | GET /_search 6 | { 7 | "query": { 8 | "bool": { 9 | "must": { 10 | "match": { 11 | "tweet": "elasticsearch" 12 | } 13 | }, 14 | "must_not": { 15 | "match": { 16 | "name": "mary" 17 | } 18 | }, 19 | "should": { 20 | "match": { 21 | "tweet": "full text" 22 | } 23 | }, 24 | "filter": { 25 | "range": { 26 | "age" : { 27 | "gt" : 30 28 | } 29 | } 30 | } 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/60_Empty_query.json: -------------------------------------------------------------------------------- 1 | # First load the test data 2 | # https://gist.github.com/clintongormley/8579281 3 | 4 | # Empty query 5 | GET /_search 6 | {} 7 | 8 | # Match all query 9 | GET /_search 10 | { 11 | "query": { 12 | "match_all": {} 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/60_Match_query.json: -------------------------------------------------------------------------------- 1 | # Match query 2 | GET /_search 3 | { 4 | "query": { 5 | "match": { 6 | "tweet": "elasticsearch" 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/70_Bool_filter.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some examples 5 | PUT /test/test/1 6 | { 7 | "folder": "inbox", 8 | "tag": [ 9 | "work" 10 | ], 11 | "starred": true, 12 | "unread": false 13 | } 14 | 15 | PUT /test/test/2 16 | { 17 | "folder": "inbox", 18 | "tag": [ 19 | "spam" 20 | ], 21 | "starred": true, 22 | "unread": false 23 | } 24 | 25 | PUT /test/test/3 26 | { 27 | "folder": "inbox", 28 | "tag": [ 29 | "other" 30 | ], 31 | "starred": false, 32 | "unread": true 33 | } 34 | 35 | 36 | 37 | # Where `folder` is "inbox" 38 | # and `tag` is not spam 39 | # and either `starred` or `unread` is true 40 | GET /test/test/_search 41 | { 42 | "query": { 43 | "bool": { 44 | "filter": { 45 | "bool": { 46 | "must": { 47 | "term": { 48 | "folder": "inbox" 49 | } 50 | }, 51 | "must_not": { 52 | "term": { 53 | "tag": "spam" 54 | } 55 | }, 56 | "should": [ 57 | { 58 | "term": { 59 | "starred": true 60 | } 61 | }, 62 | { 63 | "term": { 64 | "unread": true 65 | } 66 | } 67 | ] 68 | } 69 | } 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/70_Exists_filter.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some examples 5 | PUT /test/test/1 6 | { 7 | "title": "About search", 8 | "age": 26, 9 | "date": "2014-09-01", 10 | "tag": [ 11 | "full_text", 12 | "search" 13 | ], 14 | "public": false 15 | } 16 | 17 | PUT /test/test/2 18 | { 19 | "age": 38, 20 | "date": "2014-09-02", 21 | "tag": [ 22 | "full_text", 23 | "nosql" 24 | ], 25 | "public": true 26 | } 27 | 28 | 29 | # Where `title` field 
exists 30 | GET /test/test/_search 31 | { 32 | "query": { 33 | "bool": { 34 | "filter": { 35 | "exists": { 36 | "field": "title" 37 | } 38 | } 39 | } 40 | } 41 | } 42 | 43 | # Where `title` field is missing 44 | GET /test/test/_search 45 | { 46 | "query": { 47 | "bool": { 48 | "filter": { 49 | "missing": { 50 | "field": "title" 51 | } 52 | } 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/70_Match_all_query.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some examples 5 | PUT /test/test/1 6 | { 7 | "title": "About search", 8 | "age": 26, 9 | "date": "2014-09-01", 10 | "tag": [ 11 | "full_text", 12 | "search" 13 | ], 14 | "public": false 15 | } 16 | 17 | PUT /test/test/2 18 | { 19 | "age": 38, 20 | "date": "2014-09-02", 21 | "tag": [ 22 | "full_text", 23 | "nosql" 24 | ], 25 | "public": true 26 | } 27 | 28 | 29 | # Match all documents 30 | GET /test/test/_search 31 | { 32 | "query": { 33 | "match_all": {} 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/70_Match_query.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Make the `tag` field an exact value field 5 | PUT /test 6 | { 7 | "mappings": { 8 | "test": { 9 | "properties":{ 10 | "tag": { 11 | "type": "string", 12 | "index": "not_analyzed" 13 | } 14 | } 15 | } 16 | } 17 | } 18 | 19 | 20 | # Insert some examples 21 | PUT /test/test/1 22 | { 23 | "title": "About search", 24 | "age": 26, 25 | "date": "2014-09-01", 26 | "tag": [ 27 | "full_text", 28 | "search" 29 | ], 30 | "public": false 31 | } 32 | 33 | PUT /test/test/2 34 | { 35 | "age": 38, 36 | "date": "2014-09-02", 37 | "tag": [ 38 | "full_text", 39 | "nosql" 40 | ], 41 | "public": true 42 | } 43 | 44 | # Where `title` includes "about" or "search" 45 | GET /test/test/_search 46 | { 47 | "query": { 48 | "match": { 49 | "title": "About Search!" 
50 | } 51 | } 52 | } 53 | 54 | 55 | # Where `age` is 26 56 | GET /test/test/_search 57 | { 58 | "query": { 59 | "match": { 60 | "age": 26 61 | } 62 | } 63 | } 64 | 65 | # Where `date` is "2014-09-01" 66 | GET /test/test/_search 67 | { 68 | "query": { 69 | "match": { 70 | "date": "2014-09-01" 71 | } 72 | } 73 | } 74 | 75 | # Where `public` is true 76 | GET /test/test/_search 77 | { 78 | "query": { 79 | "match": { 80 | "public": true 81 | } 82 | } 83 | } 84 | 85 | # Where `tag` is "full_text" 86 | GET /test/test/_search 87 | { 88 | "query": { 89 | "match": { 90 | "tag": "full_text" 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/70_Multi_match_query.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some example docs 5 | PUT /test/test/1 6 | { 7 | "title": "Full text search is awesome", 8 | "body": "Let's talk about information retrieval" 9 | } 10 | 11 | PUT /test/test/2 12 | { 13 | "title": "Information retrieval", 14 | "body": "Let's talk about full text search" 15 | } 16 | 17 | # Match "full text search" in the `title` or `body` 18 | GET /_search 19 | { 20 | "query": { 21 | "multi_match": { 22 | "query": "full text search", 23 | "fields": [ 24 | "title", 25 | "body" 26 | ] 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/70_Range_filter.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some examples 5 | PUT /test/test/1 6 | { 7 | "title": "About search", 8 | "age": 26, 9 | "date": "2014-09-01", 10 | "tag": [ 11 | "full_text", 12 | "search" 13 | ], 14 | "public": false 15 | } 16 | 17 | PUT /test/test/2 18 | { 19 | "age": 38, 20 | "date": "2014-09-02", 21 | "tag": [ 22 | "full_text", 23 | "nosql" 24 | ], 25 | "public": true 26 | } 27 | 28 | 29 | # Where `age` >= 20 and < 30 30 | GET /test/test/_search 31 | { 32 | "query": { 33 | "filtered": { 34 | "filter": { 35 | "range": { 36 | "age": { 37 | "gte": 20, 38 | "lt": 30 39 | } 40 | } 41 | } 42 | } 43 | } 44 | } 45 | 46 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/70_Term_filter.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some examples 5 | PUT /test/test/1 6 | { 7 | "title": "About search", 8 | "age": 26, 9 | "date": "2014-09-01", 10 | "tag": [ 11 | "full_text", 12 | "search" 13 | ], 14 | "public": false 15 | } 16 | 17 | PUT /test/test/2 18 | { 19 | "age": 38, 20 | "date": "2014-09-02", 21 | "tag": [ 22 | "full_text", 23 | "nosql" 24 | ], 25 | "public": true 26 | } 27 | 28 | 29 | # Where `age` is 26 30 | GET /test/test/_search 31 | { 32 | "query": { 33 | "filtered": { 34 | "filter": { 35 | "term": { 36 | "age": 26 37 | } 38 | } 39 | } 40 | } 41 | } 42 | 43 | # Where `date` is "2014-09-01" 44 | GET /test/test/_search 45 | { 46 | "query": { 47 | "filtered": { 48 | "filter": { 49 | "term": { 50 | "date": "2014-09-01" 51 | } 52 | } 53 | } 54 | } 55 | } 56 | 57 | # Where `public` is true 58 | GET /test/test/_search 59 | { 60 | "query": { 61 | "filtered": { 62 | "filter": { 63 | "term": { 64 | "public": true 65 | } 66 | } 67 | } 68 | } 69 | } 70 | 71 | # Where `tag` is "full_text" 72 | GET /test/test/_search 73 | { 74 | 
"query": { 75 | "filtered": { 76 | "filter": { 77 | "term": { 78 | "tag": "full_text" 79 | } 80 | } 81 | } 82 | } 83 | } 84 | 85 | 86 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/70_Terms_filter.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some examples 5 | PUT /test/test/1 6 | { 7 | "title": "About search", 8 | "age": 26, 9 | "date": "2014-09-01", 10 | "tag": [ 11 | "full_text", 12 | "search" 13 | ], 14 | "public": false 15 | } 16 | 17 | PUT /test/test/2 18 | { 19 | "age": 38, 20 | "date": "2014-09-02", 21 | "tag": [ 22 | "full_text", 23 | "nosql" 24 | ], 25 | "public": true 26 | } 27 | 28 | 29 | # Where `tag` contains: 30 | # "search", "full_text" or "nosql" 31 | GET /test/test/_search 32 | { 33 | "query": { 34 | "bool": { 35 | "filter": { 36 | "terms": { 37 | "tag": [ 38 | "search", 39 | "full_text", 40 | "nosql" 41 | ] 42 | } 43 | } 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /snippets/054_Query_DSL/75_Filtered_query.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some example docs 5 | PUT /test/test/1 6 | { 7 | "folder": "inbox", 8 | "email": "Big opportunity" 9 | } 10 | 11 | PUT /test/test/2 12 | { 13 | "folder": "spam", 14 | "email": "Business opportunity" 15 | } 16 | 17 | PUT /test/test/3 18 | { 19 | "folder": "inbox", 20 | "email": "Urgent business proposal" 21 | } 22 | 23 | # Where `email` contains "business" or "opportunity 24 | # and `folder` is "inbox" 25 | GET /test/test/_search 26 | { 27 | "query": { 28 | "filtered": { 29 | "query": { 30 | "match": { 31 | "email": "business opportunity" 32 | } 33 | }, 34 | "filter": { 35 | "term": { 36 | "folder": "inbox" 37 | } 38 | } 39 | } 40 | } 41 | } 42 | 43 | # Where `folder` is "inbox" 44 | GET /test/test/_search 45 | { 46 | "query": { 47 | "filtered": { 48 | "filter": { 49 | "term": { 50 | "folder": "inbox" 51 | } 52 | } 53 | } 54 | } 55 | } 56 | 57 | # Where `folder` is "inbox" 58 | # and `email` must not contain "urgent", "business" 59 | # or "proposal" 60 | GET /test/test/_search 61 | { 62 | "query": { 63 | "filtered": { 64 | "filter": { 65 | "bool": { 66 | "must": { 67 | "term": { 68 | "folder": "inbox" 69 | } 70 | }, 71 | "must_not": { 72 | "query": { 73 | "match": { 74 | "email": "urgent business proposal" 75 | } 76 | } 77 | } 78 | } 79 | } 80 | } 81 | } 82 | } -------------------------------------------------------------------------------- /snippets/054_Query_DSL/80_Understanding_queries.json: -------------------------------------------------------------------------------- 1 | # Delete the `gb` and `us` indices 2 | DELETE /gb,us 3 | 4 | # Set the `tweet` field in the `us` index to 5 | # use the "standard" analyzer (default) 6 | PUT /us 7 | { 8 | "mappings": { 9 | "tweet": { 10 | "properties": { 11 | "tweet": { 12 | "type": "string" 13 | } 14 | } 15 | } 16 | } 17 | } 18 | 19 | # Set the `tweet` field in the `gb` index to 20 | # use the "english" analyzer 21 | PUT /gb 22 | { 23 | "mappings": { 24 | "tweet": { 25 | "properties": { 26 | "tweet": { 27 | "type": "string", 28 | "analyzer": "english" 29 | } 30 | } 31 | } 32 | } 33 | } 34 | 35 | # Get the explanation of a query 36 | GET /us,gb/_validate/query?explain 37 | { 38 | "query": { 39 | "match": { 40 | "tweet": "really powerful" 41 | } 42 | } 43 | } 
-------------------------------------------------------------------------------- /snippets/054_Query_DSL/80_Validate_query.json: -------------------------------------------------------------------------------- 1 | # Delete the `gb` index 2 | DELETE /gb 3 | 4 | # Insert an example doc 5 | PUT /gb/tweet/5 6 | { 7 | "date" : "2014-09-15", 8 | "name" : "Mary Jones", 9 | "tweet" : "However did I manage before Elasticsearch?", 10 | "user_id" : 2 11 | } 12 | 13 | # Validate a query (this one is intentionally illegal) 14 | GET /gb/tweet/_validate/query 15 | { 16 | "query": { 17 | "tweet": { 18 | "match": "really powerful" 19 | } 20 | } 21 | } 22 | 23 | # Validate the same illegal query, with an explanation of the error 24 | GET /gb/tweet/_validate/query?explain 25 | { 26 | "query": { 27 | "tweet": { 28 | "match": "really powerful" 29 | } 30 | } 31 | } -------------------------------------------------------------------------------- /snippets/056_Sorting/85_Multilevel_sort.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some example docs 5 | PUT /test/tweet/1 6 | { 7 | "date" : "2014-09-13", 8 | "name" : "Mary Jones", 9 | "tweet" : "Elasticsearch means full text search has never been so easy", 10 | "user_id" : 2 11 | } 12 | 13 | PUT /test/tweet/2 14 | { 15 | "date" : "2014-09-14", 16 | "name" : "John Smith", 17 | "tweet" : "@mary it is not just text, it does everything", 18 | "user_id" : 1 19 | } 20 | 21 | PUT /test/tweet/3 22 | { 23 | "date" : "2014-09-15", 24 | "name" : "Mary Jones", 25 | "tweet" : "However did I manage before Elasticsearch?", 26 | "user_id" : 2 27 | } 28 | 29 | PUT /test/tweet/4 30 | { 31 | "date" : "2014-09-16", 32 | "name" : "John Smith", 33 | "tweet" : "The Elasticsearch API is really easy to use", 34 | "user_id" : 1 35 | } 36 | 37 | # Match "full text search" in tweets by user 2, sorted by `date` then `_score` 38 | GET /test/_search 39 | { 40 | "query": { 41 | "bool": { 42 | "must": { 43 | "match": { 44 | "tweet": "full text search" 45 | } 46 | }, 47 | "filter": { 48 | "term": { 49 | "user_id": 2 50 | } 51 | } 52 | } 53 | }, 54 | "sort": [ 55 | { 56 | "date": { 57 | "order": "desc" 58 | } 59 | }, 60 | { 61 | "_score": { 62 | "order": "desc" 63 | } 64 | } 65 | ] 66 | } 67 | -------------------------------------------------------------------------------- /snippets/056_Sorting/85_Sort_by_date.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some example docs 5 | PUT /test/tweet/1 6 | { 7 | "date" : "2014-09-13", 8 | "name" : "Mary Jones", 9 | "tweet" : "Elasticsearch means full text search has never been so easy", 10 | "user_id" : 2 11 | } 12 | 13 | PUT /test/tweet/2 14 | { 15 | "date" : "2014-09-14", 16 | "name" : "John Smith", 17 | "tweet" : "@mary it is not just text, it does everything", 18 | "user_id" : 1 19 | } 20 | 21 | PUT /test/tweet/3 22 | { 23 | "date" : "2014-09-15", 24 | "name" : "Mary Jones", 25 | "tweet" : "However did I manage before Elasticsearch?", 26 | "user_id" : 2 27 | } 28 | 29 | PUT /test/tweet/4 30 | { 31 | "date" : "2014-09-16", 32 | "name" : "John Smith", 33 | "tweet" : "The Elasticsearch API is really easy to use", 34 | "user_id" : 1 35 | } 36 | 37 | # Return all docs for user 1 sorted by `date` 38 | GET /test/_search 39 | { 40 | "query": { 41 | "bool": { 42 | "filter": { 43 | "term": { 44 | "user_id": 1 45 | } 46 | } 47 | } 48 | }, 49 | "sort": { 50 | "date": { 51 | "order": "desc" 52 | } 53 | } 54 | } 55 |
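(A sketch, not part of the original 85_Sort_by_date.json file.) The same request can use Elasticsearch's shorthand sort syntax: passing the field name as a plain string sorts on it ascending by default, which here would return user 1's tweets oldest first.

# Shorthand string sort, ascending by default (sketch only)
GET /test/_search
{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "user_id": 1
        }
      }
    }
  },
  "sort": "date"
}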
-------------------------------------------------------------------------------- /snippets/056_Sorting/88_Multifield.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Create the `test` index making the `tweet` 5 | # field a multi-field 6 | PUT /test 7 | { 8 | "mappings": { 9 | "tweet": { 10 | "properties": { 11 | "tweet": { 12 | "type": "string", 13 | "analyzer": "english", 14 | "fields": { 15 | "raw": { 16 | "type": "string", 17 | "index": "not_analyzed" 18 | } 19 | } 20 | } 21 | } 22 | } 23 | } 24 | } 25 | 26 | # Insert some example docs 27 | PUT /test/tweet/1 28 | { 29 | "date" : "2014-09-13", 30 | "name" : "Mary Jones", 31 | "tweet" : "Elasticsearch means full text search has never been so easy", 32 | "user_id" : 2 33 | } 34 | 35 | PUT /test/tweet/2 36 | { 37 | "date" : "2014-09-14", 38 | "name" : "John Smith", 39 | "tweet" : "@mary it is not just text, it does everything", 40 | "user_id" : 1 41 | } 42 | 43 | PUT /test/tweet/3 44 | { 45 | "date" : "2014-09-15", 46 | "name" : "Mary Jones", 47 | "tweet" : "However did I manage before Elasticsearch?", 48 | "user_id" : 2 49 | } 50 | 51 | PUT /test/tweet/4 52 | { 53 | "date" : "2014-09-16", 54 | "name" : "John Smith", 55 | "tweet" : "The Elasticsearch API is really easy to use", 56 | "user_id" : 1 57 | } 58 | 59 | # Return docs where `tweet` contains "elasticsearch 60 | # sorted by the `tweet.raw` field 61 | GET /test/_search 62 | { 63 | "query": { 64 | "match": { 65 | "tweet": "Elasticsearch" 66 | } 67 | }, 68 | "sort": { 69 | "tweet.raw": "asc" 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /snippets/056_Sorting/90_Explain.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert example doc 5 | PUT /test/tweet/1 6 | { 7 | "date": "2014-09-22", 8 | "name": "John Smith", 9 | "tweet": "Elasticsearch and I have left the honeymoon stage, and I still love her.", 10 | "user_id": 1 11 | } 12 | 13 | # Run a search with `explain` 14 | GET /_search?explain 15 | { 16 | "query": { 17 | "match": { 18 | "tweet": "honeymoon" 19 | } 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /snippets/056_Sorting/90_Explain_API.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert example doc 5 | PUT /test/tweet/1 6 | { 7 | "date": "2014-09-22", 8 | "name": "John Smith", 9 | "tweet": "Elasticsearch and I have left the honeymoon stage, and I still love her.", 10 | "user_id": 1 11 | } 12 | 13 | # Use the explain API to figure out why 14 | # this document doesn't match 15 | GET /test/tweet/1/_explain 16 | { 17 | "query": { 18 | "filtered": { 19 | "filter": { 20 | "term": { 21 | "user_id": 2 22 | } 23 | }, 24 | "query": { 25 | "match": { 26 | "tweet": "honeymoon" 27 | } 28 | } 29 | } 30 | } 31 | } -------------------------------------------------------------------------------- /snippets/070_Index_Mgmt/10_Settings.json: -------------------------------------------------------------------------------- 1 | # Delete `my_temp_index` 2 | DELETE /my_temp_index 3 | 4 | # Create an index with 1 primary shard and no replicas 5 | PUT /my_temp_index 6 | { 7 | "settings": { 8 | "number_of_shards": 1, 9 | "number_of_replicas": 0 10 | } 11 | } 12 | 13 | # Add 1 replica shard for each primary 14 | 
PUT /my_temp_index/_settings 15 | { 16 | "number_of_replicas": 1 17 | } 18 | 19 | -------------------------------------------------------------------------------- /snippets/070_Index_Mgmt/15_Configure_Analyzer.json: -------------------------------------------------------------------------------- 1 | # Delete the `spanish_docs` index 2 | DELETE /spanish_docs 3 | 4 | # Configuring an analyzer to use Spanish stopwords 5 | PUT /spanish_docs 6 | { 7 | "settings": { 8 | "analysis": { 9 | "analyzer": { 10 | "es_std": { 11 | "type": "standard", 12 | "stopwords": "_spanish_" 13 | } 14 | } 15 | } 16 | } 17 | } 18 | 19 | # Test out the new analyzer 20 | GET /spanish_docs/_analyze 21 | { 22 | "analyzer": "es_std", 23 | "text":"El veloz zorro marrón" 24 | } 25 | -------------------------------------------------------------------------------- /snippets/070_Index_Mgmt/20_Custom_analyzer.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create a custom analyzer 5 | PUT /my_index 6 | { 7 | "settings": { 8 | "analysis": { 9 | "char_filter": { 10 | "&_to_and": { 11 | "type": "mapping", 12 | "mappings": [ 13 | "&=> and " 14 | ] 15 | } 16 | }, 17 | "filter": { 18 | "my_stopwords": { 19 | "type": "stop", 20 | "stopwords": [ 21 | "the", 22 | "a" 23 | ] 24 | } 25 | }, 26 | "analyzer": { 27 | "my_analyzer": { 28 | "type": "custom", 29 | "char_filter": [ 30 | "html_strip", 31 | "&_to_and" 32 | ], 33 | "tokenizer": "standard", 34 | "filter": [ 35 | "lowercase", 36 | "my_stopwords" 37 | ] 38 | } 39 | } 40 | } 41 | } 42 | } 43 | 44 | # Test out the new analyzer 45 | GET /my_index/_analyze 46 | { 47 | "text": "The quick & brown fox", 48 | "analyzer": "my_analyzer" 49 | } 50 | 51 | # Apply "my_analyzer" to the `title` field 52 | PUT /my_index/_mapping/my_type 53 | { 54 | "properties": { 55 | "title": { 56 | "type": "string", 57 | "analyzer": "my_analyzer" 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /snippets/070_Index_Mgmt/31_Source_field.json: -------------------------------------------------------------------------------- 1 | # Delete the `test` index 2 | DELETE /test 3 | 4 | # Insert some examples 5 | PUT /test/test/1 6 | { 7 | "title": "Doc one", 8 | "created": "2014-01-01", 9 | "body": "The quick brown fox jumped over the lazy dog" 10 | } 11 | 12 | # Only return the `title` and `created` fields 13 | GET /test/_search 14 | { 15 | "query": { 16 | "match_all": {} 17 | }, 18 | "_source": [ 19 | "title", 20 | "created" 21 | ] 22 | } 23 | 24 | # Don't return any fields 25 | GET /test/_search 26 | { 27 | "query": { 28 | "match_all": {} 29 | }, 30 | "_source": false 31 | } 32 | -------------------------------------------------------------------------------- /snippets/070_Index_Mgmt/35_Dynamic_mapping.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Set dynamic to `strict` at the root object level 5 | # but allow dynamic mapping in the `stash` object 6 | PUT /my_index 7 | { 8 | "mappings": { 9 | "my_type": { 10 | "dynamic": "strict", 11 | "properties": { 12 | "title": { 13 | "type": "string" 14 | }, 15 | "stash": { 16 | "type": "object", 17 | "dynamic": true 18 | } 19 | } 20 | } 21 | } 22 | } 23 | 24 | # Dynamically create a new field under `stash` 25 | PUT /my_index/my_type/1 26 | { 27 | "title": "This doc adds a new field", 28 | "stash": { 29 | "new_field": 
"Success!" 30 | } 31 | } 32 | 33 | # Check the mapping to verify 34 | GET /my_index/_mapping/my_type 35 | 36 | # Throw an error when trying to add a new field 37 | # to the root object 38 | PUT /my_index/my_type/1 39 | { 40 | "title": "This throws a StrictDynamicMappingException", 41 | "new_field": "Fail!" 42 | } -------------------------------------------------------------------------------- /snippets/070_Index_Mgmt/40_Custom_dynamic_mapping.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Setup dynamic mapping to analyze any string field 5 | # ending in "_es" with the "spanish" analyzer, 6 | # and all other string fields with the "english" 7 | # analyzer 8 | PUT /my_index 9 | { 10 | "mappings": { 11 | "my_type": { 12 | "dynamic_templates": [ 13 | { 14 | "es": { 15 | "match": "*_es", 16 | "match_mapping_type": "string", 17 | "mapping": { 18 | "type": "string", 19 | "analyzer": "spanish" 20 | } 21 | } 22 | }, 23 | { 24 | "en": { 25 | "match": "*", 26 | "match_mapping_type": "string", 27 | "mapping": { 28 | "type": "string", 29 | "analyzer": "english" 30 | } 31 | } 32 | } 33 | ] 34 | } 35 | } 36 | } 37 | 38 | # Create a document 39 | PUT /my_index/my_type/1 40 | { 41 | "title_en": "English title", 42 | "title_es": "Titulo en Español" 43 | } 44 | 45 | # Check mapping 46 | GET /my_index/_mapping/my_type 47 | -------------------------------------------------------------------------------- /snippets/070_Index_Mgmt/45_Default_mapping.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Disable the `_all` field for all types except `blog` 5 | PUT /my_index 6 | { 7 | "mappings": { 8 | "_default_": { 9 | "_all": { 10 | "enabled": false 11 | } 12 | }, 13 | "blog": { 14 | "_all": { 15 | "enabled": true 16 | } 17 | } 18 | } 19 | } 20 | 21 | # Index a blog and a user doc 22 | PUT /my_index/user/1 23 | { 24 | "name": "John Smith" 25 | } 26 | 27 | PUT /my_index/blog/2 28 | { 29 | "user_id": 1, 30 | "title": "Using default mapping" 31 | } 32 | 33 | # Check the mapping 34 | GET /my_index/_mapping 35 | -------------------------------------------------------------------------------- /snippets/070_Index_Mgmt/55_Aliases.json: -------------------------------------------------------------------------------- 1 | # Delete indices beginning with `my_index` 2 | DELETE /my_index* 3 | 4 | # Create index `my_index_v1` 5 | PUT /my_index_v1 6 | 7 | # Create alias `my_index` pointing to `my_index_v1` 8 | PUT /my_index_v1/_alias/my_index 9 | 10 | # Check alias `my_index` 11 | GET /*/_alias/my_index 12 | 13 | # Check index `my_index_v1` 14 | GET /my_index_v1/_alias/* 15 | 16 | # Index into alias `my_index` 17 | PUT /my_index/my_type/1 18 | { 19 | "tags": "Some Tag" 20 | } 21 | 22 | # Search in alias `my_index` 23 | GET /my_index/_search 24 | { 25 | "sort": "tags" 26 | } 27 | 28 | # Create index `my_index_v2` and make `tags` exact value 29 | PUT /my_index_v2 30 | { 31 | "mappings": { 32 | "my_type": { 33 | "properties": { 34 | "tags": { 35 | "type": "string", 36 | "index": "not_analyzed" 37 | } 38 | } 39 | } 40 | } 41 | } 42 | 43 | # Index doc into `my_index_v2` 44 | PUT /my_index_v2/my_type/1 45 | { 46 | "tags": "Some Tag" 47 | } 48 | 49 | # Switch the alias to point to `my_index_v2` 50 | POST /_aliases 51 | { 52 | "actions": [ 53 | { 54 | "add": { 55 | "index": "my_index_v2", 56 | "alias": "my_index" 57 | } 58 | }, 59 | { 60 
| "remove": { 61 | "index": "my_index_v1", 62 | "alias": "my_index" 63 | } 64 | } 65 | ] 66 | } 67 | 68 | # Check the aliases 69 | GET /_alias 70 | 71 | # Search in alias `my_index` 72 | GET /my_index/_search 73 | { 74 | "sort": "tags" 75 | } 76 | -------------------------------------------------------------------------------- /snippets/080_Structured_Search/05_Term_number.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_store` index 2 | DELETE /my_store 3 | 4 | # Index example docs 5 | POST /my_store/products/_bulk 6 | {"index":{"_id":1}} 7 | {"price":10,"productID":"XHDK-A-1293-#fJ3"} 8 | {"index":{"_id":2}} 9 | {"price":20,"productID":"KDKE-B-9947-#kL5"} 10 | {"index":{"_id":3}} 11 | {"price":30,"productID":"JODL-X-1937-#pV7"} 12 | {"index":{"_id":4}} 13 | {"price":30,"productID":"QQPX-R-3956-#aD8"} 14 | 15 | # Term filter with number 16 | GET /my_store/products/_search 17 | { 18 | "query": { 19 | "constant_score": { 20 | "filter": { 21 | "term": { 22 | "price": 20 23 | } 24 | } 25 | } 26 | } 27 | } 28 | 29 | # Check the analyzed tokens 30 | GET /my_store/_analyze 31 | { 32 | "field": "productID", 33 | "text": "XHDK-A-1293-#fJ3" 34 | } 35 | 36 | # Same as above, without the `match_all` query 37 | GET /my_store/products/_search 38 | { 39 | "query": { 40 | "constant_score": { 41 | "filter": { 42 | "term": { 43 | "price": 20 44 | } 45 | } 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /snippets/080_Structured_Search/15_Terms_filter.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_store` index 2 | DELETE /my_store 3 | 4 | # Index example docs 5 | POST /my_store/products/_bulk 6 | {"index":{"_id":1}} 7 | {"price":10,"productID":"XHDK-A-1293-#fJ3"} 8 | {"index":{"_id":2}} 9 | {"price":20,"productID":"KDKE-B-9947-#kL5"} 10 | {"index":{"_id":3}} 11 | {"price":30,"productID":"JODL-X-1937-#pV7"} 12 | {"index":{"_id":4}} 13 | {"price":30,"productID":"QQPX-R-3956-#aD8"} 14 | 15 | # Where price is 20 or 30 16 | GET /my_store/products/_search 17 | { 18 | "query": { 19 | "constant_score": { 20 | "filter": { 21 | "terms": { 22 | "price": [ 20, 30 ] 23 | } 24 | } 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /snippets/080_Structured_Search/20_Exact.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Index example docs 5 | PUT /my_index/my_type/1 6 | { 7 | "tags": [ 8 | "search" 9 | ], 10 | "tag_count": 1 11 | } 12 | 13 | PUT /my_index/my_type/2 14 | { 15 | "tags": [ 16 | "search", 17 | "open_source" 18 | ], 19 | "tag_count": 2 20 | } 21 | 22 | # Where tags = "search" only 23 | GET /my_index/my_type/_search 24 | { 25 | "query": { 26 | "constant_score": { 27 | "filter": { 28 | "bool": { 29 | "must": [ 30 | { 31 | "term": { 32 | "tags": "search" 33 | } 34 | }, 35 | { 36 | "term": { 37 | "tag_count": 1 38 | } 39 | } 40 | ] 41 | } 42 | } 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /snippets/080_Structured_Search/25_Range_filter.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_store` index 2 | DELETE /my_store 3 | 4 | # Index example docs 5 | POST /my_store/products/_bulk 6 | {"index":{"_id":1}} 7 | {"price":10,"productID":"XHDK-A-1293-#fJ3"} 8 | 
{"index":{"_id":2}} 9 | {"price":20,"productID":"KDKE-B-9947-#kL5"} 10 | {"index":{"_id":3}} 11 | {"price":30,"productID":"JODL-X-1937-#pV7"} 12 | {"index":{"_id":4}} 13 | {"price":30,"productID":"QQPX-R-3956-#aD8"} 14 | 15 | 16 | # Where 20 <= `price` < 40 17 | GET /my_store/products/_search 18 | { 19 | "query": { 20 | "constant_score": { 21 | "filter": { 22 | "range": { 23 | "price": { 24 | "gte": 20, 25 | "lt": 40 26 | } 27 | } 28 | } 29 | } 30 | } 31 | } 32 | 33 | # Where `price` > 20 34 | GET /my_store/products/_search 35 | { 36 | "query": { 37 | "constant_score": { 38 | "filter": { 39 | "range": { 40 | "price": { 41 | "gt": 20 42 | } 43 | } 44 | } 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /snippets/080_Structured_Search/30_Exists_missing.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Index example docs 5 | POST /my_index/posts/_bulk 6 | { "index": { "_id": "1" }} 7 | { "tags" : ["search"] } 8 | { "index": { "_id": "2" }} 9 | { "tags" : ["search", "open_source"] } 10 | { "index": { "_id": "3" }} 11 | { "other_field" : "some data" } 12 | { "index": { "_id": "4" }} 13 | { "tags" : null } 14 | { "index": { "_id": "5" }} 15 | { "tags" : ["search", null] } 16 | 17 | # Where the `tags` field exists 18 | GET /my_index/posts/_search 19 | { 20 | "query": { 21 | "constant_score": { 22 | "filter": { 23 | "exists": { 24 | "field": "tags" 25 | } 26 | } 27 | } 28 | } 29 | } 30 | 31 | # Where the `tags` field is missing 32 | GET /my_index/posts/_search 33 | { 34 | "query": { 35 | "constant_score": { 36 | "filter": { 37 | "missing": { 38 | "field": "tags" 39 | } 40 | } 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /snippets/100_Full_Text_Search/05_Match_query.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index some example docs 9 | POST /my_index/my_type/_bulk 10 | { "index": { "_id": 1 }} 11 | { "title": "The quick brown fox" } 12 | { "index": { "_id": 2 }} 13 | { "title": "The quick brown fox jumps over the lazy dog" } 14 | { "index": { "_id": 3 }} 15 | { "title": "The quick brown fox jumps over the quick dog" } 16 | { "index": { "_id": 4 }} 17 | { "title": "Brown fox brown dog" } 18 | 19 | # Single word match query 20 | GET /my_index/my_type/_search 21 | { 22 | "query": { 23 | "match": { 24 | "title": "QUICK!" 25 | } 26 | } 27 | } 28 | 29 | # Multi-word match query 30 | GET /my_index/my_type/_search 31 | { 32 | "query": { 33 | "match": { 34 | "title": "BROWN DOG!" 
35 | } 36 | } 37 | } 38 | 39 | # Operator `and` 40 | GET /my_index/my_type/_search 41 | { 42 | "query": { 43 | "match": { 44 | "title": { 45 | "query": "BROWN DOG!", 46 | "operator": "and" 47 | } 48 | } 49 | } 50 | } 51 | 52 | # Minimum should match 53 | GET /my_index/my_type/_search 54 | { 55 | "query": { 56 | "match": { 57 | "title": { 58 | "query": "quick brown dog", 59 | "minimum_should_match": "75%" 60 | } 61 | } 62 | } 63 | } -------------------------------------------------------------------------------- /snippets/100_Full_Text_Search/15_Bool_query.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index some example docs 9 | POST /my_index/my_type/_bulk 10 | { "index": { "_id": 1 }} 11 | { "title": "The quick brown fox" } 12 | { "index": { "_id": 2 }} 13 | { "title": "The quick brown fox jumps over the lazy dog" } 14 | { "index": { "_id": 3 }} 15 | { "title": "The quick brown fox jumps over the quick dog" } 16 | { "index": { "_id": 4 }} 17 | { "title": "Brown fox brown dog" } 18 | 19 | # Bool query 20 | GET /my_index/my_type/_search 21 | { 22 | "query": { 23 | "bool": { 24 | "must": { 25 | "match": { 26 | "title": "quick" 27 | } 28 | }, 29 | "must_not": { 30 | "match": { 31 | "title": "lazy" 32 | } 33 | }, 34 | "should": [ 35 | { 36 | "match": { 37 | "title": "brown" 38 | } 39 | }, 40 | { 41 | "match": { 42 | "title": "dog" 43 | } 44 | } 45 | ] 46 | } 47 | } 48 | } 49 | 50 | # Minimum should match 51 | GET /my_index/my_type/_search 52 | { 53 | "query": { 54 | "bool": { 55 | "should": [ 56 | { 57 | "match": { 58 | "title": "brown" 59 | } 60 | }, 61 | { 62 | "match": { 63 | "title": "fox" 64 | } 65 | }, 66 | { 67 | "match": { 68 | "title": "dog" 69 | } 70 | } 71 | ], 72 | "minimum_should_match": 2 73 | } 74 | } 75 | } -------------------------------------------------------------------------------- /snippets/100_Full_Text_Search/30_Analysis.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` and map the: 5 | # `title` field to use the default `standard` analyzer, 6 | # `english_title` field to use the`english` analyzer 7 | PUT /my_index 8 | { 9 | "mappings": { 10 | "my_type": { 11 | "properties": { 12 | "title": { 13 | "type": "string" 14 | }, 15 | "english_title": { 16 | "type": "string", 17 | "analyzer": "english" 18 | } 19 | } 20 | } 21 | } 22 | } 23 | 24 | # Test the analysis of the `title` field 25 | GET /my_index/_analyze 26 | { 27 | "field": "my_type.title", <1> 28 | "text": "Foxes" 29 | } 30 | 31 | # Test the analysis of the `english_title` field 32 | GET /my_index/_analyze 33 | { 34 | "field": "my_type.english_title", <2> 35 | "text": "Foxes" 36 | } 37 | 38 | # Get query explanation for `title` vs `english_title` 39 | GET /my_index/my_type/_validate/query?explain 40 | { 41 | "query": { 42 | "bool": { 43 | "should": [ 44 | { 45 | "match": { 46 | "title": "Foxes" 47 | } 48 | }, 49 | { 50 | "match": { 51 | "english_title": "Foxes" 52 | } 53 | } 54 | ] 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /snippets/110_Multi_Field_Search/25_Best_fields.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 
4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index some example docs 9 | PUT /my_index/my_type/1 10 | { 11 | "title": "Quick brown rabbits", 12 | "body": "Brown rabbits are commonly seen." 13 | } 14 | 15 | PUT /my_index/my_type/2 16 | { 17 | "title": "Keeping pets healthy", 18 | "body": "My quick brown fox eats rabbits on a regular basis." 19 | } 20 | 21 | # Multi_match query 22 | GET /my_index/_search 23 | { 24 | "query": { 25 | "multi_match": { 26 | "query": "Quick brown fox", 27 | "type": "best_fields", 28 | "fields": [ 29 | "title", 30 | "body" 31 | ], 32 | "tie_breaker": 0.3, 33 | "minimum_should_match": "30%" 34 | } 35 | } 36 | } -------------------------------------------------------------------------------- /snippets/110_Multi_Field_Search/40_Bad_frequencies.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index some example docs 9 | POST /my_index/my_type/_bulk 10 | {"index":{"_id":1}} 11 | {"first_name":"John","last_name":"Smith"} 12 | {"index":{"_id":2}} 13 | {"first_name":"Peter","last_name":"Smith"} 14 | {"index":{"_id":3}} 15 | {"first_name":"Peter","last_name":"Jones"} 16 | {"index":{"_id":4}} 17 | {"first_name":"Mary","last_name":"Smith"} 18 | {"index":{"_id":5}} 19 | {"first_name":"Smith","last_name":"Johns"} 20 | 21 | # Firstname `smith` overwhelms lastname `smith` 22 | GET /my_index/_search 23 | { 24 | "query": { 25 | "multi_match": { 26 | "query": "Peter Smith", 27 | "fields": [ 28 | "*_name" 29 | ] 30 | } 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /snippets/110_Multi_Field_Search/40_Entity_search_problems.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index some example docs 9 | PUT /my_index/my_type/1 10 | { 11 | "street": "5 Poland Street", 12 | "city": "London", 13 | "country": "United Kingdom", 14 | "postcode": "W1V 3DG" 15 | } 16 | 17 | # A naive approach to multi-field search 18 | GET /my_index/_search 19 | { 20 | "query": { 21 | "multi_match": { 22 | "query": "Poland Street W1V", 23 | "type": "most_fields", 24 | "fields": [ 25 | "street", 26 | "city", 27 | "country", 28 | "postcode" 29 | ] 30 | } 31 | } 32 | } 33 | 34 | # The above query explained 35 | GET /my_index/_validate/query?explain 36 | { 37 | "query": { 38 | "multi_match": { 39 | "query": "Poland Street W1V", 40 | "type": "most_fields", 41 | "fields": [ 42 | "street", 43 | "city", 44 | "country", 45 | "postcode" 46 | ] 47 | } 48 | } 49 | } 50 | 51 | # With operator "and" - no results! 
52 | GET /my_index/_search 53 | { 54 | "query": { 55 | "multi_match": { 56 | "query": "Poland Street W1V", 57 | "type": "most_fields", 58 | "operator": "and", 59 | "fields": [ 60 | "street", 61 | "city", 62 | "country", 63 | "postcode" 64 | ] 65 | } 66 | } 67 | } 68 | 69 | # The above query explained 70 | GET /my_index/_validate/query?explain 71 | { 72 | "query": { 73 | "multi_match": { 74 | "query": "Poland Street W1V", 75 | "type": "most_fields", 76 | "operator": "and", 77 | "fields": [ 78 | "street", 79 | "city", 80 | "country", 81 | "postcode" 82 | ] 83 | } 84 | } 85 | } 86 | 87 | 88 | -------------------------------------------------------------------------------- /snippets/110_Multi_Field_Search/45_Custom_all.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Copy first_name and last_name to the full_name field 5 | PUT /my_index 6 | { 7 | "settings": { 8 | "number_of_shards": 1 9 | }, 10 | "mappings": { 11 | "person": { 12 | "properties": { 13 | "first_name": { 14 | "type": "string", 15 | "copy_to": "full_name" 16 | }, 17 | "last_name": { 18 | "type": "string", 19 | "copy_to": "full_name" 20 | }, 21 | "full_name": { 22 | "type": "string" 23 | } 24 | } 25 | } 26 | } 27 | } 28 | 29 | # Index some example docs 30 | POST /my_index/person/_bulk 31 | {"index":{"_id":1}} 32 | {"first_name":"John","last_name":"Smith"} 33 | {"index":{"_id":2}} 34 | {"first_name":"Peter","last_name":"Smith"} 35 | {"index":{"_id":3}} 36 | {"first_name":"Peter","last_name":"Jones"} 37 | {"index":{"_id":4}} 38 | {"first_name":"Mary","last_name":"Smith"} 39 | {"index":{"_id":5}} 40 | {"first_name":"Smith","last_name":"Johns"} 41 | 42 | # With operator "and" 43 | GET /my_index/_search 44 | { 45 | "query": { 46 | "match": { 47 | "full_name": { 48 | "query": "Peter Smith", 49 | "operator": "and" 50 | } 51 | } 52 | } 53 | } 54 | 55 | # Good term frequencies 56 | GET /my_index/_search 57 | { 58 | "query": { 59 | "match": { 60 | "full_name": { 61 | "query": "Peter Smith" 62 | } 63 | } 64 | } 65 | } 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /snippets/110_Multi_Field_Search/55_Not_analyzed.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | # and set `title` to be not_analyzed 6 | PUT /my_index 7 | { 8 | "settings": { 9 | "number_of_shards": 1 10 | }, 11 | "mappings": { 12 | "person": { 13 | "properties": { 14 | "title": { 15 | "type": "string", 16 | "index": "not_analyzed" 17 | } 18 | } 19 | } 20 | } 21 | } 22 | 23 | # Index an example doc 24 | PUT /my_index/person/1 25 | { 26 | "title": "Mr", 27 | "first_name": "John", 28 | "last_name": "Smith" 29 | } 30 | 31 | # Explanation for multi_match 32 | GET /my_index/_validate/query?explain 33 | { 34 | "query": { 35 | "multi_match": { 36 | "query": "peter smith", 37 | "type": "cross_fields", 38 | "fields": [ 39 | "title", 40 | "first_name", 41 | "last_name" 42 | ] 43 | } 44 | } 45 | } -------------------------------------------------------------------------------- /snippets/120_Proximity_Matching/05_Match_phrase_query.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 
1 }} 7 | 8 | # Index some example docs 9 | POST /my_index/my_type/_bulk 10 | { "index": { "_id": 1 }} 11 | { "title": "The quick brown fox" } 12 | { "index": { "_id": 2 }} 13 | { "title": "The quick brown fox jumps over the lazy dog" } 14 | { "index": { "_id": 3 }} 15 | { "title": "The quick brown fox jumps over the quick dog" } 16 | { "index": { "_id": 4 }} 17 | { "title": "Brown fox brown dog" } 18 | 19 | # match_phrase query 20 | GET /my_index/my_type/_search 21 | { 22 | "query": { 23 | "match_phrase": { 24 | "title": "quick brown fox" 25 | } 26 | } 27 | } 28 | 29 | # match query, type phrase 30 | GET /my_index/my_type/_search 31 | { 32 | "query": { 33 | "match": { 34 | "title": { 35 | "type": "phrase", 36 | "query": "quick brown fox" 37 | } 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /snippets/120_Proximity_Matching/05_Term_positions.json: -------------------------------------------------------------------------------- 1 | # Term positions 2 | GET /_analyze?text=Quick brown fox 3 | 4 | -------------------------------------------------------------------------------- /snippets/120_Proximity_Matching/10_Slop.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index some example docs 9 | POST /my_index/my_type/_bulk 10 | { "index": { "_id": 1 }} 11 | { "title": "The quick brown fox" } 12 | { "index": { "_id": 2 }} 13 | { "title": "The quick brown fox jumps over the lazy dog" } 14 | { "index": { "_id": 3 }} 15 | { "title": "The quick brown fox jumps over the quick dog" } 16 | { "index": { "_id": 4 }} 17 | { "title": "Brown fox brown dog" } 18 | 19 | 20 | # Phrase query - doesn't match 21 | GET /my_index/my_type/_search 22 | { 23 | "query": { 24 | "match_phrase": { 25 | "title": { 26 | "query": "quick fox" 27 | } 28 | } 29 | } 30 | } 31 | 32 | 33 | # Proximity query with slop - matches 34 | GET /my_index/my_type/_search 35 | { 36 | "query": { 37 | "match_phrase": { 38 | "title": { 39 | "query": "quick fox", 40 | "slop": 1 41 | } 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /snippets/120_Proximity_Matching/15_Multi_value_fields.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index an example doc 9 | PUT /my_index/groups/1 10 | { 11 | "names": [ 12 | "John Abraham", 13 | "Lincoln Smith" 14 | ] 15 | } 16 | 17 | # Phrase "Abraham Lincoln" matches! 
18 | GET /my_index/groups/_search 19 | { 20 | "query": { 21 | "match_phrase": { 22 | "names": "Abraham Lincoln" 23 | } 24 | } 25 | } 26 | 27 | # Delete `groups` mapping and data 28 | DELETE /my_index/groups/ 29 | 30 | # Map `names` to use position_increment_gap 31 | PUT /my_index/_mapping/groups 32 | { 33 | "properties": { 34 | "names": { 35 | "type": "string", 36 | "position_increment_gap": 100 37 | } 38 | } 39 | } 40 | 41 | # Reindex document 42 | PUT /my_index/groups/1 43 | { 44 | "names": [ 45 | "John Abraham", 46 | "Lincoln Smith" 47 | ] 48 | } 49 | 50 | # Phrase "Abraham Lincoln" no longer matches 51 | GET /my_index/groups/_search 52 | { 53 | "query": { 54 | "match_phrase": { 55 | "names": "Abraham Lincoln" 56 | } 57 | } 58 | } 59 | 60 | # But phrase "John Abraham" does 61 | GET /my_index/groups/_search 62 | { 63 | "query": { 64 | "match_phrase": { 65 | "names": "John Abraham" 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /snippets/120_Proximity_Matching/20_Scoring.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index some example docs 9 | POST /my_index/my_type/_bulk 10 | { "index": { "_id": 1 }} 11 | { "title": "The quick brown fox" } 12 | { "index": { "_id": 2 }} 13 | { "title": "The quick brown fox jumps over the lazy dog" } 14 | { "index": { "_id": 3 }} 15 | { "title": "The quick brown fox jumps over the quick dog" } 16 | { "index": { "_id": 4 }} 17 | { "title": "Brown fox brown dog" } 18 | 19 | # High slop value 20 | POST /my_index/my_type/_search 21 | { 22 | "query": { 23 | "match_phrase": { 24 | "title": { 25 | "query": "quick dog", 26 | "slop": 50 27 | } 28 | } 29 | } 30 | } -------------------------------------------------------------------------------- /snippets/120_Proximity_Matching/25_Relevance.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index some example docs 9 | POST /my_index/my_type/_bulk 10 | { "index": { "_id": 1 }} 11 | { "title": "The quick brown fox" } 12 | { "index": { "_id": 2 }} 13 | { "title": "The quick brown fox jumps over the lazy dog" } 14 | { "index": { "_id": 3 }} 15 | { "title": "The quick brown fox jumps over the quick dog" } 16 | { "index": { "_id": 4 }} 17 | { "title": "Brown fox brown dog" } 18 | 19 | # Combine phrase with match query to boost relevance 20 | GET /my_index/my_type/_search 21 | { 22 | "query": { 23 | "bool": { 24 | "must": { 25 | "match": { 26 | "title": { 27 | "query": "quick brown fox", 28 | "minimum_should_match": "30%" 29 | } 30 | } 31 | }, 32 | "should": { 33 | "match_phrase": { 34 | "title": { 35 | "query": "quick brown fox", 36 | "slop": 50 37 | } 38 | } 39 | } 40 | } 41 | } 42 | } -------------------------------------------------------------------------------- /snippets/120_Proximity_Matching/30_Performance.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index some example docs 9 | POST 
/my_index/my_type/_bulk 10 | { "index": { "_id": 1 }} 11 | { "title": "The quick brown fox" } 12 | { "index": { "_id": 2 }} 13 | { "title": "The quick brown fox jumps over the lazy dog" } 14 | { "index": { "_id": 3 }} 15 | { "title": "The quick brown fox jumps over the quick dog" } 16 | { "index": { "_id": 4 }} 17 | { "title": "Brown fox brown dog" } 18 | 19 | # Use rescore API for better performance 20 | GET /my_index/my_type/_search 21 | { 22 | "query": { 23 | "match": { 24 | "title": { 25 | "query": "quick brown fox", 26 | "minimum_should_match": "30%" 27 | } 28 | } 29 | }, 30 | "rescore": { 31 | "window_size": 50, 32 | "query": { 33 | "rescore_query": { 34 | "match_phrase": { 35 | "title": { 36 | "query": "quick brown fox", 37 | "slop": 50 38 | } 39 | } 40 | } 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /snippets/130_Partial_Matching/10_Prefix_query.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Map the `postcode` to be not_analyzed 5 | PUT /my_index 6 | { 7 | "mappings": { 8 | "address": { 9 | "properties": { 10 | "postcode": { 11 | "type": "string", 12 | "index": "not_analyzed" 13 | } 14 | } 15 | } 16 | } 17 | } 18 | 19 | # Index some example docs 20 | PUT /my_index/address/_bulk 21 | {"index":{"_id":1}} 22 | {"postcode":"W1V 3DG"} 23 | {"index":{"_id":2}} 24 | {"postcode":"W2F 8HW"} 25 | {"index":{"_id":3}} 26 | {"postcode":"W1F 7HW"} 27 | {"index":{"_id":4}} 28 | {"postcode":"WC1N 1LZ"} 29 | {"index":{"_id":5}} 30 | {"postcode":"SW5 0BE"} 31 | 32 | # Find postcodes beginning with the exact prefix "W1" 33 | GET /my_index/address/_search 34 | { 35 | "query": { 36 | "prefix": { 37 | "postcode": "W1" 38 | } 39 | } 40 | } -------------------------------------------------------------------------------- /snippets/130_Partial_Matching/15_Wildcard_regexp.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Map the `postcode` to be not_analyzed 5 | PUT /my_index 6 | { 7 | "mappings": { 8 | "address": { 9 | "properties": { 10 | "postcode": { 11 | "type": "string", 12 | "index": "not_analyzed" 13 | } 14 | } 15 | } 16 | } 17 | } 18 | 19 | # Index some example docs 20 | PUT /my_index/address/_bulk 21 | {"index":{"_id":1}} 22 | {"postcode":"W1V 3DG"} 23 | {"index":{"_id":2}} 24 | {"postcode":"W2F 8HW"} 25 | {"index":{"_id":3}} 26 | {"postcode":"W1F 7HW"} 27 | {"index":{"_id":4}} 28 | {"postcode":"WC1N 1LZ"} 29 | {"index":{"_id":5}} 30 | {"postcode":"SW5 0BE"} 31 | 32 | # Term-level wildcard query 33 | GET /my_index/address/_search 34 | { 35 | "query": { 36 | "wildcard": { 37 | "postcode": "W?F*HW" 38 | } 39 | } 40 | } 41 | 42 | # Term-level regular expression query 43 | GET /my_index/address/_search 44 | { 45 | "query": { 46 | "regexp": { 47 | "postcode": "W[0-9].+" 48 | } 49 | } 50 | } -------------------------------------------------------------------------------- /snippets/130_Partial_Matching/20_Match_phrase_prefix.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Create `my_index` with a single primary shard 5 | PUT /my_index 6 | { "settings": { "number_of_shards": 1 }} 7 | 8 | # Index some example docs 9 | PUT /my_index/my_type/1 10 | { 11 | "brand": "Johnnie Walker Black Label" 12 | } 13 | 14 | PUT /my_index/my_type/2 15 | { 16 
| "brand": "Johnnie Walker Blue Label" 17 | } 18 | 19 | # Match phrase prefix query 20 | GET /my_index/_search 21 | { 22 | "query": { 23 | "match_phrase_prefix": { 24 | "brand": "Johnnie Walker Bl" 25 | } 26 | } 27 | } 28 | 29 | # Match phrase prefix query 30 | GET /my_index/_search 31 | { 32 | "query": { 33 | "match_phrase_prefix": { 34 | "brand": "Johnnie Walker Bla" 35 | } 36 | } 37 | } 38 | 39 | # Match phrase prefix query with slop 40 | GET /my_index/_search 41 | { 42 | "query": { 43 | "match": { 44 | "brand": { 45 | "type": "phrase_prefix", 46 | "query": "Walker Johnnie Bl", 47 | "slop": 10 48 | } 49 | } 50 | } 51 | } 52 | 53 | # Control the number of completions 54 | GET /my_index/_search 55 | { 56 | "query": { 57 | "match": { 58 | "brand": { 59 | "type": "phrase_prefix", 60 | "query": "Johnnie Walker Bla", 61 | "max_expansions": 50 62 | } 63 | } 64 | } 65 | } 66 | 67 | -------------------------------------------------------------------------------- /snippets/130_Partial_Matching/35_Postcodes.json: -------------------------------------------------------------------------------- 1 | # Delete the `my_index` index 2 | DELETE /my_index 3 | 4 | # Map the `postcode` field to use edge_ngrams at 5 | # index time, and the keyword tokenizer at search time 6 | PUT /my_index 7 | { 8 | "settings": { 9 | "analysis": { 10 | "filter": { 11 | "postcode_filter": { 12 | "type": "edge_ngram", 13 | "min_gram": 1, 14 | "max_gram": 8 15 | } 16 | }, 17 | "analyzer": { 18 | "postcode_index": { 19 | "tokenizer": "keyword", 20 | "filter": [ 21 | "postcode_filter" 22 | ] 23 | }, 24 | "postcode_search": { 25 | "tokenizer": "keyword" 26 | } 27 | } 28 | } 29 | }, 30 | "mappings": { 31 | "address": { 32 | "properties": { 33 | "postcode": { 34 | "type": "string", 35 | "analyzer": "postcode_index", 36 | "search_analyzer": "postcode_search" 37 | } 38 | } 39 | } 40 | } 41 | } 42 | 43 | # Index some example docs 44 | PUT /my_index/address/_bulk 45 | {"index":{"_id":1}} 46 | {"postcode":"W1V 3DG"} 47 | {"index":{"_id":2}} 48 | {"postcode":"W2F 8HW"} 49 | {"index":{"_id":3}} 50 | {"postcode":"W1F 7HW"} 51 | {"index":{"_id":4}} 52 | {"postcode":"WC1N 1LZ"} 53 | {"index":{"_id":5}} 54 | {"postcode":"SW5 0BE"} 55 | 56 | # Find postcodes beginning with the exact prefix "W1" 57 | GET /my_index/address/_search 58 | { 59 | "query": { 60 | "match": { 61 | "postcode": "W1" 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /snippets/300_Aggregations/30_histogram.json: -------------------------------------------------------------------------------- 1 | # Determine the top-selling car in each price range, using a histogram 2 | # and a nested terms bucket 3 | GET /cars/transactions/_search 4 | { 5 | "size" : 0, 6 | "aggs":{ 7 | "price":{ 8 | "histogram":{ 9 | "field":"price", 10 | "interval":20000 11 | }, 12 | "aggs":{ 13 | "revenue": { 14 | "sum": { 15 | "field" : "price" 16 | } 17 | } 18 | } 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /snippets/300_Aggregations/35_date_histogram.json: -------------------------------------------------------------------------------- 1 | 2 | # Create a histogram over time - this agg will generate a 3 | # histogram of sales over time 4 | GET /cars/transactions/_search 5 | { 6 | "size" : 0, 7 | "aggs": { 8 | "sales": { 9 | "date_histogram": { 10 | "field": "sold", 11 | "interval": "month", 12 | "format": "yyyy-MM-dd" 13 | } 14 | } 15 | } 16 | } 17 | 18 | # Display all months in the year, 
including empty buckets 19 | GET /cars/transactions/_search 20 | { 21 | "size" : 0, 22 | "aggs": { 23 | "sales": { 24 | "date_histogram": { 25 | "field": "sold", 26 | "interval": "month", 27 | "format": "yyyy-MM-dd", 28 | "min_doc_count" : 0, 29 | "extended_bounds" : { 30 | "min" : "2014-01-01", 31 | "max" : "2014-12-31" 32 | } 33 | } 34 | } 35 | } 36 | } 37 | 38 | # Find the average price of each month's top-selling car type 39 | GET /cars/transactions/_search 40 | { 41 | "size" : 0, 42 | "aggs": { 43 | "sales": { 44 | "date_histogram": { 45 | "field": "sold", 46 | "interval": "month", 47 | "format": "yyyy-MM-dd", 48 | "min_doc_count" : 0, 49 | "extended_bounds" : { 50 | "min" : "2014-01-01", 51 | "max" : "2014-12-31" 52 | } 53 | }, 54 | "aggs": { 55 | "top_selling": { 56 | "terms": { 57 | "field": "make", 58 | "size": 1 59 | }, 60 | "aggs": { 61 | "avg_price": { 62 | "avg": { "field": "price" } 63 | } 64 | } 65 | } 66 | } 67 | } 68 | } 69 | } -------------------------------------------------------------------------------- /snippets/300_Aggregations/40_scope.json: -------------------------------------------------------------------------------- 1 | 2 | # Recap of the previous examples so far -- just an aggregation, no query 3 | GET /cars/transactions/_search 4 | { 5 | "size" : 0, 6 | "aggs" : { 7 | "colors" : { 8 | "terms" : { 9 | "field" : "color" 10 | } 11 | } 12 | } 13 | } 14 | 15 | # The above request is equivalent to this (no query == match_all) 16 | GET /cars/transactions/_search 17 | { 18 | "size" : 0, 19 | "query" : { 20 | "match_all" : {} 21 | }, 22 | "aggs" : { 23 | "colors" : { 24 | "terms" : { 25 | "field" : "color" 26 | } 27 | } 28 | } 29 | } 30 | 31 | # Adding a query to our request, aggregation operates in the "query scope" 32 | GET /cars/transactions/_search 33 | { 34 | "query" : { 35 | "match" : { 36 | "make" : "ford" 37 | } 38 | }, 39 | "aggs" : { 40 | "colors" : { 41 | "terms" : { 42 | "field" : "color" 43 | } 44 | } 45 | } 46 | } 47 | 48 | # Global bucket allows us to "escape" the query scope and calculate an agg 49 | # on all documents 50 | GET /cars/transactions/_search 51 | { 52 | "size" : 0, 53 | "query" : { 54 | "match" : { 55 | "make" : "ford" 56 | } 57 | }, 58 | "aggs" : { 59 | "single_avg_price": { 60 | "avg" : { "field" : "price" } 61 | }, 62 | "all": { 63 | "global" : {}, 64 | "aggs" : { 65 | "avg_price": { 66 | "avg" : { "field" : "price" } 67 | } 68 | 69 | } 70 | } 71 | } 72 | } -------------------------------------------------------------------------------- /stash/Arbitrary preference for search.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | Arbitrary string:: 3 | 4 | The `preference` parameter can be set to any arbitrary string, such as the 5 | session ID of a user, which would ensure that the user always gets results 6 | from the same shards. While this is less useful when retrieving individual 7 | documents, it can be very useful when searching: two documents that are ranked 8 | as equally relevant by a search query may be returned in a different order by 9 | different shards. Always returning results from the same shard means that the 10 | user will see the results in a consistent order.
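(A minimal sketch of the behavior described above; the `preference` parameter is real, but the value shown is an invented session identifier.) Sending every search for a given user with the same preference string routes it to the same shard copies, so the result order stays consistent between requests.

# Pin a user's searches to the same shards with an arbitrary preference string
# ("user_12345" is a made-up session ID)
GET /_search?preference=user_12345
{
  "query": {
    "match": {
      "tweet": "elasticsearch"
    }
  }
}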
11 | -------------------------------------------------------------------------------- /stash/omit_norms.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | ==== Omitting Lucene Normalization 3 | 4 | Each field that is indexed also includes a special value called `norms`, 5 | which combines three components: 6 | 7 | - Document Boost: a per-document boost provided at index-time 8 | - Field Boost: a per-field boost also provided at index-time 9 | - Length Norm: a normalization where shorter fields obtain a higher boost 10 | relative to longer fields. This prevents long fields from scoring higher simply 11 | because they are longer. 12 | 13 | By default, `norms` are calculated for analyzed fields, and not calculated 14 | for `not_analyzed` fields. But if you are using one of the normalization schemes 15 | we just talked about (e.g. Keyword + lowercase), you may wish to disable `norms` 16 | to save space, because you don't care about relevance scoring at all. 17 | 18 | Disabling norms is as simple as setting `omit_norms: true` in your mapping. 19 | 20 | Similarly, you may wish to change the default `index_options` value. This 21 | setting controls what data is stored in the inverted index for each field. 22 | The options are: 23 | 24 | - docs: Indexes only the doc IDs 25 | - freqs: Indexes doc IDs and term frequencies 26 | - positions: Indexes doc IDs, term frequencies, and token positions 27 | 28 | Analyzed fields default to "positions", while `not_analyzed` fields default to "freqs". 29 | Again, if you are using Keyword + lowercase to make a case-insensitive `not_analyzed`-style field, 30 | it may make sense to change `index_options` to "freqs". 31 | 32 | You only have a single token, so the position information is redundant and 33 | useless; it is just taking up space. -------------------------------------------------------------------------------- /stash/stopwords.asciidoc: -------------------------------------------------------------------------------- 1 | 2 | ==== Dealing with Stop-words 3 | 4 | Stop-words are "filler" words that don't add much information to a sentence. 5 | Classic examples are words like "the", "and", "or", "but". It is common to remove 6 | stop-words in both structured and unstructured search. 7 | 8 | Elasticsearch provides a `Stop` filter which can be configured to remove 9 | words from the token stream. Stop-words can be provided in the mapping itself 10 | as an array of terms, or they can be listed in an external file. If your 11 | list of stop-words is large, it is recommended to use the external file. 12 | 13 | Importantly, stop-words in your list must match a token _exactly_ to be removed. 14 | In almost all situations you should first lowercase the token stream before 15 | removing stop-words. 16 | 17 | If you don't lowercase first, you may miss stop-words that appear with a different case 18 | (e.g. "The" vs "the").
--------------------------------------------------------------------------------
/stash/stopwords.asciidoc:
--------------------------------------------------------------------------------

==== Dealing with Stop-words

Stop-words are "filler" words that don't provide much information to a
sentence. Classic examples are words like "the", "and", "or", and "but". It is
common to remove stop-words in both structured and unstructured search.

Elasticsearch provides a `stop` token filter which can be configured to remove
words from the token stream. Stop-words can be provided in the mapping itself
as an array of terms, or they can be configured in an external file. If your
list of stop-words is large, it is recommended to use the external file.

Importantly, a stop-word in your list must match a token _exactly_ to be
removed. In almost all situations you should lowercase the token stream before
removing stop-words; otherwise you may miss stop-words that appear with a
different case (e.g. "The" vs "the").

An example mapping which places the `stop` filter after `lowercase`:

    "analysis":{
        "analyzer":{
            "default-analyzer":{
                "type":"custom",
                "tokenizer":"standard",
                "filter":[ "lowercase", "my_stop_filter" ]
            }
        },
        "filter":{
            "my_stop_filter":{
                "type":"stop",
                "stopwords":[
                    "the", "a", "to", "but", "or", "and"
                ]
            }
        }
    }
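For a long list, the same filter could instead read its words from a file. A
minimal sketch, assuming the list has been saved as `stopwords/english.txt`
inside the Elasticsearch config directory (the file name is illustrative;
`stopwords_path` is resolved relative to the config directory):

    "filter":{
        "my_stop_filter":{
            "type":"stop",
            "stopwords_path":"stopwords/english.txt"
        }
    }

This keeps large word lists out of the mapping itself.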
--------------------------------------------------------------------------------
/test/test.json:
--------------------------------------------------------------------------------

GET /_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "myfield": {
              "query": "quick brown fox",
              "fuzziness": "AUTO",
              "minimum_should_match": "70%"
            }
          }
        }
      ],
      "should": [
        {
          "match_phrase": {
            "myfield": "quick brown fox"
          }
        }
      ]
    }
  }
}
--------------------------------------------------------------------------------
/theme/epub/layout.html:
--------------------------------------------------------------------------------

{{ doctype }}
{{ title }}
{{ content }}
--------------------------------------------------------------------------------
/theme/mobi/layout.html:
--------------------------------------------------------------------------------

{{ doctype }}
{{ title }}
{{ content }}
--------------------------------------------------------------------------------
/theme/pdf/pdf.css:
--------------------------------------------------------------------------------

@charset "UTF-8";

/*--------Put Your Custom CSS Rules Below--------*/
/*--- This one-off overrides the code in https://github.com/oreillymedia/animal_theme/blob/master/pdf/pdf.css ---*/

/* Right align */

.alignmeright
{
    text-align: right;
}

/* Font fallbacks */

p
{
    font-family: "MinionPro", "Symbola", "Arial Unicode MS", "DejaVu Sans", "Code2000";
}

table tbody tr td p
{
    font-family: "MinionPro", "Symbola", "Arial Unicode MS", "DejaVu Sans", "Code2000";
}

pre
{
    font-family: "UbuntuMono", "Arial Unicode MS", "Code2000";
}

code
{
    font-family: "UbuntuMono", "Arial Unicode MS", "DejaVu Sans", "Code2000";
}

/*----Uncomment to turn on automatic code wrapping

pre {
    white-space: pre-wrap;
    word-wrap: break-word;
}
----*/

/*----Uncomment to change the TOC start page (set
the number to one page _after_ the one you want;
so 6 to start on v, 8 to start on vii, etc.)

@page toc:first {
    counter-reset: page 6;
}
----*/

/*----Uncomment to fix a bad break in the title
(increase the padding value to push down, decrease
it to pull up)

section[data-type="titlepage"] h1 {
    padding-left: 1.5in;
}
----*/

/*----Uncomment to fix a bad break in the subtitle
(increase the padding value to push down, decrease
it to pull up)

section[data-type="titlepage"] h2 {
    padding-left: 1in;
}
----*/

/*----Uncomment to fix a bad break in the author names
(increase the padding value to push down, decrease
it to pull up)

section[data-type="titlepage"] p.author {
    padding-left: 3in;
}
----*/
--------------------------------------------------------------------------------
/theme/pdf/pdf.xsl:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/titlepage.html:
--------------------------------------------------------------------------------

Elasticsearch: The Definitive Guide

Clinton Gormley and Zachary Tong

--------------------------------------------------------------------------------
/toc.html:
--------------------------------------------------------------------------------