├── .github └── workflows │ └── tests.yml ├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SECURITY.md ├── analysis ├── analyzer │ ├── custom │ │ └── custom.go │ ├── keyword │ │ └── keyword.go │ ├── simple │ │ └── simple.go │ ├── standard │ │ └── standard.go │ └── web │ │ └── web.go ├── benchmark_test.go ├── char │ ├── asciifolding │ │ ├── asciifolding.go │ │ └── asciifolding_test.go │ ├── html │ │ └── html.go │ ├── regexp │ │ ├── regexp.go │ │ └── regexp_test.go │ └── zerowidthnonjoiner │ │ └── zerowidthnonjoiner.go ├── datetime │ ├── flexible │ │ ├── flexible.go │ │ └── flexible_test.go │ ├── iso │ │ ├── iso.go │ │ └── iso_test.go │ ├── optional │ │ └── optional.go │ ├── percent │ │ ├── percent.go │ │ └── percent_test.go │ ├── sanitized │ │ ├── sanitized.go │ │ └── sanitized_test.go │ └── timestamp │ │ ├── microseconds │ │ └── microseconds.go │ │ ├── milliseconds │ │ └── milliseconds.go │ │ ├── nanoseconds │ │ └── nanoseconds.go │ │ └── seconds │ │ └── seconds.go ├── freq.go ├── freq_test.go ├── lang │ ├── ar │ │ ├── analyzer_ar.go │ │ ├── analyzer_ar_test.go │ │ ├── arabic_normalize.go │ │ ├── arabic_normalize_test.go │ │ ├── stemmer_ar.go │ │ ├── stemmer_ar_test.go │ │ ├── stop_filter_ar.go │ │ └── stop_words_ar.go │ ├── bg │ │ ├── stop_filter_bg.go │ │ └── stop_words_bg.go │ ├── ca │ │ ├── articles_ca.go │ │ ├── elision_ca.go │ │ ├── elision_ca_test.go │ │ ├── stop_filter_ca.go │ │ └── stop_words_ca.go │ ├── cjk │ │ ├── analyzer_cjk.go │ │ ├── analyzer_cjk_test.go │ │ ├── cjk_bigram.go │ │ ├── cjk_bigram_test.go │ │ ├── cjk_width.go │ │ └── cjk_width_test.go │ ├── ckb │ │ ├── analyzer_ckb.go │ │ ├── analyzer_ckb_test.go │ │ ├── sorani_normalize.go │ │ ├── sorani_normalize_test.go │ │ ├── sorani_stemmer_filter.go │ │ ├── sorani_stemmer_filter_test.go │ │ ├── stop_filter_ckb.go │ │ └── stop_words_ckb.go │ ├── cs │ │ ├── stop_filter_cs.go │ │ └── stop_words_cs.go │ ├── da │ │ ├── analyzer_da.go │ │ ├── analyzer_da_test.go │ │ ├── 
stemmer_da.go │ │ ├── stop_filter_da.go │ │ └── stop_words_da.go │ ├── de │ │ ├── analyzer_de.go │ │ ├── analyzer_de_test.go │ │ ├── german_normalize.go │ │ ├── german_normalize_test.go │ │ ├── light_stemmer_de.go │ │ ├── stemmer_de_snowball.go │ │ ├── stemmer_de_test.go │ │ ├── stop_filter_de.go │ │ └── stop_words_de.go │ ├── el │ │ ├── stop_filter_el.go │ │ └── stop_words_el.go │ ├── en │ │ ├── analyzer_en.go │ │ ├── analyzer_en_test.go │ │ ├── plural_stemmer.go │ │ ├── plural_stemmer_test.go │ │ ├── possessive_filter_en.go │ │ ├── possessive_filter_en_test.go │ │ ├── stemmer_en_snowball.go │ │ ├── stemmer_en_test.go │ │ ├── stop_filter_en.go │ │ └── stop_words_en.go │ ├── es │ │ ├── analyzer_es.go │ │ ├── analyzer_es_test.go │ │ ├── light_stemmer_es.go │ │ ├── spanish_normalize.go │ │ ├── spanish_normalize_test.go │ │ ├── stemmer_es_snowball.go │ │ ├── stemmer_es_snowball_test.go │ │ ├── stop_filter_es.go │ │ └── stop_words_es.go │ ├── eu │ │ ├── stop_filter_eu.go │ │ └── stop_words_eu.go │ ├── fa │ │ ├── analyzer_fa.go │ │ ├── analyzer_fa_test.go │ │ ├── persian_normalize.go │ │ ├── persian_normalize_test.go │ │ ├── stop_filter_fa.go │ │ └── stop_words_fa.go │ ├── fi │ │ ├── analyzer_fi.go │ │ ├── analyzer_fi_test.go │ │ ├── stemmer_fi.go │ │ ├── stop_filter_fi.go │ │ └── stop_words_fi.go │ ├── fr │ │ ├── analyzer_fr.go │ │ ├── analyzer_fr_test.go │ │ ├── articles_fr.go │ │ ├── elision_fr.go │ │ ├── elision_fr_test.go │ │ ├── light_stemmer_fr.go │ │ ├── light_stemmer_fr_test.go │ │ ├── minimal_stemmer_fr.go │ │ ├── minimal_stemmer_fr_test.go │ │ ├── stemmer_fr_snowball.go │ │ ├── stemmer_fr_snowball_test.go │ │ ├── stop_filter_fr.go │ │ └── stop_words_fr.go │ ├── ga │ │ ├── articles_ga.go │ │ ├── elision_ga.go │ │ ├── elision_ga_test.go │ │ ├── stop_filter_ga.go │ │ └── stop_words_ga.go │ ├── gl │ │ ├── stop_filter_gl.go │ │ └── stop_words_gl.go │ ├── hi │ │ ├── analyzer_hi.go │ │ ├── analyzer_hi_test.go │ │ ├── hindi_normalize.go │ │ ├── 
hindi_normalize_test.go │ │ ├── hindi_stemmer_filter.go │ │ ├── hindi_stemmer_filter_test.go │ │ ├── stop_filter_hi.go │ │ └── stop_words_hi.go │ ├── hr │ │ ├── analyzer_hr.go │ │ ├── analyzer_hr_test.go │ │ ├── stemmer_hr.go │ │ ├── stop_filter_hr.go │ │ ├── stop_words_hr.go │ │ └── suffix_transformation_hr.go │ ├── hu │ │ ├── analyzer_hu.go │ │ ├── analyzer_hu_test.go │ │ ├── stemmer_hu.go │ │ ├── stop_filter_hu.go │ │ └── stop_words_hu.go │ ├── hy │ │ ├── stop_filter_hy.go │ │ └── stop_words_hy.go │ ├── id │ │ ├── stop_filter_id.go │ │ └── stop_words_id.go │ ├── in │ │ ├── indic_normalize.go │ │ ├── indic_normalize_test.go │ │ └── scripts.go │ ├── it │ │ ├── analyzer_it.go │ │ ├── analyzer_it_test.go │ │ ├── articles_it.go │ │ ├── elision_it.go │ │ ├── elision_it_test.go │ │ ├── light_stemmer_it.go │ │ ├── light_stemmer_it_test.go │ │ ├── stemmer_it_snowball.go │ │ ├── stemmer_it_snowball_test.go │ │ ├── stop_filter_it.go │ │ └── stop_words_it.go │ ├── nl │ │ ├── analyzer_nl.go │ │ ├── analyzer_nl_test.go │ │ ├── stemmer_nl.go │ │ ├── stop_filter_nl.go │ │ └── stop_words_nl.go │ ├── no │ │ ├── analyzer_no.go │ │ ├── analyzer_no_test.go │ │ ├── stemmer_no.go │ │ ├── stop_filter_no.go │ │ └── stop_words_no.go │ ├── pl │ │ ├── analyzer_pl.go │ │ ├── analyzer_pl_test.go │ │ ├── stemmer_pl.go │ │ ├── stemmer_pl_test.go │ │ ├── stempel │ │ │ ├── LICENSE │ │ │ ├── cell.go │ │ │ ├── diff.go │ │ │ ├── diff_test.go │ │ │ ├── file.go │ │ │ ├── file_test.go │ │ │ ├── fuzz.go │ │ │ ├── javadata │ │ │ │ ├── README.md │ │ │ │ ├── fuzz.go │ │ │ │ ├── input.go │ │ │ │ └── input_test.go │ │ │ ├── multi_trie.go │ │ │ ├── pl │ │ │ │ ├── pl_PL.dic.gz │ │ │ │ └── stemmer_20000.tbl │ │ │ ├── row.go │ │ │ ├── strenum.go │ │ │ ├── strenum_test.go │ │ │ └── trie.go │ │ ├── stop_filter_pl.go │ │ └── stop_words_pl.go │ ├── pt │ │ ├── analyzer_pt.go │ │ ├── analyzer_pt_test.go │ │ ├── light_stemmer_pt.go │ │ ├── light_stemmer_pt_test.go │ │ ├── stop_filter_pt.go │ │ └── stop_words_pt.go │ 
├── ro │ │ ├── analyzer_ro.go │ │ ├── analyzer_ro_test.go │ │ ├── stemmer_ro.go │ │ ├── stop_filter_ro.go │ │ └── stop_words_ro.go │ ├── ru │ │ ├── analyzer_ru.go │ │ ├── analyzer_ru_test.go │ │ ├── stemmer_ru.go │ │ ├── stemmer_ru_test.go │ │ ├── stop_filter_ru.go │ │ └── stop_words_ru.go │ ├── sv │ │ ├── analyzer_sv.go │ │ ├── analyzer_sv_test.go │ │ ├── stemmer_sv.go │ │ ├── stop_filter_sv.go │ │ └── stop_words_sv.go │ └── tr │ │ ├── analyzer_tr.go │ │ ├── analyzer_tr_test.go │ │ ├── stemmer_tr.go │ │ ├── stemmer_tr_test.go │ │ ├── stop_filter_tr.go │ │ └── stop_words_tr.go ├── test_words.txt ├── token │ ├── apostrophe │ │ ├── apostrophe.go │ │ └── apostrophe_test.go │ ├── camelcase │ │ ├── camelcase.go │ │ ├── camelcase_test.go │ │ ├── parser.go │ │ └── states.go │ ├── compound │ │ ├── dict.go │ │ └── dict_test.go │ ├── edgengram │ │ ├── edgengram.go │ │ └── edgengram_test.go │ ├── elision │ │ ├── elision.go │ │ └── elision_test.go │ ├── hierarchy │ │ ├── hierarchy.go │ │ └── hierarchy_test.go │ ├── keyword │ │ ├── keyword.go │ │ └── keyword_test.go │ ├── length │ │ ├── length.go │ │ └── length_test.go │ ├── lowercase │ │ ├── lowercase.go │ │ └── lowercase_test.go │ ├── ngram │ │ ├── ngram.go │ │ └── ngram_test.go │ ├── porter │ │ ├── porter.go │ │ └── porter_test.go │ ├── reverse │ │ ├── reverse.go │ │ └── reverse_test.go │ ├── shingle │ │ ├── shingle.go │ │ └── shingle_test.go │ ├── snowball │ │ ├── snowball.go │ │ └── snowball_test.go │ ├── stop │ │ ├── stop.go │ │ └── stop_test.go │ ├── truncate │ │ ├── truncate.go │ │ └── truncate_test.go │ ├── unicodenorm │ │ ├── unicodenorm.go │ │ └── unicodenorm_test.go │ └── unique │ │ ├── unique.go │ │ └── unique_test.go ├── tokenizer │ ├── character │ │ ├── character.go │ │ └── character_test.go │ ├── exception │ │ ├── exception.go │ │ └── exception_test.go │ ├── letter │ │ └── letter.go │ ├── regexp │ │ ├── regexp.go │ │ └── regexp_test.go │ ├── single │ │ ├── single.go │ │ └── single_test.go │ ├── unicode │ │ ├── 
unicode.go │ │ └── unicode_test.go │ ├── web │ │ ├── web.go │ │ └── web_test.go │ └── whitespace │ │ ├── whitespace.go │ │ └── whitespace_test.go ├── tokenmap.go ├── tokenmap │ └── custom.go ├── tokenmap_test.go ├── type.go ├── util.go └── util_test.go ├── builder.go ├── builder_test.go ├── cmd └── bleve │ ├── .gitignore │ ├── cmd │ ├── bulk.go │ ├── check.go │ ├── count.go │ ├── create.go │ ├── dictionary.go │ ├── dump.go │ ├── dumpDoc.go │ ├── dumpFields.go │ ├── fields.go │ ├── index.go │ ├── mapping.go │ ├── query.go │ ├── registry.go │ ├── root.go │ ├── scorch.go │ └── scorch │ │ ├── ascii.go │ │ ├── deleted.go │ │ ├── info.go │ │ ├── internal.go │ │ ├── root.go │ │ └── snapshot.go │ ├── gendocs.go │ ├── main.go │ └── vendor │ ├── github.com │ ├── inconshreveable │ │ └── mousetrap │ │ │ ├── LICENSE │ │ │ ├── trap_others.go │ │ │ ├── trap_windows.go │ │ │ └── trap_windows_1.4.go │ └── spf13 │ │ ├── cobra │ │ ├── LICENSE.txt │ │ ├── bash_completions.go │ │ ├── cobra.go │ │ ├── command.go │ │ ├── command_notwin.go │ │ └── command_win.go │ │ └── pflag │ │ ├── LICENSE │ │ ├── bool.go │ │ ├── bool_slice.go │ │ ├── count.go │ │ ├── duration.go │ │ ├── flag.go │ │ ├── float32.go │ │ ├── float64.go │ │ ├── golangflag.go │ │ ├── int.go │ │ ├── int32.go │ │ ├── int64.go │ │ ├── int8.go │ │ ├── int_slice.go │ │ ├── ip.go │ │ ├── ip_slice.go │ │ ├── ipmask.go │ │ ├── ipnet.go │ │ ├── string.go │ │ ├── string_array.go │ │ ├── string_slice.go │ │ ├── uint.go │ │ ├── uint16.go │ │ ├── uint32.go │ │ ├── uint64.go │ │ ├── uint8.go │ │ └── uint_slice.go │ └── manifest ├── config.go ├── config ├── README.md └── config.go ├── config_app.go ├── config_disk.go ├── data └── test │ └── sample-data.json ├── doc.go ├── docs ├── bleve.png ├── geo.md ├── scoring.md ├── sort_facet.md ├── sort_facet_supporting_docs │ ├── indexSizeVsNumDocs.png │ └── queryTimevsNumDocs.png ├── synonyms.md └── vectors.md ├── document ├── document.go ├── document_test.go ├── field.go ├── field_boolean.go ├── 
field_composite.go ├── field_datetime.go ├── field_geopoint.go ├── field_geopoint_test.go ├── field_geoshape.go ├── field_ip.go ├── field_ip_test.go ├── field_numeric.go ├── field_numeric_test.go ├── field_synonym.go ├── field_text.go ├── field_vector.go ├── field_vector_base64.go └── field_vector_base64_test.go ├── error.go ├── examples_test.go ├── geo ├── README.md ├── benchmark_geohash_test.go ├── geo.go ├── geo_dist.go ├── geo_dist_test.go ├── geo_s2plugin_impl.go ├── geo_test.go ├── geohash.go ├── geohash_test.go ├── parse.go ├── parse_test.go ├── sloppy.go └── versus_test.go ├── go.mod ├── go.sum ├── index.go ├── index ├── scorch │ ├── README.md │ ├── builder.go │ ├── builder_test.go │ ├── empty.go │ ├── event.go │ ├── event_test.go │ ├── field_dict_test.go │ ├── int.go │ ├── int_test.go │ ├── introducer.go │ ├── merge.go │ ├── merge_test.go │ ├── mergeplan │ │ ├── merge_plan.go │ │ ├── merge_plan_test.go │ │ └── sort.go │ ├── optimize.go │ ├── optimize_knn.go │ ├── persister.go │ ├── reader_test.go │ ├── regexp.go │ ├── regexp_test.go │ ├── rollback.go │ ├── rollback_test.go │ ├── scorch.go │ ├── scorch_test.go │ ├── segment_plugin.go │ ├── snapshot_index.go │ ├── snapshot_index_dict.go │ ├── snapshot_index_doc.go │ ├── snapshot_index_str.go │ ├── snapshot_index_test.go │ ├── snapshot_index_tfr.go │ ├── snapshot_index_thes.go │ ├── snapshot_index_vr.go │ ├── snapshot_segment.go │ ├── snapshot_vector_index.go │ ├── stats.go │ └── unadorned.go └── upsidedown │ ├── analysis.go │ ├── analysis_test.go │ ├── benchmark_all.sh │ ├── benchmark_boltdb_test.go │ ├── benchmark_common_test.go │ ├── benchmark_gtreap_test.go │ ├── benchmark_null_test.go │ ├── dump.go │ ├── dump_test.go │ ├── field_cache.go │ ├── field_dict.go │ ├── field_dict_test.go │ ├── index_reader.go │ ├── reader.go │ ├── reader_test.go │ ├── row.go │ ├── row_merge.go │ ├── row_merge_test.go │ ├── row_test.go │ ├── stats.go │ ├── store │ ├── boltdb │ │ ├── iterator.go │ │ ├── reader.go │ │ ├── 
stats.go │ │ ├── store.go │ │ ├── store_test.go │ │ └── writer.go │ ├── goleveldb │ │ ├── batch.go │ │ ├── config.go │ │ ├── iterator.go │ │ ├── reader.go │ │ ├── store.go │ │ ├── store_test.go │ │ └── writer.go │ ├── gtreap │ │ ├── iterator.go │ │ ├── reader.go │ │ ├── store.go │ │ ├── store_test.go │ │ └── writer.go │ ├── metrics │ │ ├── batch.go │ │ ├── iterator.go │ │ ├── metrics_test.go │ │ ├── reader.go │ │ ├── stats.go │ │ ├── store.go │ │ ├── store_test.go │ │ ├── util.go │ │ └── writer.go │ ├── moss │ │ ├── batch.go │ │ ├── iterator.go │ │ ├── lower.go │ │ ├── lower_test.go │ │ ├── reader.go │ │ ├── stats.go │ │ ├── store.go │ │ ├── store_test.go │ │ └── writer.go │ └── null │ │ ├── null.go │ │ └── null_test.go │ ├── upsidedown.go │ ├── upsidedown.pb.go │ ├── upsidedown.proto │ └── upsidedown_test.go ├── index_alias.go ├── index_alias_impl.go ├── index_alias_impl_test.go ├── index_impl.go ├── index_meta.go ├── index_meta_test.go ├── index_stats.go ├── index_test.go ├── mapping.go ├── mapping ├── analysis.go ├── document.go ├── examples_test.go ├── field.go ├── index.go ├── mapping.go ├── mapping_no_vectors.go ├── mapping_test.go ├── mapping_vectors.go ├── mapping_vectors_test.go ├── reflect.go ├── reflect_test.go └── synonym.go ├── mapping_vector.go ├── numeric ├── bin.go ├── bin_test.go ├── float.go ├── float_test.go ├── prefix_coded.go └── prefix_coded_test.go ├── pre_search.go ├── query.go ├── query_bench_test.go ├── registry ├── analyzer.go ├── cache.go ├── char_filter.go ├── datetime_parser.go ├── fragment_formatter.go ├── fragmenter.go ├── highlighter.go ├── index_type.go ├── registry.go ├── store.go ├── synonym_source.go ├── token_filter.go ├── token_maps.go └── tokenizer.go ├── scripts ├── build_children.sh ├── merge-coverprofile.go ├── old_build_script.txt └── project-code-coverage.sh ├── search.go ├── search ├── collector.go ├── collector │ ├── bench_test.go │ ├── eligible.go │ ├── heap.go │ ├── knn.go │ ├── list.go │ ├── search_test.go │ ├── 
slice.go │ ├── topn.go │ └── topn_test.go ├── explanation.go ├── facet │ ├── benchmark_data.txt │ ├── facet_builder_datetime.go │ ├── facet_builder_numeric.go │ ├── facet_builder_numeric_test.go │ ├── facet_builder_terms.go │ └── facet_builder_terms_test.go ├── facets_builder.go ├── facets_builder_test.go ├── highlight │ ├── format │ │ ├── ansi │ │ │ └── ansi.go │ │ ├── html │ │ │ ├── html.go │ │ │ └── html_test.go │ │ └── plain │ │ │ ├── plain.go │ │ │ └── plain_test.go │ ├── fragmenter │ │ └── simple │ │ │ ├── simple.go │ │ │ └── simple_test.go │ ├── highlighter.go │ ├── highlighter │ │ ├── ansi │ │ │ └── ansi.go │ │ ├── html │ │ │ └── html.go │ │ └── simple │ │ │ ├── fragment_scorer_simple.go │ │ │ ├── fragment_scorer_simple_test.go │ │ │ ├── highlighter_simple.go │ │ │ └── highlighter_simple_test.go │ ├── term_locations.go │ └── term_locations_test.go ├── levenshtein.go ├── levenshtein_test.go ├── pool.go ├── pool_test.go ├── query │ ├── bool_field.go │ ├── boolean.go │ ├── boost.go │ ├── conjunction.go │ ├── date_range.go │ ├── date_range_string.go │ ├── date_range_test.go │ ├── disjunction.go │ ├── docid.go │ ├── fuzzy.go │ ├── geo_boundingbox.go │ ├── geo_boundingpolygon.go │ ├── geo_distance.go │ ├── geo_shape.go │ ├── ip_range.go │ ├── knn.go │ ├── match.go │ ├── match_all.go │ ├── match_none.go │ ├── match_phrase.go │ ├── match_phrase_test.go │ ├── multi_phrase.go │ ├── numeric_range.go │ ├── phrase.go │ ├── prefix.go │ ├── query.go │ ├── query_string.go │ ├── query_string.y │ ├── query_string.y.go │ ├── query_string_lex.go │ ├── query_string_lex_test.go │ ├── query_string_parser.go │ ├── query_string_parser_test.go │ ├── query_test.go │ ├── regexp.go │ ├── term.go │ ├── term_range.go │ └── wildcard.go ├── scorer │ ├── scorer_conjunction.go │ ├── scorer_constant.go │ ├── scorer_constant_test.go │ ├── scorer_disjunction.go │ ├── scorer_knn.go │ ├── scorer_knn_test.go │ ├── scorer_term.go │ ├── scorer_term_test.go │ └── sqrt_cache.go ├── search.go ├── 
search_test.go ├── searcher │ ├── base_test.go │ ├── geoshape_contains_test.go │ ├── geoshape_intersects_test.go │ ├── geoshape_within_test.go │ ├── optimize_knn.go │ ├── optimize_no_knn.go │ ├── ordered_searchers_list.go │ ├── search_boolean.go │ ├── search_boolean_test.go │ ├── search_conjunction.go │ ├── search_conjunction_test.go │ ├── search_disjunction.go │ ├── search_disjunction_heap.go │ ├── search_disjunction_slice.go │ ├── search_disjunction_test.go │ ├── search_docid.go │ ├── search_docid_test.go │ ├── search_filter.go │ ├── search_fuzzy.go │ ├── search_fuzzy_test.go │ ├── search_geoboundingbox.go │ ├── search_geoboundingbox_test.go │ ├── search_geopointdistance.go │ ├── search_geopointdistance_test.go │ ├── search_geopolygon.go │ ├── search_geopolygon_test.go │ ├── search_geoshape.go │ ├── search_geoshape_circle_test.go │ ├── search_geoshape_envelope_test.go │ ├── search_geoshape_geometrycollection_test.go │ ├── search_geoshape_linestring_test.go │ ├── search_geoshape_points_test.go │ ├── search_geoshape_polygon_test.go │ ├── search_ip_range.go │ ├── search_ip_range_test.go │ ├── search_knn.go │ ├── search_match_all.go │ ├── search_match_all_test.go │ ├── search_match_none.go │ ├── search_match_none_test.go │ ├── search_multi_term.go │ ├── search_numeric_range.go │ ├── search_numeric_range_test.go │ ├── search_phrase.go │ ├── search_phrase_test.go │ ├── search_regexp.go │ ├── search_regexp_test.go │ ├── search_term.go │ ├── search_term_prefix.go │ ├── search_term_range.go │ ├── search_term_range_test.go │ └── search_term_test.go ├── sort.go ├── sort_test.go ├── util.go └── util_test.go ├── search_knn.go ├── search_knn_test.go ├── search_no_knn.go ├── search_test.go ├── size └── sizes.go ├── test ├── integration.go ├── integration_test.go ├── ip_field_test.go ├── knn │ └── knn_dataset_queries.zip ├── tests │ ├── alias │ │ ├── datasets │ │ │ ├── shard0 │ │ │ │ ├── a.json │ │ │ │ └── c.json │ │ │ └── shard1 │ │ │ │ ├── b.json │ │ │ │ └── d.json │ │ ├── 
mapping.json │ │ └── searches.json │ ├── basic │ │ ├── data │ │ │ ├── a.json │ │ │ ├── b.json │ │ │ ├── c.json │ │ │ └── d.json │ │ ├── mapping.json │ │ └── searches.json │ ├── employee │ │ ├── data │ │ │ └── emp10508560.json │ │ ├── mapping.json │ │ └── searches.json │ ├── facet │ │ ├── data │ │ │ ├── a.json │ │ │ ├── b.json │ │ │ ├── c.json │ │ │ ├── d.json │ │ │ ├── e.json │ │ │ ├── f.json │ │ │ ├── g.json │ │ │ ├── h.json │ │ │ ├── i.json │ │ │ └── j.json │ │ ├── mapping.json │ │ └── searches.json │ ├── fosdem │ │ ├── data │ │ │ ├── 3311@FOSDEM15@fosdem.org.json │ │ │ ├── 3492@FOSDEM15@fosdem.org.json │ │ │ ├── 3496@FOSDEM15@fosdem.org.json │ │ │ ├── 3505@FOSDEM15@fosdem.org.json │ │ │ └── 3507@FOSDEM15@fosdem.org.json │ │ ├── mapping.json │ │ └── searches.json │ ├── geo │ │ ├── data │ │ │ ├── amoeba_brewery.json │ │ │ ├── brewpub_on_the_green.json │ │ │ ├── capital_city_brewing_company.json │ │ │ ├── communiti_brewery.json │ │ │ ├── firehouse_grill_brewery.json │ │ │ ├── hook_ladder_brewing_company.json │ │ │ ├── jack_s_brewing.json │ │ │ ├── social_brewery.json │ │ │ └── sweet_water_tavern_and_brewery.json │ │ ├── mapping.json │ │ └── searches.json │ ├── geoshapes │ │ ├── data │ │ │ ├── circle_halairport.json │ │ │ ├── envelope_brockwell_park.json │ │ │ ├── geometrycollection_tvm.json │ │ │ ├── linestring_putney_bridge.json │ │ │ ├── multilinestring_old_airport_road.json │ │ │ ├── multipoint_blr_stadiums.json │ │ │ ├── multipolygon_london_parks.json │ │ │ ├── point_museum_of_london.json │ │ │ └── polygon_cubbonpark.json │ │ ├── mapping.json │ │ └── searches.json │ ├── phrase │ │ ├── data │ │ │ ├── a.json │ │ │ └── b.json │ │ ├── mapping.json │ │ └── searches.json │ └── sort │ │ ├── data │ │ ├── a.json │ │ ├── b.json │ │ ├── c.json │ │ ├── d.json │ │ ├── e.json │ │ └── f.json │ │ ├── mapping.json │ │ └── searches.json ├── versus_score_test.go └── versus_test.go └── util ├── extract.go └── json.go /.github/workflows/tests.yml: 
-------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | pull_request: 6 | name: Tests 7 | jobs: 8 | test: 9 | strategy: 10 | matrix: 11 | go-version: [1.22.x, 1.23.x, 1.24.x] 12 | platform: [ubuntu-latest, macos-latest, windows-latest] 13 | runs-on: ${{ matrix.platform }} 14 | steps: 15 | - name: Install Go 16 | uses: actions/setup-go@v1 17 | with: 18 | go-version: ${{ matrix.go-version }} 19 | - name: Checkout code 20 | uses: actions/checkout@v2 21 | - name: Test 22 | run: | 23 | go version 24 | go test -race ./... 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #* 2 | *.sublime-* 3 | *~ 4 | .#* 5 | .project 6 | .settings 7 | **/.idea/ 8 | **/*.iml 9 | .DS_Store 10 | query_string.y.go.tmp 11 | /analysis/token_filters/cld2/cld2-read-only 12 | /analysis/token_filters/cld2/libcld2_full.a 13 | /cmd/bleve/bleve 14 | vendor/** 15 | !vendor/manifest 16 | /y.output 17 | /search/query/y.output 18 | *.test 19 | tags 20 | go.sum 21 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | 3 | language: go 4 | 5 | go: 6 | - "1.21.x" 7 | - "1.22.x" 8 | - "1.23.x" 9 | 10 | script: 11 | - go get golang.org/x/tools/cmd/cover 12 | - go get github.com/mattn/goveralls 13 | - go get github.com/kisielk/errcheck 14 | - go get -u github.com/FiloSottile/gvt 15 | - gvt restore 16 | - go test -race -v $(go list ./... | grep -v vendor/) 17 | - go vet $(go list ./... | grep -v vendor/) 18 | - go test ./test -v -indexType scorch 19 | - errcheck -ignorepkg fmt $(go list ./... 
| grep -v vendor/); 20 | - scripts/project-code-coverage.sh 21 | - scripts/build_children.sh 22 | 23 | notifications: 24 | email: 25 | - fts-team@couchbase.com 26 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Bleve 2 | 3 | We look forward to your contributions, but ask that you first review these guidelines. 4 | 5 | ## Sign the CLA 6 | 7 | As Bleve is a Couchbase project, we require that contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement, log in to the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool, but it is still used to track acceptance of the contributor license agreements. 8 | 9 | ## Submitting a Pull Request 10 | 11 | All types of contributions are welcome, but please keep the following in mind: 12 | 13 | - If you're planning a large change, you should really discuss it in a GitHub issue or on the Google group first. This helps avoid duplicate effort and spending time on something that may not be merged. 14 | - Existing tests should continue to pass; new tests for the contribution are nice to have. 15 | - All code should have gone through `go fmt` 16 | - All code should pass `go vet` 17 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | We support the latest release (for example, bleve v2.5.x). 6 | 7 | ## Reporting a Vulnerability 8 | 9 | All security issues for this project should be reported via email to [security@couchbase.com](mailto:security@couchbase.com) and [fts-team@couchbase.com](mailto:fts-team@couchbase.com). 
10 | 11 | This mail will be delivered to the owners of this project. 12 | 13 | - To ensure your report is NOT marked as spam, please include the word "security/vulnerability" along with the project name (blevesearch/bleve) in the subject of the email. 14 | - Please be as descriptive as possible when explaining the issue; a test case highlighting the issue is always welcome. 15 | 16 | Your email will be acknowledged as soon as possible. 17 | -------------------------------------------------------------------------------- /analysis/analyzer/keyword/keyword.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package keyword 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/tokenizer/single" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | const Name = "keyword" 24 | 25 | func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) { 26 | keywordTokenizer, err := cache.TokenizerNamed(single.Name) 27 | if err != nil { 28 | return nil, err 29 | } 30 | rv := analysis.DefaultAnalyzer{ 31 | Tokenizer: keywordTokenizer, 32 | } 33 | return &rv, nil 34 | } 35 | 36 | func init() { 37 | err := registry.RegisterAnalyzer(Name, AnalyzerConstructor) 38 | if err != nil { 39 | panic(err) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /analysis/analyzer/simple/simple.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package simple 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 20 | "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" 21 | "github.com/blevesearch/bleve/v2/registry" 22 | ) 23 | 24 | const Name = "simple" 25 | 26 | func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) { 27 | tokenizer, err := cache.TokenizerNamed(letter.Name) 28 | if err != nil { 29 | return nil, err 30 | } 31 | toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) 32 | if err != nil { 33 | return nil, err 34 | } 35 | rv := analysis.DefaultAnalyzer{ 36 | Tokenizer: tokenizer, 37 | TokenFilters: []analysis.TokenFilter{ 38 | toLowerFilter, 39 | }, 40 | } 41 | return &rv, nil 42 | } 43 | 44 | func init() { 45 | err := registry.RegisterAnalyzer(Name, AnalyzerConstructor) 46 | if err != nil { 47 | panic(err) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /analysis/char/html/html.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package html 16 | 17 | import ( 18 | "bytes" 19 | "regexp" 20 | 21 | "github.com/blevesearch/bleve/v2/analysis" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | const Name = "html" 26 | 27 | var htmlCharFilterRegexp = regexp.MustCompile(`</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`) 28 | 29 | type CharFilter struct { 30 | r *regexp.Regexp 31 | replacement []byte 32 | } 33 | 34 | func New() *CharFilter { 35 | return &CharFilter{ 36 | r: htmlCharFilterRegexp, 37 | replacement: []byte(" "), 38 | } 39 | } 40 | 41 | func (s *CharFilter) Filter(input []byte) []byte { 42 | return s.r.ReplaceAllFunc( 43 | input, func(in []byte) []byte { 44 | return bytes.Repeat(s.replacement, len(in)) 45 | }) 46 | } 47 | 48 | func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) { 49 | return New(), nil 50 | } 51 | 52 | func init() { 53 | err := registry.RegisterCharFilter(Name, CharFilterConstructor) 54 | if err != nil { 55 | panic(err) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /analysis/char/zerowidthnonjoiner/zerowidthnonjoiner.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package zerowidthnonjoiner 16 | 17 | import ( 18 | "regexp" 19 | 20 | "github.com/blevesearch/bleve/v2/analysis" 21 | regexpCharFilter "github.com/blevesearch/bleve/v2/analysis/char/regexp" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | const Name = "zero_width_spaces" 26 | 27 | var zeroWidthNonJoinerRegexp = regexp.MustCompile(`\x{200C}`) 28 | 29 | func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) { 30 | replaceBytes := []byte(" ") 31 | return regexpCharFilter.New(zeroWidthNonJoinerRegexp, replaceBytes), nil 32 | } 33 | 34 | func init() { 35 | err := registry.RegisterCharFilter(Name, CharFilterConstructor) 36 | if err != nil { 37 | panic(err) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /analysis/datetime/optional/optional.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package optional 16 | 17 | import ( 18 | "time" 19 | 20 | "github.com/blevesearch/bleve/v2/analysis" 21 | "github.com/blevesearch/bleve/v2/analysis/datetime/flexible" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | const Name = "dateTimeOptional" 26 | 27 | const rfc3339NoTimezone = "2006-01-02T15:04:05" 28 | const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05" 29 | const rfc3339Offset = "2006-01-02 15:04:05 -0700" 30 | const rfc3339NoTime = "2006-01-02" 31 | 32 | var layouts = []string{ 33 | time.RFC3339Nano, 34 | time.RFC3339, 35 | rfc3339NoTimezone, 36 | rfc3339NoTimezoneNoT, 37 | rfc3339Offset, 38 | rfc3339NoTime, 39 | } 40 | 41 | func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) { 42 | return flexible.New(layouts), nil 43 | } 44 | 45 | func init() { 46 | err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor) 47 | if err != nil { 48 | panic(err) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /analysis/freq_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package analysis 16 | 17 | import ( 18 | index "github.com/blevesearch/bleve_index_api" 19 | "reflect" 20 | "testing" 21 | ) 22 | 23 | func TestTokenFrequency(t *testing.T) { 24 | tokens := TokenStream{ 25 | &Token{ 26 | Term: []byte("water"), 27 | Position: 1, 28 | Start: 0, 29 | End: 5, 30 | }, 31 | &Token{ 32 | Term: []byte("water"), 33 | Position: 2, 34 | Start: 6, 35 | End: 11, 36 | }, 37 | } 38 | expectedResult := index.TokenFrequencies{ 39 | "water": &index.TokenFreq{ 40 | Term: []byte("water"), 41 | Locations: []*index.TokenLocation{ 42 | { 43 | Position: 1, 44 | Start: 0, 45 | End: 5, 46 | }, 47 | { 48 | Position: 2, 49 | Start: 6, 50 | End: 11, 51 | }, 52 | }, 53 | }, 54 | } 55 | expectedResult["water"].SetFrequency(2) 56 | result := TokenFrequency(tokens, nil, index.IncludeTermVectors) 57 | if !reflect.DeepEqual(result, expectedResult) { 58 | t.Errorf("expected %#v, got %#v", expectedResult, result) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /analysis/lang/ar/stop_filter_ar.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package ar 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/bg/stop_filter_bg.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package bg 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/ca/articles_ca.go: -------------------------------------------------------------------------------- 1 | package ca 2 | 3 | import ( 4 | "github.com/blevesearch/bleve/v2/analysis" 5 | "github.com/blevesearch/bleve/v2/registry" 6 | ) 7 | 8 | const ArticlesName = "articles_ca" 9 | 10 | // this content was obtained from: 11 | // lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis 12 | 13 | var CatalanArticles = []byte(` 14 | d 15 | l 16 | m 17 | n 18 | s 19 | t 20 | `) 21 | 22 | func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { 23 | rv := analysis.NewTokenMap() 24 | err := rv.LoadBytes(CatalanArticles) 25 | return rv, err 26 | } 27 | 28 | func init() { 29 | err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor) 30 | if err != nil { 31 | panic(err) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /analysis/lang/ca/elision_ca.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package ca 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/blevesearch/bleve/v2/analysis" 21 | "github.com/blevesearch/bleve/v2/analysis/token/elision" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | const ElisionName = "elision_ca" 26 | 27 | func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 28 | articlesTokenMap, err := cache.TokenMapNamed(ArticlesName) 29 | if err != nil { 30 | return nil, fmt.Errorf("error building elision filter: %v", err) 31 | } 32 | return elision.NewElisionFilter(articlesTokenMap), nil 33 | } 34 | 35 | func init() { 36 | err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor) 37 | if err != nil { 38 | panic(err) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /analysis/lang/ca/stop_filter_ca.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package ca 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/ckb/stop_filter_ckb.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package ckb 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/cs/stop_filter_cs.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package cs 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/da/stemmer_da.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package da 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/danish" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_da_snowball" 26 | 27 | type DanishStemmerFilter struct { 28 | } 29 | 30 | func NewDanishStemmerFilter() *DanishStemmerFilter { 31 | return &DanishStemmerFilter{} 32 | } 33 | 34 | func (s *DanishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | danish.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func DanishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewDanishStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, DanishStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/da/stop_filter_da.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package da 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/de/stemmer_de_snowball.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package de 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/german" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_de_snowball" 26 | 27 | type GermanStemmerFilter struct { 28 | } 29 | 30 | func NewGermanStemmerFilter() *GermanStemmerFilter { 31 | return &GermanStemmerFilter{} 32 | } 33 | 34 | func (s *GermanStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | german.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func GermanStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewGermanStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, GermanStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/de/stop_filter_de.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package de 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/el/stop_filter_el.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package el 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/el/stop_words_el.go: -------------------------------------------------------------------------------- 1 | package el 2 | 3 | import ( 4 | "github.com/blevesearch/bleve/v2/analysis" 5 | "github.com/blevesearch/bleve/v2/registry" 6 | ) 7 | 8 | const StopName = "stop_el" 9 | 10 | // this content was obtained from: 11 | // lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/ 12 | // ` was changed to ' to allow for literal string 13 | 14 | var GreekStopWords = []byte(`# Lucene Greek Stopwords list 15 | # Note: by default this file is used after GreekLowerCaseFilter, 16 | # so when modifying this file use 'σ' instead of 'ς' 17 | ο 18 | η 19 | το 20 | οι 21 | τα 22 | του 23 | τησ 24 | των 25 | τον 26 | την 27 | και 28 | κι 29 | κ 30 | ειμαι 31 | εισαι 32 | ειναι 33 | ειμαστε 34 | ειστε 35 | στο 36 | στον 37 | στη 38 | στην 39 | μα 40 | αλλα 41 | απο 42 | για 43 | προσ 44 | με 45 | σε 46 | ωσ 47 | παρα 48 | αντι 49 | κατα 50 | μετα 51 | θα 52 | να 53 | δε 54 | δεν 55 | μη 56 | μην 57 | επι 58 | ενω 59 | εαν 60 | αν 61 | τοτε 62 | που 63 | πωσ 64 | ποιοσ 65 | ποια 66 | ποιο 67 | ποιοι 68 | ποιεσ 69 | ποιων 70 | ποιουσ 71 | αυτοσ 72 | αυτη 73 | αυτο 74 | αυτοι 75 | αυτων 76 | αυτουσ 77 | αυτεσ 78 | αυτα 79 | εκεινοσ 80 | 
εκεινη 81 | εκεινο 82 | εκεινοι 83 | εκεινεσ 84 | εκεινα 85 | εκεινων 86 | εκεινουσ 87 | οπωσ 88 | ομωσ 89 | ισωσ 90 | οσο 91 | οτι 92 | `) 93 | 94 | func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { 95 | rv := analysis.NewTokenMap() 96 | err := rv.LoadBytes(GreekStopWords) 97 | return rv, err 98 | } 99 | 100 | func init() { 101 | err := registry.RegisterTokenMap(StopName, TokenMapConstructor) 102 | if err != nil { 103 | panic(err) 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /analysis/lang/en/plural_stemmer_test.go: -------------------------------------------------------------------------------- 1 | package en 2 | 3 | import "testing" 4 | 5 | func TestEnglishPluralStemmer(t *testing.T) { 6 | data := []struct { 7 | In, Out string 8 | }{ 9 | {"dresses", "dress"}, 10 | {"dress", "dress"}, 11 | {"axes", "axe"}, 12 | {"ad", "ad"}, 13 | {"ads", "ad"}, 14 | {"gas", "ga"}, 15 | {"sass", "sass"}, 16 | {"berries", "berry"}, 17 | {"dresses", "dress"}, 18 | {"spies", "spy"}, 19 | {"shoes", "shoe"}, 20 | {"headaches", "headache"}, 21 | {"computer", "computer"}, 22 | {"dressing", "dressing"}, 23 | {"clothes", "clothe"}, 24 | {"DRESSES", "dress"}, 25 | {"frog", "frog"}, 26 | {"dress", "dress"}, 27 | {"runs", "run"}, 28 | {"pies", "pie"}, 29 | {"foxes", "fox"}, 30 | {"axes", "axe"}, 31 | {"foes", "fo"}, 32 | {"dishes", "dish"}, 33 | {"snitches", "snitch"}, 34 | {"cliches", "cliche"}, 35 | {"forests", "forest"}, 36 | {"yes", "ye"}, 37 | } 38 | 39 | for _, datum := range data { 40 | stemmed := stem(datum.In) 41 | 42 | if stemmed != datum.Out { 43 | t.Errorf("expected %v but got %v", datum.Out, stemmed) 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /analysis/lang/en/stemmer_en_snowball.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Couchbase, 
Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package en 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/english" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_en_snowball" 26 | 27 | type EnglishStemmerFilter struct { 28 | } 29 | 30 | func NewEnglishStemmerFilter() *EnglishStemmerFilter { 31 | return &EnglishStemmerFilter{} 32 | } 33 | 34 | func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | english.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewEnglishStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/en/stop_filter_en.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package en 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/es/stemmer_es_snowball.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package es 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/spanish" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_es_snowball" 26 | 27 | type SpanishStemmerFilter struct { 28 | } 29 | 30 | func NewSpanishStemmerFilter() *SpanishStemmerFilter { 31 | return &SpanishStemmerFilter{} 32 | } 33 | 34 | func (s *SpanishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | spanish.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func SpanishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewSpanishStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, SpanishStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/es/stop_filter_es.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | package es 15 | 16 | import ( 17 | "github.com/blevesearch/bleve/v2/analysis" 18 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | ) 21 | 22 | func StopTokenFilterConstructor(config map[string]interface{}, 23 | cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/eu/stop_filter_eu.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package eu 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/fa/stop_filter_fa.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package fa 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/fi/stemmer_fi.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package fi 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/finnish" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_fi_snowball" 26 | 27 | type FinnishStemmerFilter struct { 28 | } 29 | 30 | func NewFinnishStemmerFilter() *FinnishStemmerFilter { 31 | return &FinnishStemmerFilter{} 32 | } 33 | 34 | func (s *FinnishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | finnish.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func FinnishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewFinnishStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, FinnishStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/fi/stop_filter_fi.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package fi 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/fr/articles_fr.go: -------------------------------------------------------------------------------- 1 | package fr 2 | 3 | import ( 4 | "github.com/blevesearch/bleve/v2/analysis" 5 | "github.com/blevesearch/bleve/v2/registry" 6 | ) 7 | 8 | const ArticlesName = "articles_fr" 9 | 10 | // this content was obtained from: 11 | // lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis 12 | 13 | var FrenchArticles = []byte(` 14 | l 15 | m 16 | t 17 | qu 18 | n 19 | s 20 | j 21 | d 22 | c 23 | jusqu 24 | quoiqu 25 | lorsqu 26 | puisqu 27 | `) 28 | 29 | func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { 30 | rv := analysis.NewTokenMap() 31 | err := rv.LoadBytes(FrenchArticles) 32 | return rv, err 33 | } 34 | 35 | func init() { 36 | err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor) 37 | if err != nil { 38 | panic(err) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /analysis/lang/fr/elision_fr.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package fr 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/blevesearch/bleve/v2/analysis" 21 | "github.com/blevesearch/bleve/v2/analysis/token/elision" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | const ElisionName = "elision_fr" 26 | 27 | func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 28 | articlesTokenMap, err := cache.TokenMapNamed(ArticlesName) 29 | if err != nil { 30 | return nil, fmt.Errorf("error building elision filter: %v", err) 31 | } 32 | return elision.NewElisionFilter(articlesTokenMap), nil 33 | } 34 | 35 | func init() { 36 | err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor) 37 | if err != nil { 38 | panic(err) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /analysis/lang/fr/elision_fr_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package fr 16 | 17 | import ( 18 | "reflect" 19 | "testing" 20 | 21 | "github.com/blevesearch/bleve/v2/analysis" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | func TestFrenchElision(t *testing.T) { 26 | tests := []struct { 27 | input analysis.TokenStream 28 | output analysis.TokenStream 29 | }{ 30 | { 31 | input: analysis.TokenStream{ 32 | &analysis.Token{ 33 | Term: []byte("l'avion"), 34 | }, 35 | }, 36 | output: analysis.TokenStream{ 37 | &analysis.Token{ 38 | Term: []byte("avion"), 39 | }, 40 | }, 41 | }, 42 | } 43 | 44 | cache := registry.NewCache() 45 | elisionFilter, err := cache.TokenFilterNamed(ElisionName) 46 | if err != nil { 47 | t.Fatal(err) 48 | } 49 | for _, test := range tests { 50 | actual := elisionFilter.Filter(test.input) 51 | if !reflect.DeepEqual(actual, test.output) { 52 | t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term) 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /analysis/lang/fr/stemmer_fr_snowball.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package fr 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/french" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_fr_snowball" 26 | 27 | type FrenchStemmerFilter struct { 28 | } 29 | 30 | func NewFrenchStemmerFilter() *FrenchStemmerFilter { 31 | return &FrenchStemmerFilter{} 32 | } 33 | 34 | func (s *FrenchStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | french.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func FrenchStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewFrenchStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, FrenchStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/fr/stop_filter_fr.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package fr 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/ga/articles_ga.go: -------------------------------------------------------------------------------- 1 | package ga 2 | 3 | import ( 4 | "github.com/blevesearch/bleve/v2/analysis" 5 | "github.com/blevesearch/bleve/v2/registry" 6 | ) 7 | 8 | const ArticlesName = "articles_ga" 9 | 10 | // this content was obtained from: 11 | // lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis 12 | 13 | var IrishArticles = []byte(` 14 | d 15 | m 16 | b 17 | `) 18 | 19 | func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { 20 | rv := analysis.NewTokenMap() 21 | err := rv.LoadBytes(IrishArticles) 22 | return rv, err 23 | } 24 | 25 | func init() { 26 | err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor) 
27 | if err != nil { 28 | panic(err) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /analysis/lang/ga/elision_ga.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package ga 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/blevesearch/bleve/v2/analysis" 21 | "github.com/blevesearch/bleve/v2/analysis/token/elision" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | const ElisionName = "elision_ga" 26 | 27 | func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 28 | articlesTokenMap, err := cache.TokenMapNamed(ArticlesName) 29 | if err != nil { 30 | return nil, fmt.Errorf("error building elision filter: %v", err) 31 | } 32 | return elision.NewElisionFilter(articlesTokenMap), nil 33 | } 34 | 35 | func init() { 36 | err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor) 37 | if err != nil { 38 | panic(err) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /analysis/lang/ga/elision_ga_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package ga 16 | 17 | import ( 18 | "reflect" 19 | "testing" 20 | 21 | "github.com/blevesearch/bleve/v2/analysis" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | func TestFrenchElision(t *testing.T) { 26 | tests := []struct { 27 | input analysis.TokenStream 28 | output analysis.TokenStream 29 | }{ 30 | { 31 | input: analysis.TokenStream{ 32 | &analysis.Token{ 33 | Term: []byte("b'fhearr"), 34 | }, 35 | }, 36 | output: analysis.TokenStream{ 37 | &analysis.Token{ 38 | Term: []byte("fhearr"), 39 | }, 40 | }, 41 | }, 42 | } 43 | 44 | cache := registry.NewCache() 45 | elisionFilter, err := cache.TokenFilterNamed(ElisionName) 46 | if err != nil { 47 | t.Fatal(err) 48 | } 49 | for _, test := range tests { 50 | actual := elisionFilter.Filter(test.input) 51 | if !reflect.DeepEqual(actual, test.output) { 52 | t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term) 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /analysis/lang/ga/stop_filter_ga.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package ga 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/gl/stop_filter_gl.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package gl 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/hi/stop_filter_hi.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package hi 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/hr/stop_filter_hr.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package hr 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/hu/stemmer_hu.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package hu 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/hungarian" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_hu_snowball" 26 | 27 | type HungarianStemmerFilter struct { 28 | } 29 | 30 | func NewHungarianStemmerFilter() *HungarianStemmerFilter { 31 | return &HungarianStemmerFilter{} 32 | } 33 | 34 | func (s *HungarianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | hungarian.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func HungarianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewHungarianStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, HungarianStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/hu/stop_filter_hu.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package hu 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/hy/stop_filter_hy.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package hy 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/hy/stop_words_hy.go: -------------------------------------------------------------------------------- 1 | package hy 2 | 3 | import ( 4 | "github.com/blevesearch/bleve/v2/analysis" 5 | "github.com/blevesearch/bleve/v2/registry" 6 | ) 7 | 8 | const StopName = "stop_hy" 9 | 10 | // this content was obtained from: 11 | // lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/ 12 | // ` was changed to ' to allow for literal string 13 | 14 | var ArmenianStopWords = []byte(`# example set of Armenian stopwords. 
15 | այդ 16 | այլ 17 | այն 18 | այս 19 | դու 20 | դուք 21 | եմ 22 | են 23 | ենք 24 | ես 25 | եք 26 | է 27 | էի 28 | էին 29 | էինք 30 | էիր 31 | էիք 32 | էր 33 | ըստ 34 | թ 35 | ի 36 | ին 37 | իսկ 38 | իր 39 | կամ 40 | համար 41 | հետ 42 | հետո 43 | մենք 44 | մեջ 45 | մի 46 | ն 47 | նա 48 | նաև 49 | նրա 50 | նրանք 51 | որ 52 | որը 53 | որոնք 54 | որպես 55 | ու 56 | ում 57 | պիտի 58 | վրա 59 | և 60 | `) 61 | 62 | func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { 63 | rv := analysis.NewTokenMap() 64 | err := rv.LoadBytes(ArmenianStopWords) 65 | return rv, err 66 | } 67 | 68 | func init() { 69 | err := registry.RegisterTokenMap(StopName, TokenMapConstructor) 70 | if err != nil { 71 | panic(err) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /analysis/lang/id/stop_filter_id.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package id 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/in/indic_normalize.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package in 16 | 17 | import ( 18 | "bytes" 19 | 20 | "github.com/blevesearch/bleve/v2/analysis" 21 | "github.com/blevesearch/bleve/v2/registry" 22 | ) 23 | 24 | const NormalizeName = "normalize_in" 25 | 26 | type IndicNormalizeFilter struct { 27 | } 28 | 29 | func NewIndicNormalizeFilter() *IndicNormalizeFilter { 30 | return &IndicNormalizeFilter{} 31 | } 32 | 33 | func (s *IndicNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 34 | for _, token := range input { 35 | runes := bytes.Runes(token.Term) 36 | runes = normalize(runes) 37 | token.Term = analysis.BuildTermFromRunes(runes) 38 | } 39 | return input 40 | } 41 | 42 | func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 43 | return NewIndicNormalizeFilter(), nil 44 | } 45 | 46 | func init() { 47 | err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor) 48 | if err != nil { 49 | panic(err) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /analysis/lang/it/articles_it.go: -------------------------------------------------------------------------------- 1 | package it 2 | 3 | import ( 4 | "github.com/blevesearch/bleve/v2/analysis" 5 | "github.com/blevesearch/bleve/v2/registry" 6 | ) 7 | 8 | const ArticlesName = "articles_it" 9 | 10 | // this content was obtained from: 11 | // lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis 12 | 13 | var ItalianArticles = []byte(` 14 | c 15 | l 16 | all 17 | dall 18 | dell 19 | nell 20 | sull 21 | coll 22 | pell 23 | gl 24 | agl 25 | dagl 26 | degl 27 | negl 28 | sugl 29 | un 30 | m 31 | t 32 | s 33 | v 34 | d 35 | `) 36 | 37 | func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) { 38 | rv := analysis.NewTokenMap() 39 | err := rv.LoadBytes(ItalianArticles) 40 | return rv, err 41 | } 42 | 43 | func init() { 44 | err := 
registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor) 45 | if err != nil { 46 | panic(err) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /analysis/lang/it/elision_it.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package it 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/blevesearch/bleve/v2/analysis" 21 | "github.com/blevesearch/bleve/v2/analysis/token/elision" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | const ElisionName = "elision_it" 26 | 27 | func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 28 | articlesTokenMap, err := cache.TokenMapNamed(ArticlesName) 29 | if err != nil { 30 | return nil, fmt.Errorf("error building elision filter: %v", err) 31 | } 32 | return elision.NewElisionFilter(articlesTokenMap), nil 33 | } 34 | 35 | func init() { 36 | err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor) 37 | if err != nil { 38 | panic(err) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /analysis/lang/it/elision_it_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package it 16 | 17 | import ( 18 | "reflect" 19 | "testing" 20 | 21 | "github.com/blevesearch/bleve/v2/analysis" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | func TestItalianElision(t *testing.T) { 26 | tests := []struct { 27 | input analysis.TokenStream 28 | output analysis.TokenStream 29 | }{ 30 | { 31 | input: analysis.TokenStream{ 32 | &analysis.Token{ 33 | Term: []byte("dell'Italia"), 34 | }, 35 | }, 36 | output: analysis.TokenStream{ 37 | &analysis.Token{ 38 | Term: []byte("Italia"), 39 | }, 40 | }, 41 | }, 42 | } 43 | 44 | cache := registry.NewCache() 45 | elisionFilter, err := cache.TokenFilterNamed(ElisionName) 46 | if err != nil { 47 | t.Fatal(err) 48 | } 49 | for _, test := range tests { 50 | actual := elisionFilter.Filter(test.input) 51 | if !reflect.DeepEqual(actual, test.output) { 52 | t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term) 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /analysis/lang/it/stemmer_it_snowball.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package it 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/italian" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_it_snowball" 26 | 27 | type ItalianStemmerFilter struct { 28 | } 29 | 30 | func NewItalianStemmerFilter() *ItalianStemmerFilter { 31 | return &ItalianStemmerFilter{} 32 | } 33 | 34 | func (s *ItalianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | italian.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func ItalianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewItalianStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, ItalianStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/it/stop_filter_it.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package it 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/nl/stemmer_nl.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package nl 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/dutch" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_nl_snowball" 26 | 27 | type DutchStemmerFilter struct { 28 | } 29 | 30 | func NewDutchStemmerFilter() *DutchStemmerFilter { 31 | return &DutchStemmerFilter{} 32 | } 33 | 34 | func (s *DutchStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | dutch.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func DutchStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewDutchStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, DutchStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/nl/stop_filter_nl.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package nl 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/no/stemmer_no.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package no 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/norwegian" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_no_snowball" 26 | 27 | type NorwegianStemmerFilter struct { 28 | } 29 | 30 | func NewNorwegianStemmerFilter() *NorwegianStemmerFilter { 31 | return &NorwegianStemmerFilter{} 32 | } 33 | 34 | func (s *NorwegianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | norwegian.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func NorwegianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewNorwegianStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, NorwegianStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/no/stop_filter_no.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package no 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/pl/stempel/cell.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package stempel 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/blevesearch/stempel/javadata" 21 | ) 22 | 23 | type cell struct { 24 | ref int32 25 | cmd int32 26 | } 27 | 28 | func (c *cell) String() string { 29 | return fmt.Sprintf("ref(%d) cmd(%d)", c.ref, c.cmd) 30 | } 31 | 32 | func newCell(r *javadata.Reader) (*cell, error) { 33 | cmd, err := r.ReadInt32() 34 | if err != nil { 35 | return nil, fmt.Errorf("error reading cell cmd: %v", err) 36 | } 37 | _, err = r.ReadInt32() 38 | if err != nil { 39 | return nil, fmt.Errorf("error reading cell cnt: %v", err) 40 | } 41 | ref, err := r.ReadInt32() 42 | if err != nil { 43 | return nil, fmt.Errorf("error reading cell ref: %v", err) 44 | } 45 | _, err = r.ReadInt32() 46 | if err != nil { 47 | return nil, fmt.Errorf("error reading cell skip: %v", err) 48 | } 49 | return &cell{ 50 | cmd: cmd, 51 | ref: ref, 52 | }, nil 53 | } 54 | -------------------------------------------------------------------------------- /analysis/lang/pl/stempel/fuzz.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:build gofuzz 16 | // +build gofuzz 17 | 18 | package stempel 19 | 20 | var fuzzTrie Trie 21 | 22 | func init() { 23 | var err error 24 | fuzzTrie, err = Open("pl/stemmer_20000.tbl") 25 | if err != nil { 26 | panic(err) 27 | } 28 | } 29 | 30 | func Fuzz(data []byte) int { 31 | inRunes := []rune(string(data)) 32 | diff := fuzzTrie.GetLastOnPath(inRunes) 33 | _ = Diff(inRunes, diff) 34 | return 1 35 | } 36 | -------------------------------------------------------------------------------- /analysis/lang/pl/stempel/javadata/README.md: -------------------------------------------------------------------------------- 1 | # javadata 2 | 3 | Go library to read data written with java.io.DataOutput 4 | -------------------------------------------------------------------------------- /analysis/lang/pl/stempel/javadata/fuzz.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:build gofuzz 16 | // +build gofuzz 17 | 18 | package javadata 19 | 20 | import "bytes" 21 | 22 | func Fuzz(data []byte) int { 23 | br := bytes.NewReader(data) 24 | jdr := NewReader(br) 25 | 26 | var err error 27 | for err == nil { 28 | _, err = jdr.ReadUTF() 29 | } 30 | if err != nil { 31 | return 0 32 | } 33 | return 1 34 | } 35 | -------------------------------------------------------------------------------- /analysis/lang/pl/stempel/pl/pl_PL.dic.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blevesearch/bleve/68d10040f2fd7ad8b6912d5d95788a7ae7aa0fbb/analysis/lang/pl/stempel/pl/pl_PL.dic.gz -------------------------------------------------------------------------------- /analysis/lang/pl/stempel/pl/stemmer_20000.tbl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blevesearch/bleve/68d10040f2fd7ad8b6912d5d95788a7ae7aa0fbb/analysis/lang/pl/stempel/pl/stemmer_20000.tbl -------------------------------------------------------------------------------- /analysis/lang/pl/stempel/strenum.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// strEnum walks a rune slice one element at a time, either front to back or
// back to front.
type strEnum struct {
	r    []rune // runes being enumerated
	from int    // index of the next rune to return
	by   int    // step added to from after each rune (+1 or -1)
}

// newStrEnum returns an enumerator over s. When up is true it starts at the
// first rune and moves forward; otherwise it starts at the last rune and
// moves backward.
func newStrEnum(s []rune, up bool) *strEnum {
	start, step := len(s)-1, -1
	if up {
		start, step = 0, 1
	}
	return &strEnum{r: s, from: start, by: step}
}

// next returns the rune at the current position and advances by one step.
// Once the position is outside the slice it returns 0 and io.EOF.
func (s *strEnum) next() (rune, error) {
	pos := s.from
	if pos < 0 || pos >= len(s.r) {
		return 0, io.EOF
	}
	s.from = pos + s.by
	return s.r[pos], nil
}
14 | 15 | package stempel 16 | 17 | import ( 18 | "fmt" 19 | "io" 20 | "reflect" 21 | "testing" 22 | ) 23 | 24 | func TestStrenumNext(t *testing.T) { 25 | 26 | tests := []struct { 27 | in []rune 28 | up bool 29 | expect []rune 30 | }{ 31 | { 32 | in: []rune{'h', 'e', 'l', 'l', 'o'}, 33 | up: true, 34 | expect: []rune{'h', 'e', 'l', 'l', 'o'}, 35 | }, 36 | { 37 | in: []rune{'h', 'e', 'l', 'l', 'o'}, 38 | up: false, 39 | expect: []rune{'o', 'l', 'l', 'e', 'h'}, 40 | }, 41 | } 42 | 43 | for _, test := range tests { 44 | t.Run(fmt.Sprintf("%s-up-%t", string(test.in), test.up), func(t *testing.T) { 45 | strenum := newStrEnum(test.in, test.up) 46 | var got []rune 47 | next, err := strenum.next() 48 | for err == nil { 49 | got = append(got, next) 50 | next, err = strenum.next() 51 | } 52 | if err != io.EOF { 53 | t.Errorf("next got err: %v", err) 54 | } 55 | if !reflect.DeepEqual(got, test.expect) { 56 | t.Errorf("expected %v, got %v", test.expect, got) 57 | } 58 | }) 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /analysis/lang/pl/stop_filter_pl.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package pl 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/pt/stop_filter_pt.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package pt 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/ro/stemmer_ro.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package ro 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/romanian" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_ro_snowball" 26 | 27 | type RomanianStemmerFilter struct { 28 | } 29 | 30 | func NewRomanianStemmerFilter() *RomanianStemmerFilter { 31 | return &RomanianStemmerFilter{} 32 | } 33 | 34 | func (s *RomanianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | romanian.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func RomanianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewRomanianStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, RomanianStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/ro/stop_filter_ro.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package ro 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/ru/stemmer_ru.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package ru 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/russian" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_ru_snowball" 26 | 27 | type RussianStemmerFilter struct { 28 | } 29 | 30 | func NewRussianStemmerFilter() *RussianStemmerFilter { 31 | return &RussianStemmerFilter{} 32 | } 33 | 34 | func (s *RussianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | russian.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func RussianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewRussianStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, RussianStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/ru/stop_filter_ru.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package ru 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/sv/stemmer_sv.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package sv 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/swedish" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_sv_snowball" 26 | 27 | type SwedishStemmerFilter struct { 28 | } 29 | 30 | func NewSwedishStemmerFilter() *SwedishStemmerFilter { 31 | return &SwedishStemmerFilter{} 32 | } 33 | 34 | func (s *SwedishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | swedish.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func SwedishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewSwedishStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, SwedishStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/sv/stop_filter_sv.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package sv 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/lang/tr/stemmer_tr.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package tr 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | 21 | "github.com/blevesearch/snowballstem" 22 | "github.com/blevesearch/snowballstem/turkish" 23 | ) 24 | 25 | const SnowballStemmerName = "stemmer_tr_snowball" 26 | 27 | type TurkishStemmerFilter struct { 28 | } 29 | 30 | func NewTurkishStemmerFilter() *TurkishStemmerFilter { 31 | return &TurkishStemmerFilter{} 32 | } 33 | 34 | func (s *TurkishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream { 35 | for _, token := range input { 36 | env := snowballstem.NewEnv(string(token.Term)) 37 | turkish.Stem(env) 38 | token.Term = []byte(env.Current()) 39 | } 40 | return input 41 | } 42 | 43 | func TurkishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 44 | return NewTurkishStemmerFilter(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenFilter(SnowballStemmerName, TurkishStemmerFilterConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/lang/tr/stop_filter_tr.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package tr 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/analysis/token/stop" 20 | "github.com/blevesearch/bleve/v2/registry" 21 | ) 22 | 23 | func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { 24 | tokenMap, err := cache.TokenMapNamed(StopName) 25 | if err != nil { 26 | return nil, err 27 | } 28 | return stop.NewStopTokensFilter(tokenMap), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/test_words.txt: -------------------------------------------------------------------------------- 1 | # full line comment 2 | marty 3 | steve # trailing comment 4 | | different format of comment 5 | dustin 6 | siri | different style trailing comment 7 | multiple words with different whitespace -------------------------------------------------------------------------------- /analysis/tokenizer/letter/letter.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2016 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package letter 16 | 17 | import ( 18 | "unicode" 19 | 20 | "github.com/blevesearch/bleve/v2/analysis" 21 | "github.com/blevesearch/bleve/v2/analysis/tokenizer/character" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | const Name = "letter" 26 | 27 | func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { 28 | return character.NewCharacterTokenizer(unicode.IsLetter), nil 29 | } 30 | 31 | func init() { 32 | err := registry.RegisterTokenizer(Name, TokenizerConstructor) 33 | if err != nil { 34 | panic(err) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /analysis/tokenizer/single/single.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package single 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/analysis" 19 | "github.com/blevesearch/bleve/v2/registry" 20 | ) 21 | 22 | const Name = "single" 23 | 24 | type SingleTokenTokenizer struct { 25 | } 26 | 27 | func NewSingleTokenTokenizer() *SingleTokenTokenizer { 28 | return &SingleTokenTokenizer{} 29 | } 30 | 31 | func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream { 32 | return analysis.TokenStream{ 33 | &analysis.Token{ 34 | Term: input, 35 | Position: 1, 36 | Start: 0, 37 | End: len(input), 38 | Type: analysis.AlphaNumeric, 39 | }, 40 | } 41 | } 42 | 43 | func SingleTokenTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { 44 | return NewSingleTokenTokenizer(), nil 45 | } 46 | 47 | func init() { 48 | err := registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor) 49 | if err != nil { 50 | panic(err) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /analysis/tokenizer/whitespace/whitespace.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package whitespace 16 | 17 | import ( 18 | "unicode" 19 | 20 | "github.com/blevesearch/bleve/v2/analysis" 21 | "github.com/blevesearch/bleve/v2/analysis/tokenizer/character" 22 | "github.com/blevesearch/bleve/v2/registry" 23 | ) 24 | 25 | const Name = "whitespace" 26 | 27 | func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { 28 | return character.NewCharacterTokenizer(notSpace), nil 29 | } 30 | 31 | func notSpace(r rune) bool { 32 | return !unicode.IsSpace(r) 33 | } 34 | 35 | func init() { 36 | err := registry.RegisterTokenizer(Name, TokenizerConstructor) 37 | if err != nil { 38 | panic(err) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /analysis/tokenmap_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package analysis 16 | 17 | import ( 18 | "reflect" 19 | "testing" 20 | ) 21 | 22 | func TestTokenMapLoadFile(t *testing.T) { 23 | tokenMap := NewTokenMap() 24 | err := tokenMap.LoadFile("test_words.txt") 25 | if err != nil { 26 | t.Fatal(err) 27 | } 28 | 29 | expectedTokens := NewTokenMap() 30 | expectedTokens.AddToken("marty") 31 | expectedTokens.AddToken("steve") 32 | expectedTokens.AddToken("dustin") 33 | expectedTokens.AddToken("siri") 34 | expectedTokens.AddToken("multiple") 35 | expectedTokens.AddToken("words") 36 | expectedTokens.AddToken("with") 37 | expectedTokens.AddToken("different") 38 | expectedTokens.AddToken("whitespace") 39 | 40 | if !reflect.DeepEqual(tokenMap, expectedTokens) { 41 | t.Errorf("expected %#v, got %#v", expectedTokens, tokenMap) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /cmd/bleve/.gitignore: -------------------------------------------------------------------------------- 1 | /vendor/github.com/spf13/cobra/cobra 2 | /vendor/github.com/spf13/cobra/doc 3 | -------------------------------------------------------------------------------- /cmd/bleve/cmd/count.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2016 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package cmd 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/spf13/cobra" 21 | ) 22 | 23 | // countCmd represents the count command; it prints the value returned by idx.DocCount() 24 | var countCmd = &cobra.Command{ 25 | Use: "count [index path]", 26 | Short: "counts the number of documents in the index", 27 | Long: `The count command will count the number of documents in the index.`, 28 | RunE: func(cmd *cobra.Command, args []string) error { 29 | count, err := idx.DocCount() 30 | if err != nil { 31 | return fmt.Errorf("error counting docs in index: %v", err) 32 | } 33 | fmt.Printf("%d\n", count) 34 | return nil 35 | }, 36 | } 37 | 38 | func init() { 39 | RootCmd.AddCommand(countCmd) 40 | } 41 | -------------------------------------------------------------------------------- /cmd/bleve/cmd/fields.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2016 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package cmd 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/spf13/cobra" 21 | ) 22 | 23 | // fieldsCmd represents the fields command 24 | var fieldsCmd = &cobra.Command{ 25 | Use: "fields [index path]", 26 | Short: "lists the fields in this index", 27 | Long: `The fields command will list the fields used in this index.`, 28 | RunE: func(cmd *cobra.Command, args []string) error { 29 | i, err := idx.Advanced() 30 | if err != nil { 31 | return fmt.Errorf("error getting index: %v", err) 32 | } 33 | r, err := i.Reader() 34 | if err != nil { 35 | return fmt.Errorf("error getting index reader: %v", err) 36 | } 37 | fields, err := r.Fields() 38 | if err != nil { 39 | return fmt.Errorf("error getting fields: %v", err) 40 | } 41 | for i, field := range fields { 42 | fmt.Printf("%d - %s\n", i, field) 43 | } 44 | return nil 45 | }, 46 | } 47 | 48 | func init() { 49 | RootCmd.AddCommand(fieldsCmd) 50 | } 51 | -------------------------------------------------------------------------------- /cmd/bleve/cmd/mapping.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2016 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package cmd 16 | 17 | import ( 18 | "encoding/json" 19 | "fmt" 20 | "log" 21 | 22 | "github.com/spf13/cobra" 23 | ) 24 | 25 | // mappingCmd represents the mapping command 26 | var mappingCmd = &cobra.Command{ 27 | Use: "mapping [index path]", 28 | Short: "prints the mapping used for this index", 29 | Long: `The mapping command prints a JSON representation of the mapping used for this index.`, 30 | Run: func(cmd *cobra.Command, args []string) { 31 | mapping := idx.Mapping() 32 | jsonBytes, err := json.MarshalIndent(mapping, "", " ") 33 | if err != nil { 34 | log.Fatal(err) 35 | } 36 | fmt.Printf("%s\n", jsonBytes) 37 | }, 38 | } 39 | 40 | func init() { 41 | RootCmd.AddCommand(mappingCmd) 42 | } 43 | -------------------------------------------------------------------------------- /cmd/bleve/cmd/scorch.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package cmd 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/cmd/bleve/cmd/scorch" 19 | ) 20 | 21 | // make scorch command-line tool a bleve sub-command 22 | 23 | func init() { 24 | RootCmd.AddCommand(scorch.RootCmd) 25 | } 26 | -------------------------------------------------------------------------------- /cmd/bleve/cmd/scorch/deleted.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package scorch 16 | 17 | import ( 18 | "fmt" 19 | "strconv" 20 | 21 | "github.com/spf13/cobra" 22 | ) 23 | 24 | // deletedCmd represents the deleted command 25 | var deletedCmd = &cobra.Command{ 26 | Use: "deleted", 27 | Short: "deleted prints the deleted bitmap for segments in the index snapshot", 28 | Long: `The deleted command prints the deleted bitmap for segments in the index snapshot.`, 29 | RunE: func(cmd *cobra.Command, args []string) error { 30 | 31 | if len(args) < 2 { 32 | return fmt.Errorf("snapshot epoch required") 33 | } 34 | // args[1] is the snapshot epoch; extra arguments are ignored instead of silently turning the command into a no-op 35 | snapshotEpoch, err := strconv.ParseUint(args[1], 10, 64) 36 | if err != nil { 37 | return err 38 | } 39 | snapshot, err := index.LoadSnapshot(snapshotEpoch) 40 | if err != nil { 41 | return err 42 | } 43 | segments := snapshot.Segments() 44 | for i, segmentSnap := range segments { 45 | deleted := segmentSnap.Deleted() 46 | fmt.Printf("%d %v\n", i, deleted) 47 | } 48 | 49 | return nil 50 | }, 51 | } 52 | 53 | func init() { 54 | RootCmd.AddCommand(deletedCmd) 55 | } 56 | -------------------------------------------------------------------------------- /cmd/bleve/cmd/scorch/info.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package scorch 16 | 17 | import ( 18 | "fmt" 19 | 20 | "github.com/spf13/cobra" 21 | ) 22 | 23 | // infoCmd represents the info command 24 | var infoCmd = &cobra.Command{ 25 | Use: "info", 26 | Short: "info prints basic info about the index", 27 | Long: `The info command prints basic info about the index.`, 28 | RunE: func(cmd *cobra.Command, args []string) error { 29 | 30 | reader, err := index.Reader() 31 | if err != nil { 32 | return err 33 | } 34 | 35 | count, err := reader.DocCount() 36 | if err != nil { 37 | return err 38 | } 39 | 40 | fmt.Printf("doc count: %d\n", count) 41 | 42 | // var numSnapshots int 43 | // var rootSnapshot uint64 44 | // index.VisitBoltSnapshots(func(snapshotEpoch uint64) error { 45 | // if rootSnapshot == 0 { 46 | // rootSnapshot = snapshotEpoch 47 | // } 48 | // numSnapshots++ 49 | // return nil 50 | // }) 51 | // fmt.Printf("has %d snapshot(s), root: %d\n", numSnapshots, rootSnapshot) 52 | 53 | return nil 54 | }, 55 | } 56 | 57 | func init() { 58 | RootCmd.AddCommand(infoCmd) 59 | } 60 | -------------------------------------------------------------------------------- /cmd/bleve/gendocs.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2016 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:build ignore 16 | // +build ignore 17 | 18 | package main 19 | 20 | import ( 21 | "fmt" 22 | "log" 23 | 24 | "github.com/blevesearch/bleve/v2/cmd/bleve/cmd" 25 | 26 | "github.com/spf13/cobra/doc" 27 | ) 28 | 29 | // you can generate markdown docs by running 30 | // 31 | // $ go run gendocs.go 32 | // 33 | // this also requires doc sub-package of cobra 34 | // which is not kept in this repo 35 | // you can acquire it by running 36 | // 37 | // $ gvt restore 38 | 39 | func main() { 40 | cmd.RootCmd.DisableAutoGenTag = true 41 | identity := func(s string) string { 42 | return fmt.Sprintf(`{{< relref "docs/%s" >}}`, s) 43 | } 44 | emptyStr := func(s string) string { return "" } 45 | // report a failed generation instead of exiting 0 with partial output 46 | if err := doc.GenMarkdownTreeCustom(cmd.RootCmd, "./", emptyStr, identity); err != nil { 47 | log.Fatal(err) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /cmd/bleve/main.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2016 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package main 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/cmd/bleve/cmd" 19 | 20 | // to support standard set of build tags 21 | _ "github.com/blevesearch/bleve/v2/config" 22 | ) 23 | 24 | func main() { 25 | cmd.Execute() 26 | } 27 | -------------------------------------------------------------------------------- /cmd/bleve/vendor/github.com/inconshreveable/mousetrap/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2014 Alan Shreve 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /cmd/bleve/vendor/github.com/inconshreveable/mousetrap/trap_others.go: -------------------------------------------------------------------------------- 1 | // +build !windows 2 | 3 | package mousetrap 4 | 5 | // StartedByExplorer returns true if the program was invoked by the user 6 | // double-clicking on the executable from explorer.exe 7 | // 8 | // It is conservative and returns false if any of the internal calls fail. 9 | // It does not guarantee that the program was run from a terminal. It only can tell you 10 | // whether it was launched from explorer.exe 11 | // 12 | // On non-Windows platforms, it always returns false. 
13 | func StartedByExplorer() bool { 14 | return false 15 | } 16 | -------------------------------------------------------------------------------- /cmd/bleve/vendor/github.com/inconshreveable/mousetrap/trap_windows_1.4.go: -------------------------------------------------------------------------------- 1 | // +build windows 2 | // +build go1.4 3 | 4 | package mousetrap 5 | 6 | import ( 7 | "os" 8 | "syscall" 9 | "unsafe" 10 | ) 11 | 12 | func getProcessEntry(pid int) (*syscall.ProcessEntry32, error) { 13 | snapshot, err := syscall.CreateToolhelp32Snapshot(syscall.TH32CS_SNAPPROCESS, 0) 14 | if err != nil { 15 | return nil, err 16 | } 17 | defer syscall.CloseHandle(snapshot) 18 | var procEntry syscall.ProcessEntry32 19 | procEntry.Size = uint32(unsafe.Sizeof(procEntry)) 20 | if err = syscall.Process32First(snapshot, &procEntry); err != nil { 21 | return nil, err 22 | } 23 | for { 24 | if procEntry.ProcessID == uint32(pid) { 25 | return &procEntry, nil 26 | } 27 | err = syscall.Process32Next(snapshot, &procEntry) 28 | if err != nil { 29 | return nil, err 30 | } 31 | } 32 | } 33 | 34 | // StartedByExplorer returns true if the program was invoked by the user double-clicking 35 | // on the executable from explorer.exe 36 | // 37 | // It is conservative and returns false if any of the internal calls fail. 38 | // It does not guarantee that the program was run from a terminal. 
It only can tell you 39 | // whether it was launched from explorer.exe 40 | func StartedByExplorer() bool { 41 | pe, err := getProcessEntry(os.Getppid()) 42 | if err != nil { 43 | return false 44 | } 45 | return "explorer.exe" == syscall.UTF16ToString(pe.ExeFile[:]) 46 | } 47 | -------------------------------------------------------------------------------- /cmd/bleve/vendor/github.com/spf13/cobra/command_notwin.go: -------------------------------------------------------------------------------- 1 | // +build !windows 2 | 3 | package cobra 4 | 5 | var preExecHookFn func(*Command) 6 | -------------------------------------------------------------------------------- /cmd/bleve/vendor/github.com/spf13/cobra/command_win.go: -------------------------------------------------------------------------------- 1 | // +build windows 2 | 3 | package cobra 4 | 5 | import ( 6 | "os" 7 | "time" 8 | 9 | "github.com/inconshreveable/mousetrap" 10 | ) 11 | 12 | var preExecHookFn = preExecHook 13 | 14 | // enables an information splash screen on Windows if the CLI is started from explorer.exe. 15 | var MousetrapHelpText string = `This is a command line tool 16 | 17 | You need to open cmd.exe and run it from there. 18 | ` 19 | 20 | func preExecHook(c *Command) { 21 | if mousetrap.StartedByExplorer() { 22 | c.Print(MousetrapHelpText) 23 | time.Sleep(5 * time.Second) 24 | os.Exit(1) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /cmd/bleve/vendor/github.com/spf13/pflag/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Alex Ogier. All rights reserved. 2 | Copyright (c) 2012 The Go Authors. All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /cmd/bleve/vendor/manifest: -------------------------------------------------------------------------------- 1 | { 2 | "version": 0, 3 | "dependencies": [ 4 | { 5 | "importpath": "github.com/inconshreveable/mousetrap", 6 | "repository": "https://github.com/inconshreveable/mousetrap", 7 | "vcs": "git", 8 | "revision": "76626ae9c91c4f2a10f34cad8ce83ea42c93bb75", 9 | "branch": "master", 10 | "notests": true 11 | }, 12 | { 13 | "importpath": "github.com/spf13/cobra", 14 | "repository": "https://github.com/spf13/cobra", 15 | "vcs": "git", 16 | "revision": "b5d8e8f46a2f829f755b6e33b454e25c61c935e1", 17 | "branch": "master", 18 | "notests": true 19 | }, 20 | { 21 | "importpath": "github.com/spf13/pflag", 22 | "repository": "https://github.com/spf13/pflag", 23 | "vcs": "git", 24 | "revision": "9ff6c6923cfffbcd502984b8e0c80539a94968b7", 25 | "branch": "master", 26 | "notests": true 27 | } 28 | ] 29 | } 30 | -------------------------------------------------------------------------------- /config/README.md: -------------------------------------------------------------------------------- 1 | # Bleve Config 2 | 3 | **NOTE** you probably do not need this package. It is only intended for general purpose applications that want to include large parts of Bleve regardless of whether or not the code is directly using it. 4 | 5 | ## General Purpose Applications 6 | 7 | A general purpose application that must allow users to select Bleve components at runtime can accomplish this with a blank import of this package: 8 | 9 | ``` 10 | import _ "github.com/blevesearch/bleve/v2/config" 11 | ``` -------------------------------------------------------------------------------- /config_app.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build appengine || appenginevm 16 | // +build appengine appenginevm 17 | 18 | package bleve 19 | 20 | // in the appengine environment we cannot support disk based indexes 21 | // so we do no extra configuration in this method 22 | func initDisk() { 23 | 24 | } 25 | -------------------------------------------------------------------------------- /config_disk.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | //go:build !appengine && !appenginevm 16 | // +build !appengine,!appenginevm 17 | 18 | package bleve 19 | 20 | import "github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb" 21 | 22 | // in normal environments we configure boltdb as the default storage 23 | func initDisk() { 24 | // default kv store 25 | Config.DefaultKVStore = boltdb.Name 26 | } 27 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | /* 16 | Package bleve is a library for indexing and searching text. 
17 | 18 | Example Opening New Index, Indexing Data 19 | 20 | message := struct{ Id, From, Body string }{ 21 | Id: "example", 22 | From: "xyz@couchbase.com", 23 | Body: "bleve indexing is easy", 24 | } 25 | 26 | mapping := bleve.NewIndexMapping() 27 | index, _ := bleve.New("example.bleve", mapping) 28 | index.Index(message.Id, message) 29 | 30 | Example Opening Existing Index, Searching Data 31 | 32 | index, _ := bleve.Open("example.bleve") 33 | query := bleve.NewQueryStringQuery("bleve") 34 | searchRequest := bleve.NewSearchRequest(query) 35 | searchResult, _ := index.Search(searchRequest) 36 | */ 37 | package bleve 38 | -------------------------------------------------------------------------------- /docs/bleve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blevesearch/bleve/68d10040f2fd7ad8b6912d5d95788a7ae7aa0fbb/docs/bleve.png -------------------------------------------------------------------------------- /docs/geo.md: -------------------------------------------------------------------------------- 1 | # Geo spatial search 2 | 3 | Redirect to [geo/README.md](https://github.com/blevesearch/bleve/blob/master/geo/README.md) 4 | -------------------------------------------------------------------------------- /docs/sort_facet_supporting_docs/indexSizeVsNumDocs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blevesearch/bleve/68d10040f2fd7ad8b6912d5d95788a7ae7aa0fbb/docs/sort_facet_supporting_docs/indexSizeVsNumDocs.png -------------------------------------------------------------------------------- /docs/sort_facet_supporting_docs/queryTimevsNumDocs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blevesearch/bleve/68d10040f2fd7ad8b6912d5d95788a7ae7aa0fbb/docs/sort_facet_supporting_docs/queryTimevsNumDocs.png
-------------------------------------------------------------------------------- /document/field.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package document 16 | 17 | import ( 18 | index "github.com/blevesearch/bleve_index_api" 19 | ) 20 | 21 | type Field interface { 22 | // Name returns the path of the field from the root DocumentMapping. 23 | // A root field path is "field", a subdocument field is "parent.field". 24 | Name() string 25 | // ArrayPositions returns the intermediate document and field indices 26 | // required to resolve the field value in the document. For example, if the 27 | // field path is "doc1.doc2.field" where doc1 and doc2 are slices or 28 | // arrays, ArrayPositions returns 2 indices used to resolve "doc2" value in 29 | // "doc1", then "field" in "doc2". 
30 | ArrayPositions() []uint64 31 | Options() index.FieldIndexingOptions 32 | Analyze() 33 | Value() []byte 34 | 35 | // NumPlainTextBytes should return the number of plain text bytes 36 | // that this field represents - this is a common metric for tracking 37 | // the rate of indexing 38 | NumPlainTextBytes() uint64 39 | 40 | Size() int 41 | 42 | EncodedFieldType() byte 43 | AnalyzedLength() int 44 | AnalyzedTokenFrequencies() index.TokenFrequencies 45 | } 46 | -------------------------------------------------------------------------------- /document/field_geopoint_test.go: -------------------------------------------------------------------------------- 1 | package document 2 | 3 | import "testing" 4 | 5 | func TestGeoPointField(t *testing.T) { 6 | gf := NewGeoPointField("loc", []uint64{}, 0.0015, 0.0015) 7 | gf.Analyze() 8 | numTokens := gf.AnalyzedLength() 9 | tokenFreqs := gf.AnalyzedTokenFrequencies() 10 | if numTokens != 8 { 11 | t.Errorf("expected 8 tokens, got %d", numTokens) 12 | } 13 | if len(tokenFreqs) != 8 { 14 | t.Errorf("expected 8 token freqs") 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /document/field_ip_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2021 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package document 16 | 17 | import ( 18 | "bytes" 19 | "net" 20 | "testing" 21 | ) 22 | 23 | func TestIPField(t *testing.T) { 24 | nf := NewIPField("ip", []uint64{}, net.IPv4(192, 168, 1, 1)) 25 | nf.Analyze() 26 | if nf.length != 1 { 27 | t.Errorf("expected 1 token") 28 | } 29 | if len(nf.value) != 16 { 30 | t.Errorf("stored value should be in 16 byte ipv6 format") 31 | } 32 | if !bytes.Equal(nf.value, []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 192, 168, 1, 1}) { 33 | t.Errorf("wrong value stored, expected 192.168.1.1, got %q", nf.value.String()) 34 | } 35 | if len(nf.frequencies) != 1 { 36 | t.Errorf("expected 1 token freqs") 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /document/field_numeric_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package document 16 | 17 | import ( 18 | "testing" 19 | ) 20 | 21 | func TestNumericField(t *testing.T) { 22 | nf := NewNumericField("age", []uint64{}, 3.4) 23 | nf.Analyze() 24 | numTokens := nf.AnalyzedLength() 25 | tokenFreqs := nf.AnalyzedTokenFrequencies() 26 | if numTokens != 16 { 27 | t.Errorf("expected 16 tokens") 28 | } 29 | if len(tokenFreqs) != 16 { 30 | t.Errorf("expected 16 token freqs") 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /geo/benchmark_geohash_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2019 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package geo 16 | 17 | import ( 18 | "testing" 19 | ) 20 | 21 | func BenchmarkGeoHashLen5NewDecode(b *testing.B) { 22 | b.ResetTimer() 23 | hash := "d3hn3" 24 | for i := 0; i < b.N; i++ { 25 | _, _ = DecodeGeoHash(hash) 26 | } 27 | } 28 | 29 | func BenchmarkGeoHashLen6NewDecode(b *testing.B) { 30 | b.ResetTimer() 31 | hash := "u4pruy" 32 | for i := 0; i < b.N; i++ { 33 | _, _ = DecodeGeoHash(hash) 34 | } 35 | } 36 | 37 | func BenchmarkGeoHashLen7NewDecode(b *testing.B) { 38 | b.ResetTimer() 39 | hash := "u4pruyd" 40 | for i := 0; i < b.N; i++ { 41 | _, _ = DecodeGeoHash(hash) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /index/scorch/empty.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package scorch 16 | 17 | import segment "github.com/blevesearch/scorch_segment_api/v2" 18 | 19 | type emptyPostingsIterator struct{} 20 | 21 | func (e *emptyPostingsIterator) Next() (segment.Posting, error) { 22 | return nil, nil 23 | } 24 | 25 | func (e *emptyPostingsIterator) Advance(uint64) (segment.Posting, error) { 26 | return nil, nil 27 | } 28 | 29 | func (e *emptyPostingsIterator) Size() int { 30 | return 0 31 | } 32 | 33 | func (e *emptyPostingsIterator) BytesRead() uint64 { 34 | return 0 35 | } 36 | 37 | func (e *emptyPostingsIterator) ResetBytesRead(uint64) {} 38 | 39 | func (e *emptyPostingsIterator) BytesWritten() uint64 { return 0 } 40 | 41 | var anEmptyPostingsIterator = &emptyPostingsIterator{} 42 | -------------------------------------------------------------------------------- /index/scorch/mergeplan/sort.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package mergeplan 16 | 17 | type byLiveSizeDescending []Segment 18 | 19 | func (a byLiveSizeDescending) Len() int { return len(a) } 20 | 21 | func (a byLiveSizeDescending) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 22 | 23 | func (a byLiveSizeDescending) Less(i, j int) bool { 24 | if a[i].LiveSize() != a[j].LiveSize() { 25 | return a[i].LiveSize() > a[j].LiveSize() 26 | } 27 | return a[i].Id() < a[j].Id() 28 | } 29 | -------------------------------------------------------------------------------- /index/scorch/regexp_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package scorch 16 | 17 | import ( 18 | "regexp/syntax" 19 | "testing" 20 | ) 21 | 22 | func TestLiteralPrefix(t *testing.T) { 23 | tests := []struct { 24 | input, expected string 25 | }{ 26 | {"", ""}, 27 | {"hello", "hello"}, 28 | {"hello.?", "hello"}, 29 | {"hello$", "hello"}, 30 | {`[h][e][l][l][o].*world`, "hello"}, 31 | {`[h-h][e-e][l-l][l-l][o-o].*world`, "hello"}, 32 | {".*", ""}, 33 | {"h.*", "h"}, 34 | {"h.?", "h"}, 35 | {"h[a-z]", "h"}, 36 | {`h\s`, "h"}, 37 | {`(hello)world`, ""}, 38 | {`日本語`, "日本語"}, 39 | {`日本語\w`, "日本語"}, 40 | {`^hello`, ""}, 41 | {`^`, ""}, 42 | {`$`, ""}, 43 | {`(?i)mArTy`, ""}, 44 | } 45 | 46 | for i, test := range tests { 47 | s, err := syntax.Parse(test.input, syntax.Perl) 48 | if err != nil { 49 | t.Fatalf("expected no syntax.Parse error, got: %v", err) 50 | } 51 | 52 | got := literalPrefix(s) 53 | if test.expected != got { 54 | t.Fatalf("test: %d, %+v, got: %s", i, test, got) 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /index/upsidedown/benchmark_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | BENCHMARKS=`grep "func Benchmark" *_test.go | sed 's/.*func //' | sed s/\(.*{//` 4 | 5 | for BENCHMARK in $BENCHMARKS 6 | do 7 | go test -v -run=xxx -bench=^$BENCHMARK$ -benchtime=10s -tags 'forestdb leveldb' | grep -v ok | grep -v PASS 8 | done 9 | -------------------------------------------------------------------------------- /index/upsidedown/row_merge_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package upsidedown 16 | 17 | import ( 18 | "bytes" 19 | "encoding/binary" 20 | "testing" 21 | ) 22 | 23 | func TestPartialMerge(t *testing.T) { 24 | 25 | tests := []struct { 26 | in [][]byte 27 | out uint64 28 | }{ 29 | { 30 | in: [][]byte{dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr}, 31 | out: 5, 32 | }, 33 | } 34 | 35 | mo := &upsideDownMerge{} 36 | for _, test := range tests { 37 | curr := test.in[0] 38 | for _, next := range test.in[1:] { 39 | var ok bool 40 | curr, ok = mo.PartialMerge([]byte("key"), curr, next) 41 | if !ok { 42 | t.Errorf("expected partial merge ok") 43 | } 44 | } 45 | actual := decodeCount(curr) 46 | if actual != test.out { 47 | t.Errorf("expected %d, got %d", test.out, actual) 48 | } 49 | } 50 | 51 | } 52 | 53 | func decodeCount(in []byte) uint64 { 54 | buf := bytes.NewBuffer(in) 55 | count, _ := binary.ReadUvarint(buf) 56 | return count 57 | } 58 | -------------------------------------------------------------------------------- /index/upsidedown/store/boltdb/stats.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package boltdb 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/util" 19 | ) 20 | 21 | type stats struct { 22 | s *Store 23 | } 24 | 25 | func (s *stats) MarshalJSON() ([]byte, error) { 26 | bs := s.s.db.Stats() 27 | return util.MarshalJSON(bs) 28 | } 29 | -------------------------------------------------------------------------------- /index/upsidedown/store/goleveldb/batch.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package goleveldb 16 | 17 | import ( 18 | "github.com/blevesearch/goleveldb/leveldb" 19 | store "github.com/blevesearch/upsidedown_store_api" 20 | ) 21 | 22 | type Batch struct { 23 | store *Store 24 | merge *store.EmulatedMerge 25 | batch *leveldb.Batch 26 | } 27 | 28 | func (b *Batch) Set(key, val []byte) { 29 | b.batch.Put(key, val) 30 | } 31 | 32 | func (b *Batch) Delete(key []byte) { 33 | b.batch.Delete(key) 34 | } 35 | 36 | func (b *Batch) Merge(key, val []byte) { 37 | b.merge.Merge(key, val) 38 | } 39 | 40 | func (b *Batch) Reset() { 41 | b.batch.Reset() 42 | b.merge = store.NewEmulatedMerge(b.store.mo) 43 | } 44 | 45 | func (b *Batch) Close() error { 46 | b.batch.Reset() 47 | b.batch = nil 48 | b.merge = nil 49 | return nil 50 | } 51 | -------------------------------------------------------------------------------- /index/upsidedown/store/goleveldb/iterator.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package goleveldb 16 | 17 | import "github.com/blevesearch/goleveldb/leveldb/iterator" 18 | 19 | type Iterator struct { 20 | store *Store 21 | iterator iterator.Iterator 22 | } 23 | 24 | func (ldi *Iterator) Seek(key []byte) { 25 | ldi.iterator.Seek(key) 26 | } 27 | 28 | func (ldi *Iterator) Next() { 29 | ldi.iterator.Next() 30 | } 31 | 32 | func (ldi *Iterator) Current() ([]byte, []byte, bool) { 33 | if ldi.Valid() { 34 | return ldi.Key(), ldi.Value(), true 35 | } 36 | return nil, nil, false 37 | } 38 | 39 | func (ldi *Iterator) Key() []byte { 40 | return ldi.iterator.Key() 41 | } 42 | 43 | func (ldi *Iterator) Value() []byte { 44 | return ldi.iterator.Value() 45 | } 46 | 47 | func (ldi *Iterator) Valid() bool { 48 | return ldi.iterator.Valid() 49 | } 50 | 51 | func (ldi *Iterator) Close() error { 52 | ldi.iterator.Release() 53 | return nil 54 | } 55 | -------------------------------------------------------------------------------- /index/upsidedown/store/metrics/batch.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package metrics 16 | 17 | import store "github.com/blevesearch/upsidedown_store_api" 18 | 19 | type Batch struct { 20 | s *Store 21 | o store.KVBatch 22 | } 23 | 24 | func (b *Batch) Set(key, val []byte) { 25 | b.o.Set(key, val) 26 | } 27 | 28 | func (b *Batch) Delete(key []byte) { 29 | b.o.Delete(key) 30 | } 31 | 32 | func (b *Batch) Merge(key, val []byte) { 33 | b.s.timerBatchMerge.Time(func() { 34 | b.o.Merge(key, val) 35 | }) 36 | } 37 | 38 | func (b *Batch) Reset() { 39 | b.o.Reset() 40 | } 41 | 42 | func (b *Batch) Close() error { 43 | err := b.o.Close() 44 | b.o = nil 45 | return err 46 | } 47 | -------------------------------------------------------------------------------- /index/upsidedown/store/metrics/iterator.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package metrics 16 | 17 | import store "github.com/blevesearch/upsidedown_store_api" 18 | 19 | type Iterator struct { 20 | s *Store 21 | o store.KVIterator 22 | } 23 | 24 | func (i *Iterator) Seek(x []byte) { 25 | i.s.timerIteratorSeek.Time(func() { 26 | i.o.Seek(x) 27 | }) 28 | } 29 | 30 | func (i *Iterator) Next() { 31 | i.s.timerIteratorNext.Time(func() { 32 | i.o.Next() 33 | }) 34 | } 35 | 36 | func (i *Iterator) Current() ([]byte, []byte, bool) { 37 | return i.o.Current() 38 | } 39 | 40 | func (i *Iterator) Key() []byte { 41 | return i.o.Key() 42 | } 43 | 44 | func (i *Iterator) Value() []byte { 45 | return i.o.Value() 46 | } 47 | 48 | func (i *Iterator) Valid() bool { 49 | return i.o.Valid() 50 | } 51 | 52 | func (i *Iterator) Close() error { 53 | err := i.o.Close() 54 | if err != nil { 55 | i.s.AddError("Iterator.Close", err, nil) 56 | } 57 | return err 58 | } 59 | -------------------------------------------------------------------------------- /index/upsidedown/store/metrics/writer.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2015 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package metrics 16 | 17 | import ( 18 | "fmt" 19 | 20 | store "github.com/blevesearch/upsidedown_store_api" 21 | ) 22 | 23 | type Writer struct { 24 | s *Store 25 | o store.KVWriter 26 | } 27 | 28 | func (w *Writer) Close() error { 29 | err := w.o.Close() 30 | if err != nil { 31 | w.s.AddError("Writer.Close", err, nil) 32 | } 33 | return err 34 | } 35 | 36 | func (w *Writer) NewBatch() store.KVBatch { 37 | return &Batch{s: w.s, o: w.o.NewBatch()} 38 | } 39 | 40 | func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) { 41 | buf, b, err := w.o.NewBatchEx(options) 42 | if err != nil { 43 | return nil, nil, err 44 | } 45 | return buf, &Batch{s: w.s, o: b}, nil 46 | } 47 | 48 | func (w *Writer) ExecuteBatch(b store.KVBatch) (err error) { 49 | batch, ok := b.(*Batch) 50 | if !ok { 51 | return fmt.Errorf("wrong type of batch") 52 | } 53 | w.s.timerWriterExecuteBatch.Time(func() { 54 | err = w.o.ExecuteBatch(batch.o) 55 | if err != nil { 56 | w.s.AddError("Writer.ExecuteBatch", err, nil) 57 | } 58 | }) 59 | return 60 | } 61 | -------------------------------------------------------------------------------- /index/upsidedown/store/moss/stats.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package moss 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/util" 19 | store "github.com/blevesearch/upsidedown_store_api" 20 | ) 21 | 22 | type stats struct { 23 | s *Store 24 | } 25 | 26 | func (s *stats) statsMap() map[string]interface{} { 27 | ms := map[string]interface{}{} 28 | 29 | var err error 30 | ms["moss"], err = s.s.ms.Stats() 31 | if err != nil { 32 | return ms 33 | } 34 | 35 | if s.s.llstore != nil { 36 | if o, ok := s.s.llstore.(store.KVStoreStats); ok { 37 | ms["kv"] = o.StatsMap() 38 | } 39 | } 40 | 41 | _, exists := ms["kv"] 42 | if !exists && s.s.llstats != nil { 43 | ms["kv"] = s.s.llstats() 44 | } 45 | 46 | if msw, ok := s.s.llstore.(*mossStoreWrapper); ok { 47 | ms["store_histograms"] = msw.histograms() 48 | } 49 | 50 | ms["coll_histograms"] = s.s.ms.Histograms().String() 51 | 52 | return ms 53 | } 54 | 55 | func (s *stats) MarshalJSON() ([]byte, error) { 56 | m := s.statsMap() 57 | return util.MarshalJSON(m) 58 | } 59 | -------------------------------------------------------------------------------- /index/upsidedown/upsidedown.proto: -------------------------------------------------------------------------------- 1 | message BackIndexTermsEntry { 2 | required uint32 field = 1; 3 | repeated string terms = 2; 4 | } 5 | 6 | message BackIndexStoreEntry { 7 | required uint32 field = 1; 8 | repeated uint64 arrayPositions = 2; 9 | } 10 | 11 | message BackIndexRowValue { 12 | repeated BackIndexTermsEntry termsEntries = 1; 13 | repeated BackIndexStoreEntry storedEntries = 2; 14 | } 15 | -------------------------------------------------------------------------------- /index_alias.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package bleve 16 | 17 | // An IndexAlias is a wrapper around one or more 18 | // Index objects. It has two distinct modes of 19 | // operation. 20 | // 1. When it points to a single index, ALL index 21 | // operations are valid and will be passed through 22 | // to the underlying index. 23 | // 2. When it points to more than one index, the only 24 | // valid operation is Search. In this case the 25 | // search will be performed across all the 26 | // underlying indexes and the results merged. 27 | // Calls to Add/Remove/Swap the underlying indexes 28 | // are atomic, so you can safely change the 29 | // underlying Index objects while other components 30 | // are performing operations. 31 | type IndexAlias interface { 32 | Index 33 | 34 | Add(i ...Index) 35 | Remove(i ...Index) 36 | Swap(in, out []Index) 37 | } 38 | -------------------------------------------------------------------------------- /index_meta_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package bleve 16 | 17 | import ( 18 | "os" 19 | "testing" 20 | ) 21 | 22 | func TestIndexMeta(t *testing.T) { 23 | var testIndexPath = "doesnotexit.bleve" 24 | defer func() { 25 | err := os.RemoveAll(testIndexPath) 26 | if err != nil { 27 | t.Fatal(err) 28 | } 29 | }() 30 | 31 | // open non-existent meta should give an error 32 | _, err := openIndexMeta(testIndexPath) 33 | if err == nil { 34 | t.Errorf("expected error, got nil") 35 | } 36 | 37 | // create meta 38 | im := &indexMeta{Storage: "boltdb"} 39 | err = im.Save(testIndexPath) 40 | if err != nil { 41 | t.Error(err) 42 | } 43 | im = nil 44 | 45 | // open a meta that exists 46 | im, err = openIndexMeta(testIndexPath) 47 | if err != nil { 48 | t.Error(err) 49 | } 50 | if im.Storage != "boltdb" { 51 | t.Errorf("expected storage 'boltdb', got '%s'", im.Storage) 52 | } 53 | 54 | // save a meta that already exists 55 | err = im.Save(testIndexPath) 56 | if err == nil { 57 | t.Errorf("expected error, got nil") 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /mapping/mapping_no_vectors.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build !vectors 16 | // +build !vectors 17 | 18 | package mapping 19 | 20 | func NewVectorFieldMapping() *FieldMapping { 21 | return nil 22 | } 23 | 24 | func NewVectorBase64FieldMapping() *FieldMapping { 25 | return nil 26 | } 27 | 28 | func (fm *FieldMapping) processVector(propertyMightBeVector interface{}, 29 | pathString string, path []string, indexes []uint64, context *walkContext) bool { 30 | return false 31 | } 32 | 33 | func (fm *FieldMapping) processVectorBase64(propertyMightBeVector interface{}, 34 | pathString string, path []string, indexes []uint64, context *walkContext) { 35 | 36 | } 37 | 38 | // ----------------------------------------------------------------------------- 39 | // document validation functions 40 | 41 | func validateFieldMapping(field *FieldMapping, parentName string, 42 | fieldAliasCtx map[string]*FieldMapping) error { 43 | return validateFieldType(field) 44 | } 45 | -------------------------------------------------------------------------------- /mapping/reflect_test.go: -------------------------------------------------------------------------------- 1 | package mapping 2 | 3 | import ( 4 | "reflect" 5 | "testing" 6 | ) 7 | 8 | func TestLookupPropertyPath(t *testing.T) { 9 | tests := []struct { 10 | input interface{} 11 | path string 12 | output interface{} 13 | }{ 14 | { 15 | input: map[string]interface{}{ 16 | "Type": "a", 17 | }, 18 | path: "Type", 19 | output: "a", 20 | }, 21 | { 22 | input: struct { 23 | Type string 24 | }{ 25 | Type: "b", 26 | }, 27 | path: "Type", 
28 | output: "b", 29 | }, 30 | { 31 | input: &struct { 32 | Type string 33 | }{ 34 | Type: "b", 35 | }, 36 | path: "Type", 37 | output: "b", 38 | }, 39 | } 40 | 41 | for _, test := range tests { 42 | actual := lookupPropertyPath(test.input, test.path) 43 | if !reflect.DeepEqual(actual, test.output) { 44 | t.Fatalf("expected '%v', got '%v', for path '%s' in %+v", test.output, actual, test.path, test.input) 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /mapping_vector.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// interleaveMagic holds the bit masks used by the "binary magic
// numbers" interleave/deinterleave steps. Entry i keeps groups of
// 2^i bits; entry 5 is the low-32-bit mask.
var interleaveMagic = []uint64{
	0x5555555555555555,
	0x3333333333333333,
	0x0F0F0F0F0F0F0F0F,
	0x00FF00FF00FF00FF,
	0x0000FFFF0000FFFF,
	0x00000000FFFFFFFF,
	0xAAAAAAAAAAAAAAAA,
}

// interleaveShift holds the shift distance paired with each of the
// first five masks in interleaveMagic.
var interleaveShift = []uint{1, 2, 4, 8, 16}

// spreadLow32 moves bit i of the low 32 bits of v to bit 2*i of the
// result, leaving every odd bit zero. Bits 32 and above are discarded
// first so they cannot leak into the result.
func spreadLow32(v uint64) uint64 {
	v &= interleaveMagic[5]
	for i := len(interleaveShift) - 1; i >= 0; i-- {
		v = (v | (v << interleaveShift[i])) & interleaveMagic[i]
	}
	return v
}

// Interleave the first 32 bits of each uint64
// adapted from org.apache.lucene.util.BitUtil
// which was adapted from:
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
//
// The bits of v1 end up in the even positions of the result and the
// bits of v2 in the odd positions. Any bits above the first 32 of
// either argument are ignored.
func Interleave(v1, v2 uint64) uint64 {
	return (spreadLow32(v2) << 1) | spreadLow32(v1)
}

// Deinterleave the 32-bit value starting at position 0
// to get the other 32-bit value, shift it by 1 first
//
// It collects the even-positioned bits of b into the low 32 bits of
// the result, undoing what Interleave did to its first argument.
func Deinterleave(b uint64) uint64 {
	b &= interleaveMagic[0]
	for i := 0; i < len(interleaveShift); i++ {
		b = (b ^ (b >> interleaveShift[i])) & interleaveMagic[i+1]
	}
	return b
}
// Float64ToInt64 reinterprets the IEEE-754 bits of f as an int64 and,
// when the sign bit is set, flips the remaining 63 bits. The sign bit
// itself is left as is.
func Float64ToInt64(f float64) int64 {
	const lower63 = 0x7fffffffffffffff

	raw := math.Float64bits(f)
	if raw > lower63 { // sign bit set, i.e. the int64 view is negative
		raw ^= lower63
	}
	return int64(raw)
}

// Int64ToFloat64 is the inverse of Float64ToInt64: for negative i the
// lower 63 bits are flipped back before the bits are reinterpreted as
// a float64.
func Int64ToFloat64(i int64) float64 {
	const lower63 = 0x7fffffffffffffff

	raw := uint64(i)
	if i < 0 {
		raw ^= lower63
	}
	return math.Float64frombits(raw)
}
14 | 15 | package registry 16 | 17 | import ( 18 | "fmt" 19 | 20 | index "github.com/blevesearch/bleve_index_api" 21 | ) 22 | 23 | func RegisterIndexType(name string, constructor IndexTypeConstructor) error { 24 | _, exists := indexTypes[name] 25 | if exists { 26 | return fmt.Errorf("attempted to register duplicate index encoding named '%s'", name) 27 | } 28 | indexTypes[name] = constructor 29 | return nil 30 | } 31 | 32 | type IndexTypeConstructor func(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) 33 | type IndexTypeRegistry map[string]IndexTypeConstructor 34 | 35 | func IndexTypeConstructorByName(name string) IndexTypeConstructor { 36 | return indexTypes[name] 37 | } 38 | 39 | func IndexTypesAndInstances() ([]string, []string) { 40 | var types []string 41 | var instances []string 42 | for name := range indexTypes { 43 | types = append(types, name) 44 | } 45 | return types, instances 46 | } 47 | -------------------------------------------------------------------------------- /scripts/build_children.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Get last child project build number 4 | BUILD_NUM=$(curl -s 'https://api.travis-ci.org/repos/blevesearch/beer-search/builds' | grep -o '^\[{"id":[0-9]*,' | grep -o '[0-9]' | tr -d '\n') 5 | # Restart last child project build 6 | curl -X POST https://api.travis-ci.org/builds/$BUILD_NUM/restart --header "Authorization: token "$AUTH_TOKEN 7 | -------------------------------------------------------------------------------- /scripts/merge-coverprofile.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2016 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// main merges Go cover profiles read from standard input.
//
// Every non-"mode:" line is split at its last space into a block key
// and a hit count; counts of identical keys are summed. The first
// "mode:" line seen is echoed first on standard output, followed by
// one "key count" line per block (in map iteration order).
//
// Diagnostics go to standard error so they can never end up inside the
// merged profile, and a read error makes the program exit with status 1
// instead of reporting success on truncated input.
func main() {

	modeline := ""
	blocks := map[string]int{}
	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		line := scanner.Text()
		if strings.HasPrefix(line, "mode:") {
			// keep only the first mode line
			if modeline == "" {
				modeline = line
			}
			continue
		}
		if strings.TrimSpace(line) == "" {
			// blank lines carry no block information
			continue
		}
		lastSpace := strings.LastIndex(line, " ")
		if lastSpace < 0 {
			// without a separator there is no count to parse; slicing
			// with -1 would panic
			fmt.Fprintf(os.Stderr, "skipping malformed line: %q\n", line)
			continue
		}
		prefix := line[:lastSpace]
		suffix := line[lastSpace+1:]
		count, err := strconv.Atoi(suffix)
		if err != nil {
			fmt.Fprintf(os.Stderr, "error parsing count: %v\n", err)
			continue
		}
		// a missing key reads as zero, so this both inserts and sums
		blocks[prefix] += count
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "reading standard input:", err)
		os.Exit(1)
	}

	fmt.Println(modeline)
	for k, v := range blocks {
		fmt.Printf("%s %d\n", k, v)
	}
}
#!/bin/bash

# Collects per-package and integration test coverage, merges both
# profiles with scripts/merge-coverprofile.go, optionally uploads the
# result (COVERALLS set) or opens it as HTML (COVERHTML set), and then
# removes the intermediate files.
# The script exits with status 1 as soon as a `go test` run prints FAIL.

# unit test coverage: one `go test` run per directory containing .go files
echo "mode: count" > acc.out
for Dir in . $(find ./* -maxdepth 10 -type d | grep -v vendor);
do
	# skip directories without Go sources
	if ls $Dir/*.go &> /dev/null;
	then
		returnval=`go test -coverprofile=profile.out -covermode=count $Dir`
		echo ${returnval}
		# failure is detected by looking for FAIL in the captured output
		if [[ ${returnval} != *FAIL* ]]
		then
			if [ -f profile.out ]
			then
				# drop the per-profile header; acc.out already has one
				cat profile.out | grep -v "mode: count" >> acc.out
			fi
		else
			exit 1
		fi
	fi
done

# collect integration test coverage
echo "mode: count" > integration-acc.out
# comma-separated package list handed to -coverpkg
INTPACKS=`go list ./... | grep -v vendor | grep -v utils | grep -v 'store/test' | grep -v docs | xargs | sed 's/ /,/g'`
returnval=`go test -coverpkg=$INTPACKS -coverprofile=profile.out -covermode=count ./test`
if [[ ${returnval} != *FAIL* ]]
then
	if [ -f profile.out ]
	then
		cat profile.out | grep -v "mode: count" >> integration-acc.out
	fi
else
	exit 1
fi

# sum the counts of blocks that appear in both profiles
cat acc.out integration-acc.out | go run scripts/merge-coverprofile.go > merged.out

# upload only when a repo token is provided
if [ -n "$COVERALLS" ]
then
	export GIT_BRANCH=$TRAVIS_BRANCH
	goveralls -service drone.io -coverprofile=merged.out -repotoken $COVERALLS
fi

# open the HTML report only when requested
if [ -n "$COVERHTML" ]
then
	go tool cover -html=merged.out
fi

# clean up intermediate files (not reached when the script exits early)
rm -rf ./profile.out
rm -rf ./acc.out
rm -rf ./integration-acc.out
rm -rf ./merged.out
14 | 15 | package collector 16 | 17 | import ( 18 | "context" 19 | "math/rand" 20 | "strconv" 21 | "testing" 22 | 23 | "github.com/blevesearch/bleve/v2/search" 24 | index "github.com/blevesearch/bleve_index_api" 25 | ) 26 | 27 | type createCollector func() search.Collector 28 | 29 | func benchHelper(numOfMatches int, cc createCollector, b *testing.B) { 30 | matches := make([]*search.DocumentMatch, 0, numOfMatches) 31 | for i := 0; i < numOfMatches; i++ { 32 | matches = append(matches, &search.DocumentMatch{ 33 | IndexInternalID: index.IndexInternalID(strconv.Itoa(i)), 34 | Score: rand.Float64(), 35 | }) 36 | } 37 | 38 | b.ResetTimer() 39 | 40 | for run := 0; run < b.N; run++ { 41 | searcher := &stubSearcher{ 42 | matches: matches, 43 | } 44 | collector := cc() 45 | err := collector.Collect(context.Background(), searcher, &stubReader{}) 46 | if err != nil { 47 | b.Fatal(err) 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /search/explanation.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package search 16 | 17 | import ( 18 | "encoding/json" 19 | "fmt" 20 | "reflect" 21 | 22 | "github.com/blevesearch/bleve/v2/size" 23 | ) 24 | 25 | var reflectStaticSizeExplanation int 26 | 27 | func init() { 28 | var e Explanation 29 | reflectStaticSizeExplanation = int(reflect.TypeOf(e).Size()) 30 | } 31 | 32 | type Explanation struct { 33 | Value float64 `json:"value"` 34 | Message string `json:"message"` 35 | PartialMatch bool `json:"partial_match,omitempty"` 36 | Children []*Explanation `json:"children,omitempty"` 37 | } 38 | 39 | func (expl *Explanation) String() string { 40 | js, err := json.MarshalIndent(expl, "", " ") 41 | if err != nil { 42 | return fmt.Sprintf("error serializing explanation to json: %v", err) 43 | } 44 | return string(js) 45 | } 46 | 47 | func (expl *Explanation) Size() int { 48 | sizeInBytes := reflectStaticSizeExplanation + size.SizeOfPtr + 49 | len(expl.Message) 50 | 51 | for _, entry := range expl.Children { 52 | sizeInBytes += entry.Size() 53 | } 54 | 55 | return sizeInBytes 56 | } 57 | -------------------------------------------------------------------------------- /search/highlight/highlighter/simple/fragment_scorer_simple.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package simple 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/search" 19 | "github.com/blevesearch/bleve/v2/search/highlight" 20 | ) 21 | 22 | // FragmentScorer will score fragments by how many 23 | // unique terms occur in the fragment with no regard for 24 | // any boost values used in the original query 25 | type FragmentScorer struct { 26 | tlm search.TermLocationMap 27 | } 28 | 29 | func NewFragmentScorer(tlm search.TermLocationMap) *FragmentScorer { 30 | return &FragmentScorer{ 31 | tlm: tlm, 32 | } 33 | } 34 | 35 | func (s *FragmentScorer) Score(f *highlight.Fragment) { 36 | score := 0.0 37 | OUTER: 38 | for _, locations := range s.tlm { 39 | for _, location := range locations { 40 | if location.ArrayPositions.Equals(f.ArrayPositions) && int(location.Start) >= f.Start && int(location.End) <= f.End { 41 | score += 1.0 42 | // once we find a term in the fragment 43 | // don't care about additional matches 44 | continue OUTER 45 | } 46 | } 47 | } 48 | f.Score = score 49 | } 50 | -------------------------------------------------------------------------------- /search/query/boost.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// Boost is a floating point boost factor. A nil *Boost means that no
// boost was specified.
type Boost float64

// Value returns the boost as a float64; a nil receiver yields 1.0.
func (b *Boost) Value() float64 {
	if b != nil {
		return float64(*b)
	}
	return 1.0
}

// GoString renders the boost with fmt's %f verb; a nil receiver yields
// "boost unspecified".
func (b *Boost) GoString() string {
	if b != nil {
		return fmt.Sprintf("%f", float64(*b))
	}
	return "boost unspecified"
}
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //go:build vectors 16 | // +build vectors 17 | 18 | package searcher 19 | 20 | import ( 21 | "context" 22 | 23 | "github.com/blevesearch/bleve/v2/search" 24 | index "github.com/blevesearch/bleve_index_api" 25 | ) 26 | 27 | func optimizeKNN(ctx context.Context, indexReader index.IndexReader, 28 | qsearchers []search.Searcher) error { 29 | var octx index.VectorOptimizableContext 30 | var err error 31 | 32 | for _, searcher := range qsearchers { 33 | // Only applicable to KNN Searchers. 34 | o, ok := searcher.(index.VectorOptimizable) 35 | if !ok { 36 | continue 37 | } 38 | 39 | octx, err = o.VectorOptimize(ctx, octx) 40 | if err != nil { 41 | return err 42 | } 43 | } 44 | 45 | // No KNN searchers. 46 | if octx == nil { 47 | return nil 48 | } 49 | 50 | // Postings lists and iterators replaced in the pointer to the 51 | // vector reader 52 | return octx.Finish() 53 | } 54 | -------------------------------------------------------------------------------- /search/searcher/optimize_no_knn.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
// optimizeKNN is the variant compiled when the "vectors" build tag is
// absent (see the !vectors constraint on this file). It ignores all of
// its arguments and always returns nil.
func optimizeKNN(ctx context.Context, indexReader index.IndexReader,
	qsearchers []search.Searcher) error {
	// No-op
	return nil
}
14 | 15 | package searcher 16 | 17 | import ( 18 | "github.com/blevesearch/bleve/v2/search" 19 | ) 20 | 21 | type OrderedSearcherList []search.Searcher 22 | 23 | // sort.Interface 24 | 25 | func (otrl OrderedSearcherList) Len() int { 26 | return len(otrl) 27 | } 28 | 29 | func (otrl OrderedSearcherList) Less(i, j int) bool { 30 | return otrl[i].Count() < otrl[j].Count() 31 | } 32 | 33 | func (otrl OrderedSearcherList) Swap(i, j int) { 34 | otrl[i], otrl[j] = otrl[j], otrl[i] 35 | } 36 | 37 | type OrderedPositionalSearcherList struct { 38 | searchers []search.Searcher 39 | index []int 40 | } 41 | 42 | // sort.Interface 43 | 44 | func (otrl OrderedPositionalSearcherList) Len() int { 45 | return len(otrl.searchers) 46 | } 47 | 48 | func (otrl OrderedPositionalSearcherList) Less(i, j int) bool { 49 | return otrl.searchers[i].Count() < otrl.searchers[j].Count() 50 | } 51 | 52 | func (otrl OrderedPositionalSearcherList) Swap(i, j int) { 53 | otrl.searchers[i], otrl.searchers[j] = otrl.searchers[j], otrl.searchers[i] 54 | otrl.index[i], otrl.index[j] = otrl.index[j], otrl.index[i] 55 | } 56 | -------------------------------------------------------------------------------- /search/searcher/search_numeric_range_test.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package searcher 16 | 17 | import ( 18 | "reflect" 19 | "testing" 20 | 21 | "github.com/blevesearch/bleve/v2/numeric" 22 | ) 23 | 24 | func TestSplitRange(t *testing.T) { 25 | min := numeric.Float64ToInt64(1.0) 26 | max := numeric.Float64ToInt64(5.0) 27 | ranges := splitInt64Range(min, max, 4) 28 | enumerated := ranges.Enumerate(nil) 29 | if len(enumerated) != 135 { 30 | t.Errorf("expected 135 terms, got %d", len(enumerated)) 31 | } 32 | 33 | } 34 | 35 | func TestIncrementBytes(t *testing.T) { 36 | tests := []struct { 37 | in []byte 38 | out []byte 39 | }{ 40 | { 41 | in: []byte{0}, 42 | out: []byte{1}, 43 | }, 44 | { 45 | in: []byte{0, 0}, 46 | out: []byte{0, 1}, 47 | }, 48 | { 49 | in: []byte{0, 255}, 50 | out: []byte{1, 0}, 51 | }, 52 | } 53 | 54 | for _, test := range tests { 55 | actual := incrementBytes(test.in) 56 | if !reflect.DeepEqual(actual, test.out) { 57 | t.Errorf("expected %#v, got %#v", test.out, actual) 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /test/integration.go: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Couchbase, Inc. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
// SearchTest is one integration test case: a search request together
// with a result and a free-form comment. The JSON tags name the keys
// used when a case is encoded or decoded.
// NOTE(review): presumably Result holds the expected outcome of
// running Search; confirm against the test runner.
type SearchTest struct {
	// Search is the request, stored under the "search" key.
	Search *bleve.SearchRequest `json:"search"`
	// Result is the search result, stored under the "result" key.
	Result *bleve.SearchResult `json:"result"`
	// Comment is a human-readable description of the case, stored under
	// the "comment" key.
	Comment string `json:"comment"`
}

// SearchTests is a list of search test cases.
type SearchTests []*SearchTest
1 | [ 2 | { 3 | "comment": "match all across shards", 4 | "search": { 5 | "from": 0, 6 | "size": 10, 7 | "sort": ["-_score", "_id"], 8 | "query": { 9 | "match_all": {} 10 | } 11 | }, 12 | "result": { 13 | "total_hits": 4, 14 | "hits": [ 15 | { 16 | "id": "a" 17 | }, 18 | { 19 | "id": "b" 20 | }, 21 | { 22 | "id": "c" 23 | }, 24 | { 25 | "id": "d" 26 | } 27 | ] 28 | } 29 | }, 30 | { 31 | "comment": "search after b (page 2 when size=2)", 32 | "search": { 33 | "from": 0, 34 | "size": 2, 35 | "sort": ["name"], 36 | "search_after": ["b"], 37 | "query": { 38 | "match_all": {} 39 | } 40 | }, 41 | "result": { 42 | "total_hits": 4, 43 | "hits": [ 44 | { 45 | "id": "c" 46 | }, 47 | { 48 | "id": "d" 49 | } 50 | ] 51 | } 52 | }, 53 | { 54 | "comment": "search before c (page 1 when size=2)", 55 | "search": { 56 | "from": 0, 57 | "size": 2, 58 | "sort": ["name"], 59 | "search_before": ["c"], 60 | "query": { 61 | "match_all": {} 62 | } 63 | }, 64 | "result": { 65 | "total_hits": 4, 66 | "hits": [ 67 | { 68 | "id": "a" 69 | }, 70 | { 71 | "id": "b" 72 | } 73 | ] 74 | } 75 | } 76 | ] -------------------------------------------------------------------------------- /test/tests/basic/data/a.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "a", 3 | "name": "marty", 4 | "age": 19, 5 | "title": "mista", 6 | "tags": ["gopher", "belieber"] 7 | } -------------------------------------------------------------------------------- /test/tests/basic/data/b.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "b", 3 | "name": "steve has long & complicated name", 4 | "age": 27, 5 | "birthday": "2001-09-09T01:46:40Z", 6 | "title": "missess" 7 | } -------------------------------------------------------------------------------- /test/tests/basic/data/c.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "c", 3 | "name": "bob walks home", 4 | "age": 
64, 5 | "birthday": "2014-05-13T16:53:20Z", 6 | "title": "masta" 7 | } -------------------------------------------------------------------------------- /test/tests/basic/data/d.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "d", 3 | "name": "bobbleheaded wings top the phone", 4 | "age": 72, 5 | "birthday": "2014-05-13T16:53:20Z", 6 | "title": "mizz" 7 | } -------------------------------------------------------------------------------- /test/tests/basic/mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "types": { 3 | "person": { 4 | "properties": { 5 | "name": { 6 | "fields": [ 7 | { 8 | "include_term_vectors": true, 9 | "include_in_all": true, 10 | "index": true, 11 | "store": true, 12 | "analyzer": "en", 13 | "type": "text" 14 | } 15 | ], 16 | "dynamic": true, 17 | "enabled": true 18 | }, 19 | "id": { 20 | "dynamic": false, 21 | "enabled": false 22 | } 23 | } 24 | } 25 | }, 26 | "default_type": "person" 27 | } -------------------------------------------------------------------------------- /test/tests/employee/data/emp10508560.json: -------------------------------------------------------------------------------- 1 | { 2 | "salary": 104561.8, 3 | "_type": "emp", 4 | "name": "Deirdre Reed", 5 | "mutated": 0, 6 | "is_manager": true, 7 | "dept": "Accounts", 8 | "join_date": "2003-05-28T21:29:00", 9 | "manages": { 10 | "team_size": 9, 11 | "reports": [ 12 | "Gallia Julián", 13 | "Duvessa Nicolás", 14 | "Beryl Thomas", 15 | "Deirdre Julián", 16 | "Antonia Gerónimo", 17 | "Ciara Young", 18 | "Riona Richardson IX", 19 | "Severin Jr.", 20 | "Perdita Morgan" 21 | ] 22 | }, 23 | "languages_known": [ 24 | "English", 25 | "Spanish", 26 | "German", 27 | "Italian", 28 | "French", 29 | "Arabic", 30 | "Africans", 31 | "Hindi", 32 | "Vietnamese", 33 | "Urdu", 34 | "Dutch", 35 | "Quechua", 36 | "Japanese", 37 | "Chinese", 38 | "Nepalese", 39 | "Thai", 40 | "Malay" 41 | ], 
42 | "emp_id": "10508560", 43 | "email": "deirdre@mcdiabetes.com" 44 | } -------------------------------------------------------------------------------- /test/tests/employee/mapping.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /test/tests/employee/searches.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "comment": "test array position output", 4 | "search": { 5 | "from": 0, 6 | "size": 10, 7 | "sort": ["-_score", "_id"], 8 | "query": { 9 | "field": "manages.reports", 10 | "term": "julián" 11 | }, 12 | "includeLocations": true 13 | }, 14 | "result": { 15 | "total_hits": 1, 16 | "hits": [ 17 | { 18 | "id": "emp10508560", 19 | "locations": { 20 | "manages.reports": { 21 | "julián": [ 22 | { 23 | "pos": 2, 24 | "start": 7, 25 | "end": 14, 26 | "array_positions":[0] 27 | }, 28 | { 29 | "pos": 2, 30 | "start": 8, 31 | "end": 15, 32 | "array_positions":[3] 33 | } 34 | ] 35 | } 36 | } 37 | } 38 | ] 39 | } 40 | } 41 | ] -------------------------------------------------------------------------------- /test/tests/facet/data/a.json: -------------------------------------------------------------------------------- 1 | { 2 | "category": "inventory", 3 | "type": "book", 4 | "rating": 2, 5 | "updated": "2014-11-25" 6 | } -------------------------------------------------------------------------------- /test/tests/facet/data/b.json: -------------------------------------------------------------------------------- 1 | { 2 | "category": "inventory", 3 | "type": "book", 4 | "rating": 7, 5 | "updated": "2013-07-25" 6 | } -------------------------------------------------------------------------------- /test/tests/facet/data/c.json: -------------------------------------------------------------------------------- 1 | { 2 | "category": "inventory", 3 | "type": "book", 4 | "rating": 1, 5 | "updated": 
"2014-03-03" 6 | } -------------------------------------------------------------------------------- /test/tests/facet/data/d.json: -------------------------------------------------------------------------------- 1 | { 2 | "category": "inventory", 3 | "type": "book", 4 | "rating": 9, 5 | "updated": "2014-09-16" 6 | } -------------------------------------------------------------------------------- /test/tests/facet/data/e.json: -------------------------------------------------------------------------------- 1 | { 2 | "category": "inventory", 3 | "type": "book", 4 | "rating": 5, 5 | "updated": "2014-11-15" 6 | } -------------------------------------------------------------------------------- /test/tests/facet/data/f.json: -------------------------------------------------------------------------------- 1 | { 2 | "category": "inventory", 3 | "type": "movie", 4 | "rating": 3, 5 | "updated": "2017-06-05" 6 | } -------------------------------------------------------------------------------- /test/tests/facet/data/g.json: -------------------------------------------------------------------------------- 1 | { 2 | "category": "inventory", 3 | "type": "movie", 4 | "rating": 9, 5 | "updated": "2011-10-03" 6 | } -------------------------------------------------------------------------------- /test/tests/facet/data/h.json: -------------------------------------------------------------------------------- 1 | { 2 | "category": "inventory", 3 | "type": "movie", 4 | "rating": 9, 5 | "updated": "2019-08-26" 6 | } -------------------------------------------------------------------------------- /test/tests/facet/data/i.json: -------------------------------------------------------------------------------- 1 | { 2 | "category": "inventory", 3 | "type": "movie", 4 | "rating": 1, 5 | "updated": "2014-12-14" 6 | } -------------------------------------------------------------------------------- /test/tests/facet/data/j.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "category": "inventory", 3 | "type": "game", 4 | "rating": 9, 5 | "updated": "2013-10-20" 6 | } -------------------------------------------------------------------------------- /test/tests/facet/mapping.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /test/tests/fosdem/data/3311@FOSDEM15@fosdem.org.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "From Prolog to Erlang to Haskell to Lisp to TLC and then back to Prolog I have journeyed, and I'd like to share some of the beautiful", 3 | "category": "Word" 4 | } -------------------------------------------------------------------------------- /test/tests/fosdem/data/3492@FOSDEM15@fosdem.org.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "different cats", 3 | "category": "Perl" 4 | } -------------------------------------------------------------------------------- /test/tests/fosdem/data/3496@FOSDEM15@fosdem.org.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "many cats", 3 | "category": "Perl" 4 | } -------------------------------------------------------------------------------- /test/tests/fosdem/data/3505@FOSDEM15@fosdem.org.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "From Prolog to Erlang to Haskell to Lisp to TLC and then back to Prolog I have journeyed, and I'd like to share some of the beautiful", 3 | "category": "Perl" 4 | } -------------------------------------------------------------------------------- /test/tests/fosdem/data/3507@FOSDEM15@fosdem.org.json: -------------------------------------------------------------------------------- 1 | { 
2 | "description": "From Prolog to Erlang to Haskell to Gel to TLC and then back to Prolog I have journeyed, and I'd like to share some of the beautiful", 3 | "category": "Perl" 4 | } -------------------------------------------------------------------------------- /test/tests/geo/data/amoeba_brewery.json: -------------------------------------------------------------------------------- 1 | {"name":"amoeba brewery","city":"bangalore","state":"KAR","code":"","country":"India","phone":"","website":"","type":"brewery","updated":"2019-09-17 20:00:20","description":"brewery near cb office","address":[],"geo":{"accuracy":"APPROXIMATE","lat":12.97467,"lon":77.60490}} -------------------------------------------------------------------------------- /test/tests/geo/data/brewpub_on_the_green.json: -------------------------------------------------------------------------------- 1 | {"name":"Brewpub-on-the-Green","city":"Fremont","state":"California","code":"","country":"United States","phone":"","website":"","type":"brewery","updated":"2010-07-22 20:00:20","description":"","address":[],"geo":{"accuracy":"APPROXIMATE","lat":37.5483,"lon":-121.989}} -------------------------------------------------------------------------------- /test/tests/geo/data/capital_city_brewing_company.json: -------------------------------------------------------------------------------- 1 | {"name":"Capital City Brewing Company","city":"Washington","state":"District of Columbia","code":"20005","country":"United States","phone":"202.628.2222","website":"http://www.capcitybrew.com","type":"brewery","updated":"2010-07-22 20:00:20","description":"Washington DC's first brewpub since prohibition, Capitol City Brewing Co. opened its doors in 1992. Our first location still stands in Downtown DC, at 11th and H St., NW. 
Our company policy is to bring the fine craft of brewing to every person who lives and visits our region, as well as treating them to a wonderful meal and a great experience.","address":["1100 New York Ave, NW"],"geo":{"accuracy":"ROOFTOP","lat":38.8999,"lon":-77.0272}} -------------------------------------------------------------------------------- /test/tests/geo/data/communiti_brewery.json: -------------------------------------------------------------------------------- 1 | {"name":"communiti brewery","city":"bangalore","state":"KAR","code":"","country":"India","phone":"","website":"","type":"brewery","updated":"2019-09-17 20:00:20","description":"brewery near cb office","address":[],"geo":{"accuracy":"APPROXIMATE","lat":12.97237,"lon":77.608237}} -------------------------------------------------------------------------------- /test/tests/geo/data/firehouse_grill_brewery.json: -------------------------------------------------------------------------------- 1 | {"name":"Firehouse Grill & Brewery","city":"Sunnyvale","state":"California","code":"94086","country":"United States","phone":"1-408-773-9500","website":"","type":"brewery","updated":"2010-07-22 20:00:20","description":"","address":["111 South Murphy Avenue"],"geo":{"accuracy":"RANGE_INTERPOLATED","lat":37.3775,"lon":-122.03}} -------------------------------------------------------------------------------- /test/tests/geo/data/hook_ladder_brewing_company.json: -------------------------------------------------------------------------------- 1 | {"name":"Hook & Ladder Brewing Company","city":"Silver Spring","state":"Maryland","code":"20910","country":"United States","phone":"301.565.4522","website":"http://www.hookandladderbeer.com","type":"brewery","updated":"2010-07-22 20:00:20","description":"At Hook & Ladder Brewing we believe in great beer in the company of good friends, so we bring you three great beers for your drinking pleasure (please drink responsibly). 
Each of our beers is carefully crafted with the finest quality ingredients for a distinctive taste we know you will enjoy. Try one tonight, you just might get hooked. Through our own experiences in the fire and rescue service we have chosen the Hook & Ladder as a symbol of pride and honor to pay tribute to the brave men and women who serve and protect our communities.","address":["8113 Fenton St."],"geo":{"accuracy":"ROOFTOP","lat":38.9911,"lon":-77.0237}} -------------------------------------------------------------------------------- /test/tests/geo/data/jack_s_brewing.json: -------------------------------------------------------------------------------- 1 | {"name":"Jack's Brewing","city":"Fremont","state":"California","code":"94538","country":"United States","phone":"1-510-796-2036","website":"","type":"brewery","updated":"2010-07-22 20:00:20","description":"","address":["39176 Argonaut Way"],"geo":{"accuracy":"ROOFTOP","lat":37.5441,"lon":-121.988}} -------------------------------------------------------------------------------- /test/tests/geo/data/social_brewery.json: -------------------------------------------------------------------------------- 1 | {"name":"social brewery","city":"bangalore","state":"KAR","code":"","country":"India","phone":"","website":"","type":"brewery","updated":"2019-09-17 20:00:20","description":"brewery near cb office, but outside the polygon","address":[],"geo":{"accuracy":"APPROXIMATE","lat":12.9736946,"lon":77.6042133}} -------------------------------------------------------------------------------- /test/tests/geo/data/sweet_water_tavern_and_brewery.json: -------------------------------------------------------------------------------- 1 | {"name":"Sweet Water Tavern and Brewery","city":"Sterling","state":"Virginia","code":"20121","country":"United States","phone":"(703) 449-1108","website":"http://www.greatamericanrestaurants.com/sweetMainSter/index.htm","type":"brewery","updated":"2010-07-22 
20:00:20","description":"","address":["45980 Waterview Plaza"],"geo":{"accuracy":"RANGE_INTERPOLATED","lat":39.0324,"lon":-77.4097}} -------------------------------------------------------------------------------- /test/tests/geo/mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "types": { 3 | "brewery": { 4 | "properties": { 5 | "name": { 6 | "fields": [ 7 | { 8 | "include_term_vectors": true, 9 | "include_in_all": true, 10 | "index": true, 11 | "store": true, 12 | "analyzer": "keyword", 13 | "type": "text" 14 | } 15 | ], 16 | "dynamic": true, 17 | "enabled": true 18 | }, 19 | "geo": { 20 | "fields": [ 21 | { 22 | "include_term_vectors": true, 23 | "include_in_all": true, 24 | "index": true, 25 | "store": true, 26 | "type": "geopoint" 27 | } 28 | ], 29 | "dynamic": true, 30 | "enabled": true 31 | } 32 | } 33 | } 34 | }, 35 | "default_type": "brewery" 36 | } 37 | -------------------------------------------------------------------------------- /test/tests/geoshapes/data/circle_halairport.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "hal airpork circular region", 3 | "city": "bangalore", 4 | "type": "geoshapes", 5 | "description": "circle covering the hal airport", 6 | "region": { 7 | "type": "Circle", 8 | "coordinates": [ 9 | 77.6698637008667, 10 | 12.951865687866821 11 | ], 12 | "radius": "2.4km" 13 | } 14 | } -------------------------------------------------------------------------------- /test/tests/geoshapes/data/envelope_brockwell_park.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "brockwell park envelope", 3 | "city": "london", 4 | "type": "geoshapes", 5 | "description": "brockwell park envelope", 6 | "region": { 7 | "type": "envelope", 8 | "coordinates": [ 9 | [ 10 | -0.11278152465820314, 11 | 51.44579626059569 12 | ], 13 | [ 14 | -0.10037899017333984, 15 | 51.45566490761856 16 | ] 17 | ] 
18 | } 19 | } -------------------------------------------------------------------------------- /test/tests/geoshapes/data/linestring_putney_bridge.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "linestring for putney bridge", 3 | "city": "london", 4 | "type": "geoshapes", 5 | "description": "linestring for putney bridge", 6 | "region": { 7 | "type": "linestring", 8 | "coordinates": [ 9 | [ 10 | -0.21183013916015625, 11 | 51.46791083061189 12 | ], 13 | [ 14 | -0.21431922912597656, 15 | 51.465504685939706 16 | ] 17 | ] 18 | } 19 | } -------------------------------------------------------------------------------- /test/tests/geoshapes/data/multilinestring_old_airport_road.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "road routes", 3 | "city": "bangalore", 4 | "type": "geoshapes", 5 | "description": "multilinestrings approximating the roads indiranagar 100ft and old airport port road", 6 | "region": { 7 | "type": "multilinestring", 8 | "coordinates": [ 9 | [ 10 | [ 11 | 77.64081001281738, 12 | 12.983398626256326 13 | ], 14 | [ 15 | 77.64166831970213, 16 | 12.960648472679763 17 | ] 18 | ], 19 | [ [ 20 | 77.64192581176758, 21 | 12.960564828571133 22 | ], 23 | [ 24 | 77.66990661621094, 25 | 12.958390071883693 26 | ] 27 | ], 28 | [ [ 29 | 77.67016410827637, 30 | 12.958055492245812 31 | ], 32 | [ 33 | 77.68106460571289, 34 | 12.954626025039444 35 | ] 36 | ], 37 | [ [ 38 | 77.68149375915527, 39 | 12.954542378907867 40 | ], 41 | [ 42 | 77.7011489868164, 43 | 12.957219041184294 44 | ] 45 | ] 46 | ] 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /test/tests/geoshapes/data/multipoint_blr_stadiums.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "multipoints for stadiums", 3 | "city": "bangalore", 4 | "type": "geoshapes", 5 | "description": "contains 3 
points", 6 | "region": { 7 | "type": "multipoint", 8 | "coordinates": [ 9 | [ 10 | 77.5929594039917, 11 | 12.969347306502671 12 | ], 13 | [ 14 | 77.6004695892334, 15 | 12.979007674139009 16 | ], 17 | [ 18 | 77.60068416595459, 19 | 12.961735843534306 20 | ] 21 | ] 22 | } 23 | } -------------------------------------------------------------------------------- /test/tests/geoshapes/data/point_museum_of_london.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "geopoint for the museum of london", 3 | "city": "london", 4 | "type": "geoshapes", 5 | "description": "geopoint for the museum of london", 6 | "region": { 7 | "type": "point", 8 | "coordinates": [ 9 | -0.09613037109375, 10 | 51.51803669675129 11 | ] 12 | } 13 | } -------------------------------------------------------------------------------- /test/tests/geoshapes/data/polygon_cubbonpark.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cubbon park polygon", 3 | "city": "bangalore", 4 | "type": "geoshapes", 5 | "description": "polygon inside cubbon park", 6 | "region": { 7 | "type": "Polygon", 8 | "coordinates": [ 9 | [ 10 | [ 11 | 77.58894681930542, 12 | 12.976498523818783 13 | ], 14 | [ 15 | 77.58677959442139, 16 | 12.974533005048169 17 | ], 18 | [ 19 | 77.5879168510437, 20 | 12.971333776381767 21 | ], 22 | [ 23 | 77.58849620819092, 24 | 12.96800904416803 25 | ], 26 | [ 27 | 77.59371042251587, 28 | 12.972128359891645 29 | ], 30 | [ 31 | 77.59512662887573, 32 | 12.973842978816679 33 | ], 34 | [ 35 | 77.59253025054932, 36 | 12.976853988320428 37 | ], 38 | [ 39 | 77.58894681930542, 40 | 12.976498523818783 41 | ] 42 | ] 43 | ] 44 | } 45 | } -------------------------------------------------------------------------------- /test/tests/geoshapes/mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "types": { 3 | "geoshapes": { 4 | "properties": { 5 | "name": { 
6 | "fields": [ 7 | { 8 | "include_term_vectors": true, 9 | "include_in_all": true, 10 | "index": true, 11 | "store": true, 12 | "analyzer": "keyword", 13 | "type": "text" 14 | } 15 | ], 16 | "dynamic": true, 17 | "enabled": true 18 | }, 19 | "region": { 20 | "fields": [ 21 | { 22 | "include_term_vectors": true, 23 | "include_in_all": true, 24 | "index": true, 25 | "store": true, 26 | "type": "geoshape" 27 | } 28 | ], 29 | "dynamic": true, 30 | "enabled": true 31 | } 32 | } 33 | } 34 | }, 35 | "default_type": "geoshapes" 36 | } -------------------------------------------------------------------------------- /test/tests/phrase/data/a.json: -------------------------------------------------------------------------------- 1 | { 2 | "body": "Twenty Thousand Leagues Under The Sea" 3 | } -------------------------------------------------------------------------------- /test/tests/phrase/data/b.json: -------------------------------------------------------------------------------- 1 | { 2 | "body": ["bad call", "defenseless receiver"] 3 | } -------------------------------------------------------------------------------- /test/tests/phrase/mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "types": { 3 | "book": { 4 | "properties": { 5 | "body": { 6 | "fields": [ 7 | { 8 | "include_term_vectors": true, 9 | "include_in_all": true, 10 | "index": true, 11 | "store": true, 12 | "analyzer": "en", 13 | "type": "text" 14 | } 15 | ], 16 | "dynamic": true, 17 | "enabled": true 18 | } 19 | } 20 | } 21 | }, 22 | "default_type": "book" 23 | } -------------------------------------------------------------------------------- /test/tests/sort/data/a.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "a", 3 | "name": "marty", 4 | "age": 19, 5 | "born": "2014-11-25", 6 | "title": "mista", 7 | "tags": ["gopher", "belieber"] 8 | } 9 | 
-------------------------------------------------------------------------------- /test/tests/sort/data/b.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "b", 3 | "name": "steve", 4 | "age": 21, 5 | "born": "2000-09-11", 6 | "title": "zebra", 7 | "tags": ["thought-leader", "futurist"] 8 | } 9 | -------------------------------------------------------------------------------- /test/tests/sort/data/c.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "c", 3 | "name": "aster", 4 | "age": 21, 5 | "born": "1954-02-02", 6 | "title": "blogger", 7 | "tags": ["red", "blue", "green"] 8 | } 9 | -------------------------------------------------------------------------------- /test/tests/sort/data/d.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "d", 3 | "age": 65, 4 | "born": "1978-12-02", 5 | "title": "agent d is desperately trying out to be successful rapster!", 6 | "tags": ["cats"] 7 | } 8 | -------------------------------------------------------------------------------- /test/tests/sort/data/e.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "e", 3 | "name": "nancy", 4 | "born": "1954-10-22", 5 | "title": "rapstar nancy rapster", 6 | "tags": ["pain"] 7 | } 8 | -------------------------------------------------------------------------------- /test/tests/sort/data/f.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "f", 3 | "name": "frank", 4 | "age": 1, 5 | "title": "frank the taxman of cb, Rapster!", 6 | "tags": ["vitamin","purple"] 7 | } 8 | -------------------------------------------------------------------------------- /test/tests/sort/mapping.json: -------------------------------------------------------------------------------- 1 | { 2 | 3 | } 4 | 
// MarshalJSON is an exported function variable that defaults to
// encoding/json's Marshal. It should only be overwritten while the process is
// initializing (for example from an init function).
var MarshalJSON = json.Marshal

// UnmarshalJSON is an exported function variable that defaults to
// encoding/json's Unmarshal. It should only be overwritten while the process
// is initializing (for example from an init function).
var UnmarshalJSON = json.Unmarshal