├── .dockerignore ├── .github ├── CODEOWNERS └── workflows │ ├── create-release.yaml │ └── tests.yaml ├── .gitignore ├── Dockerfile.full ├── Dockerfile.local-vectordb ├── Dockerfile.minimal ├── LICENSE ├── README.md ├── adapters └── repos │ └── extensions_weaviate_module.go ├── client └── client.go ├── compoundsplitting ├── dictionary.go ├── noop_splitter.go ├── splitter.go └── splitter_test.go ├── contextionary ├── contextionary.pb.go ├── contextionary.proto ├── core │ ├── annoyindex │ │ ├── annoy_test.go │ │ ├── annoygomodule.h │ │ ├── annoygomodule_wrap.cxx │ │ ├── annoyindex.go │ │ ├── annoylib.h │ │ └── kissrandom.h │ ├── centroid.go │ ├── centroid_test.go │ ├── certainty.go │ ├── combined.go │ ├── combined_simple_test.go │ ├── component_test.go │ ├── contextionary.go │ ├── generator │ │ ├── cmd │ │ │ └── generator.go │ │ └── generator.go │ ├── indices_test.go │ ├── memory_index.go │ ├── mmapped.go │ ├── similar_words.go │ ├── similar_words_test.go │ ├── stopwords │ │ └── detector.go │ ├── vector.go │ └── wordlist.go └── schema │ ├── contextionary.go │ ├── schema_search.go │ ├── schema_search_params.go │ ├── schema_search_params_test.go │ └── schema_search_test.go ├── errors └── errors.go ├── extensions ├── extension.go ├── looker_upper.go ├── looker_upper_test.go ├── storer.go └── storer_test.go ├── gen_proto_code.sh ├── go.mod ├── go.sum ├── logparser └── parse.go ├── main └── splitter_preprocessor.go ├── prepare_docker_buildx.sh ├── preprocessing ├── dictionary_pre_processing.go ├── dictionary_pre_processing_test.go ├── hunspell.go └── hunspell_test.go ├── server ├── api.go ├── config │ └── config.go ├── contextionary.go ├── corpus_vectorizer.go ├── corpus_vectorizer_test.go ├── grpc_error.go ├── server.go ├── splitter.go ├── splitter_test.go ├── weight_manipulator.go └── weight_manipulator_test.go ├── test ├── compoundsplitting │ ├── contextionary.idx │ ├── nl_NL.aff │ ├── nl_NL.dic │ └── pre_processed_splitter_dict.csv ├── journey.sh └── journey │ ├── Dockerfile │ ├── docker-compose.yml │ ├── go.mod │ ├── go.sum │ └── journey_test.go └── tools ├── dev ├── .gitignore ├── contextionary-playground │ ├── .gitignore │ ├── class_vectors │ │ ├── elastic.go │ │ ├── main.go │ │ ├── search.go │ │ ├── stopwords.go │ │ └── texts.go │ ├── comparison │ │ └── main.go │ ├── main.go │ └── schema │ │ └── main.go ├── en_test-vectors-small.txt.bz2 ├── gen_simple_contextionary.sh ├── run.sh └── stopwords.json ├── download_contextionary.sh ├── native_build_contextionary.sh ├── preprocess_splitter_dict.sh └── preprocess_splitter_dict_native_build.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | data/ 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/workflows/create-release.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/.github/workflows/create-release.yaml -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/.github/workflows/tests.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/ 2 | local-vectordb/ 3 | 4 | .idea 5 | 6 | vendor/ -------------------------------------------------------------------------------- /Dockerfile.full: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/Dockerfile.full -------------------------------------------------------------------------------- /Dockerfile.local-vectordb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/Dockerfile.local-vectordb -------------------------------------------------------------------------------- /Dockerfile.minimal: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/Dockerfile.minimal -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/README.md -------------------------------------------------------------------------------- /adapters/repos/extensions_weaviate_module.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/adapters/repos/extensions_weaviate_module.go -------------------------------------------------------------------------------- /client/client.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/client/client.go -------------------------------------------------------------------------------- /compoundsplitting/dictionary.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/compoundsplitting/dictionary.go -------------------------------------------------------------------------------- /compoundsplitting/noop_splitter.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/compoundsplitting/noop_splitter.go -------------------------------------------------------------------------------- /compoundsplitting/splitter.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/compoundsplitting/splitter.go -------------------------------------------------------------------------------- /compoundsplitting/splitter_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/compoundsplitting/splitter_test.go -------------------------------------------------------------------------------- /contextionary/contextionary.pb.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/contextionary.pb.go -------------------------------------------------------------------------------- /contextionary/contextionary.proto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/contextionary.proto -------------------------------------------------------------------------------- /contextionary/core/annoyindex/annoy_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/annoyindex/annoy_test.go -------------------------------------------------------------------------------- /contextionary/core/annoyindex/annoygomodule.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/annoyindex/annoygomodule.h -------------------------------------------------------------------------------- /contextionary/core/annoyindex/annoygomodule_wrap.cxx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/annoyindex/annoygomodule_wrap.cxx -------------------------------------------------------------------------------- /contextionary/core/annoyindex/annoyindex.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/annoyindex/annoyindex.go -------------------------------------------------------------------------------- /contextionary/core/annoyindex/annoylib.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/annoyindex/annoylib.h -------------------------------------------------------------------------------- /contextionary/core/annoyindex/kissrandom.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/annoyindex/kissrandom.h -------------------------------------------------------------------------------- /contextionary/core/centroid.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/centroid.go -------------------------------------------------------------------------------- /contextionary/core/centroid_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/centroid_test.go -------------------------------------------------------------------------------- /contextionary/core/certainty.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/certainty.go -------------------------------------------------------------------------------- /contextionary/core/combined.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/combined.go -------------------------------------------------------------------------------- /contextionary/core/combined_simple_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/combined_simple_test.go -------------------------------------------------------------------------------- /contextionary/core/component_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/component_test.go -------------------------------------------------------------------------------- /contextionary/core/contextionary.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/contextionary.go -------------------------------------------------------------------------------- /contextionary/core/generator/cmd/generator.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/generator/cmd/generator.go -------------------------------------------------------------------------------- /contextionary/core/generator/generator.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/generator/generator.go -------------------------------------------------------------------------------- /contextionary/core/indices_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/indices_test.go -------------------------------------------------------------------------------- /contextionary/core/memory_index.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/memory_index.go -------------------------------------------------------------------------------- /contextionary/core/mmapped.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/mmapped.go -------------------------------------------------------------------------------- /contextionary/core/similar_words.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/similar_words.go -------------------------------------------------------------------------------- /contextionary/core/similar_words_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/similar_words_test.go -------------------------------------------------------------------------------- /contextionary/core/stopwords/detector.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/stopwords/detector.go -------------------------------------------------------------------------------- /contextionary/core/vector.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/vector.go -------------------------------------------------------------------------------- /contextionary/core/wordlist.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/core/wordlist.go -------------------------------------------------------------------------------- /contextionary/schema/contextionary.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/schema/contextionary.go -------------------------------------------------------------------------------- /contextionary/schema/schema_search.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/schema/schema_search.go -------------------------------------------------------------------------------- /contextionary/schema/schema_search_params.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/schema/schema_search_params.go -------------------------------------------------------------------------------- /contextionary/schema/schema_search_params_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/schema/schema_search_params_test.go -------------------------------------------------------------------------------- /contextionary/schema/schema_search_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/contextionary/schema/schema_search_test.go -------------------------------------------------------------------------------- /errors/errors.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/errors/errors.go -------------------------------------------------------------------------------- /extensions/extension.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/extensions/extension.go -------------------------------------------------------------------------------- /extensions/looker_upper.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/extensions/looker_upper.go -------------------------------------------------------------------------------- /extensions/looker_upper_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/extensions/looker_upper_test.go -------------------------------------------------------------------------------- /extensions/storer.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/extensions/storer.go -------------------------------------------------------------------------------- /extensions/storer_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/extensions/storer_test.go -------------------------------------------------------------------------------- /gen_proto_code.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/gen_proto_code.sh -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/go.mod -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/go.sum -------------------------------------------------------------------------------- /logparser/parse.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/logparser/parse.go -------------------------------------------------------------------------------- /main/splitter_preprocessor.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/main/splitter_preprocessor.go -------------------------------------------------------------------------------- /prepare_docker_buildx.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/prepare_docker_buildx.sh -------------------------------------------------------------------------------- /preprocessing/dictionary_pre_processing.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/preprocessing/dictionary_pre_processing.go -------------------------------------------------------------------------------- /preprocessing/dictionary_pre_processing_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/preprocessing/dictionary_pre_processing_test.go -------------------------------------------------------------------------------- /preprocessing/hunspell.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/preprocessing/hunspell.go -------------------------------------------------------------------------------- /preprocessing/hunspell_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/preprocessing/hunspell_test.go -------------------------------------------------------------------------------- /server/api.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/api.go -------------------------------------------------------------------------------- /server/config/config.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/config/config.go -------------------------------------------------------------------------------- /server/contextionary.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/contextionary.go -------------------------------------------------------------------------------- /server/corpus_vectorizer.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/corpus_vectorizer.go -------------------------------------------------------------------------------- /server/corpus_vectorizer_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/corpus_vectorizer_test.go -------------------------------------------------------------------------------- /server/grpc_error.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/grpc_error.go -------------------------------------------------------------------------------- /server/server.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/server.go -------------------------------------------------------------------------------- /server/splitter.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/splitter.go -------------------------------------------------------------------------------- /server/splitter_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/splitter_test.go -------------------------------------------------------------------------------- /server/weight_manipulator.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/weight_manipulator.go -------------------------------------------------------------------------------- /server/weight_manipulator_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/server/weight_manipulator_test.go -------------------------------------------------------------------------------- /test/compoundsplitting/contextionary.idx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/test/compoundsplitting/contextionary.idx -------------------------------------------------------------------------------- /test/compoundsplitting/nl_NL.aff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/test/compoundsplitting/nl_NL.aff -------------------------------------------------------------------------------- /test/compoundsplitting/nl_NL.dic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/test/compoundsplitting/nl_NL.dic -------------------------------------------------------------------------------- /test/compoundsplitting/pre_processed_splitter_dict.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/test/compoundsplitting/pre_processed_splitter_dict.csv -------------------------------------------------------------------------------- /test/journey.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/test/journey.sh -------------------------------------------------------------------------------- /test/journey/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/test/journey/Dockerfile -------------------------------------------------------------------------------- /test/journey/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/test/journey/docker-compose.yml -------------------------------------------------------------------------------- /test/journey/go.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/test/journey/go.mod -------------------------------------------------------------------------------- /test/journey/go.sum: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/test/journey/go.sum -------------------------------------------------------------------------------- /test/journey/journey_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/test/journey/journey_test.go -------------------------------------------------------------------------------- /tools/dev/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/.gitignore -------------------------------------------------------------------------------- /tools/dev/contextionary-playground/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/contextionary-playground/.gitignore -------------------------------------------------------------------------------- /tools/dev/contextionary-playground/class_vectors/elastic.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/contextionary-playground/class_vectors/elastic.go -------------------------------------------------------------------------------- /tools/dev/contextionary-playground/class_vectors/main.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/contextionary-playground/class_vectors/main.go -------------------------------------------------------------------------------- /tools/dev/contextionary-playground/class_vectors/search.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/contextionary-playground/class_vectors/search.go -------------------------------------------------------------------------------- /tools/dev/contextionary-playground/class_vectors/stopwords.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/contextionary-playground/class_vectors/stopwords.go -------------------------------------------------------------------------------- /tools/dev/contextionary-playground/class_vectors/texts.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/contextionary-playground/class_vectors/texts.go -------------------------------------------------------------------------------- /tools/dev/contextionary-playground/comparison/main.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/contextionary-playground/comparison/main.go -------------------------------------------------------------------------------- /tools/dev/contextionary-playground/main.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/contextionary-playground/main.go -------------------------------------------------------------------------------- /tools/dev/contextionary-playground/schema/main.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/contextionary-playground/schema/main.go -------------------------------------------------------------------------------- /tools/dev/en_test-vectors-small.txt.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/en_test-vectors-small.txt.bz2 -------------------------------------------------------------------------------- /tools/dev/gen_simple_contextionary.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/gen_simple_contextionary.sh -------------------------------------------------------------------------------- /tools/dev/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/run.sh -------------------------------------------------------------------------------- /tools/dev/stopwords.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/dev/stopwords.json -------------------------------------------------------------------------------- /tools/download_contextionary.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/download_contextionary.sh -------------------------------------------------------------------------------- /tools/native_build_contextionary.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/native_build_contextionary.sh -------------------------------------------------------------------------------- /tools/preprocess_splitter_dict.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/preprocess_splitter_dict.sh -------------------------------------------------------------------------------- /tools/preprocess_splitter_dict_native_build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/weaviate/contextionary/HEAD/tools/preprocess_splitter_dict_native_build.sh --------------------------------------------------------------------------------