├── .github ├── CODEOWNERS ├── PULL_REQUEST_TEMPLATE.md ├── draft-release-notes-config.yml └── workflows │ ├── CI.yml │ ├── add-untriaged.yml │ ├── auto-release.yml │ ├── backport.yml │ ├── backwards_compatibility_tests_workflow.yml │ ├── changelog_verifier.yml │ ├── check-workflow-events.yml │ ├── copy-linked-issue-labels.yml │ ├── delete_backport_branch.yml │ ├── draft-release-notes-workflow.yml │ ├── links.yml │ ├── maven-publish.yml │ ├── test_aggregations.yml │ └── test_security.yml ├── .gitignore ├── .idea ├── copyright │ ├── SPDX_ALv2.xml │ └── profiles_settings.xml └── runConfigurations │ ├── DebugNeuralSearch.xml │ ├── Run_Neural_Search.xml │ └── Run_With_Debug_Port.xml ├── .whitesource ├── ADMINS.md ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DEVELOPER_GUIDE.md ├── LICENSE ├── MAINTAINERS.md ├── NOTICE ├── README.md ├── RELEASING.md ├── SECURITY.md ├── TRIAGING.md ├── build.gradle ├── codecov.yml ├── formatter ├── formatterConfig.xml └── license-header.txt ├── gradle.properties ├── gradle ├── formatting.gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── lombok.config ├── qa ├── build.gradle ├── restart-upgrade │ ├── build.gradle │ └── src │ │ └── test │ │ ├── java │ │ └── org │ │ │ └── opensearch │ │ │ └── neuralsearch │ │ │ └── bwc │ │ │ └── restart │ │ │ ├── AbstractRestartUpgradeRestTestCase.java │ │ │ ├── BatchIngestionIT.java │ │ │ ├── HybridSearchIT.java │ │ │ ├── HybridSearchWithRescoreIT.java │ │ │ ├── KnnRadialSearchIT.java │ │ │ ├── MultiModalSearchIT.java │ │ │ ├── NeuralQueryEnricherProcessorIT.java │ │ │ ├── NeuralSparseSearchIT.java │ │ │ ├── NeuralSparseTwoPhaseProcessorIT.java │ │ │ ├── RestNeuralStatsActionIT.java │ │ │ ├── SemanticSearchIT.java │ │ │ └── TextChunkingProcessorIT.java │ │ └── resources │ │ └── processor │ │ ├── ChunkingIndexSettings.json │ │ ├── CreateModelGroupRequestBody.json │ │ ├── IndexMappingMultipleShard.json │ │ ├── 
IndexMappingSingleShard.json │ │ ├── NeuralSparseTwoPhaseProcessorConfiguration.json │ │ ├── PipelineConfiguration.json │ │ ├── PipelineForSparseEncodingProcessorConfiguration.json │ │ ├── PipelineForTextChunkingProcessorConfiguration.json │ │ ├── PipelineForTextImageProcessorConfiguration.json │ │ ├── SearchRequestPipelineConfiguration.json │ │ ├── SparseIndexMappings.json │ │ ├── UploadModelRequestBody.json │ │ └── UploadSparseEncodingModelRequestBody.json └── rolling-upgrade │ ├── build.gradle │ └── src │ └── test │ ├── java │ └── org │ │ └── opensearch │ │ └── neuralsearch │ │ └── bwc │ │ └── rolling │ │ ├── AbstractRollingUpgradeTestCase.java │ │ ├── BatchIngestionIT.java │ │ ├── HybridSearchIT.java │ │ ├── HybridSearchRelevancyIT.java │ │ ├── HybridSearchWithRescoreIT.java │ │ ├── KnnRadialSearchIT.java │ │ ├── MultiModalSearchIT.java │ │ ├── NeuralQueryEnricherProcessorIT.java │ │ ├── NeuralSparseSearchIT.java │ │ ├── NeuralSparseTwoPhaseProcessorIT.java │ │ ├── RestNeuralStatsActionIT.java │ │ ├── SemanticSearchIT.java │ │ └── TextChunkingProcessorIT.java │ └── resources │ └── processor │ ├── ChunkingIndexSettings.json │ ├── CreateModelGroupRequestBody.json │ ├── IndexMappings.json │ ├── NeuralSparseTwoPhaseProcessorConfiguration.json │ ├── PipelineConfiguration.json │ ├── PipelineForSparseEncodingProcessorConfiguration.json │ ├── PipelineForTextChunkingProcessorConfiguration.json │ ├── PipelineForTextImageProcessorConfiguration.json │ ├── SearchRequestPipelineConfiguration.json │ ├── SparseIndexMappings.json │ ├── UploadModelRequestBody.json │ └── UploadSparseEncodingModelRequestBody.json ├── release-notes ├── opensearch-neural-search.release-notes-2.10.0.0.md ├── opensearch-neural-search.release-notes-2.11.0.0.md ├── opensearch-neural-search.release-notes-2.12.0.0.md ├── opensearch-neural-search.release-notes-2.13.0.0.md ├── opensearch-neural-search.release-notes-2.14.0.0.md ├── opensearch-neural-search.release-notes-2.15.0.0.md ├── 
opensearch-neural-search.release-notes-2.16.0.0.md ├── opensearch-neural-search.release-notes-2.17.0.0.md ├── opensearch-neural-search.release-notes-2.18.0.0.md ├── opensearch-neural-search.release-notes-2.19.0.0.md ├── opensearch-neural-search.release-notes-2.4.0.0.md ├── opensearch-neural-search.release-notes-2.4.1.0.md ├── opensearch-neural-search.release-notes-2.5.0.0.md ├── opensearch-neural-search.release-notes-2.6.0.0.md ├── opensearch-neural-search.release-notes-2.7.0.0.md ├── opensearch-neural-search.release-notes-2.8.0.0.md ├── opensearch-neural-search.release-notes-2.9.0.0.md ├── opensearch-neural-search.release-notes-3.0.0.0-alpha1.md ├── opensearch-neural-search.release-notes-3.0.0.0-beta1.md └── opensearch-neural-search.release-notes-3.0.0.0.md ├── repositories.gradle ├── settings.gradle └── src ├── main ├── java │ └── org │ │ └── opensearch │ │ └── neuralsearch │ │ ├── common │ │ ├── MinClusterVersionUtil.java │ │ └── VectorUtil.java │ │ ├── constants │ │ ├── MappingConstants.java │ │ ├── SemanticFieldConstants.java │ │ └── SemanticInfoFieldConstants.java │ │ ├── executors │ │ ├── HybridQueryExecutor.java │ │ ├── HybridQueryExecutorCollector.java │ │ ├── HybridQueryExecutorCollectorManager.java │ │ ├── HybridQueryRewriteCollectorManager.java │ │ └── HybridQueryScoreSupplierCollectorManager.java │ │ ├── highlight │ │ ├── SemanticHighlighter.java │ │ ├── SemanticHighlighterEngine.java │ │ └── extractor │ │ │ ├── BooleanQueryTextExtractor.java │ │ │ ├── HybridQueryTextExtractor.java │ │ │ ├── NestedQueryTextExtractor.java │ │ │ ├── NeuralQueryTextExtractor.java │ │ │ ├── QueryTextExtractor.java │ │ │ ├── QueryTextExtractorRegistry.java │ │ │ └── TermQueryTextExtractor.java │ │ ├── mapper │ │ ├── SemanticFieldMapper.java │ │ └── dto │ │ │ └── SemanticParameters.java │ │ ├── mappingtransformer │ │ ├── SemanticInfoConfigBuilder.java │ │ └── SemanticMappingTransformer.java │ │ ├── ml │ │ └── MLCommonsClientAccessor.java │ │ ├── plugin │ │ └── 
NeuralSearch.java │ │ ├── processor │ │ ├── AbstractScoreHybridizationProcessor.java │ │ ├── CompoundTopDocs.java │ │ ├── ExplanationResponseProcessor.java │ │ ├── InferenceProcessor.java │ │ ├── InferenceRequest.java │ │ ├── MapInferenceRequest.java │ │ ├── NeuralQueryEnricherProcessor.java │ │ ├── NeuralSparseTwoPhaseProcessor.java │ │ ├── NormalizationExecuteDTO.java │ │ ├── NormalizationProcessor.java │ │ ├── NormalizationProcessorWorkflow.java │ │ ├── NormalizationProcessorWorkflowExecuteRequest.java │ │ ├── NormalizeScoresDTO.java │ │ ├── RRFProcessor.java │ │ ├── SearchShard.java │ │ ├── SimilarityInferenceRequest.java │ │ ├── SparseEncodingProcessor.java │ │ ├── TechniqueCompatibilityCheckDTO.java │ │ ├── TextChunkingProcessor.java │ │ ├── TextEmbeddingProcessor.java │ │ ├── TextImageEmbeddingProcessor.java │ │ ├── TextInferenceRequest.java │ │ ├── chunker │ │ │ ├── Chunker.java │ │ │ ├── ChunkerFactory.java │ │ │ ├── ChunkerParameterParser.java │ │ │ ├── DelimiterChunker.java │ │ │ └── FixedTokenLengthChunker.java │ │ ├── combination │ │ │ ├── ArithmeticMeanScoreCombinationTechnique.java │ │ │ ├── CombineScoresDto.java │ │ │ ├── GeometricMeanScoreCombinationTechnique.java │ │ │ ├── HarmonicMeanScoreCombinationTechnique.java │ │ │ ├── RRFScoreCombinationTechnique.java │ │ │ ├── ScoreCombinationFactory.java │ │ │ ├── ScoreCombinationTechnique.java │ │ │ ├── ScoreCombinationUtil.java │ │ │ └── ScoreCombiner.java │ │ ├── dto │ │ │ └── SemanticFieldInfo.java │ │ ├── explain │ │ │ ├── CombinedExplanationDetails.java │ │ │ ├── DocIdAtSearchShard.java │ │ │ ├── ExplainableTechnique.java │ │ │ ├── ExplanationDetails.java │ │ │ ├── ExplanationPayload.java │ │ │ └── ExplanationUtils.java │ │ ├── factory │ │ │ ├── ExplanationResponseProcessorFactory.java │ │ │ ├── NormalizationProcessorFactory.java │ │ │ ├── RRFProcessorFactory.java │ │ │ ├── RerankProcessorFactory.java │ │ │ ├── SemanticFieldProcessorFactory.java │ │ │ ├── SparseEncodingProcessorFactory.java │ │ │ 
├── TextChunkingProcessorFactory.java │ │ │ ├── TextEmbeddingProcessorFactory.java │ │ │ └── TextImageEmbeddingProcessorFactory.java │ │ ├── highlight │ │ │ └── SentenceHighlightingRequest.java │ │ ├── normalization │ │ │ ├── L2ScoreNormalizationTechnique.java │ │ │ ├── MinMaxScoreNormalizationTechnique.java │ │ │ ├── RRFNormalizationTechnique.java │ │ │ ├── ScoreNormalizationFactory.java │ │ │ ├── ScoreNormalizationTechnique.java │ │ │ ├── ScoreNormalizationUtil.java │ │ │ ├── ScoreNormalizer.java │ │ │ └── ZScoreNormalizationTechnique.java │ │ ├── optimization │ │ │ ├── InferenceFilter.java │ │ │ ├── TextEmbeddingInferenceFilter.java │ │ │ └── TextImageEmbeddingInferenceFilter.java │ │ ├── rerank │ │ │ ├── ByFieldRerankProcessor.java │ │ │ ├── MLOpenSearchRerankProcessor.java │ │ │ ├── RerankProcessor.java │ │ │ ├── RerankType.java │ │ │ ├── RescoringRerankProcessor.java │ │ │ └── context │ │ │ │ ├── ContextSourceFetcher.java │ │ │ │ ├── DocumentContextSourceFetcher.java │ │ │ │ └── QueryContextSourceFetcher.java │ │ ├── semantic │ │ │ └── SemanticFieldProcessor.java │ │ └── util │ │ │ ├── ChunkUtils.java │ │ │ └── ProcessorUtils.java │ │ ├── query │ │ ├── HybridBulkScorer.java │ │ ├── HybridQuery.java │ │ ├── HybridQueryBuilder.java │ │ ├── HybridQueryContext.java │ │ ├── HybridQueryDocIdStream.java │ │ ├── HybridQueryScorer.java │ │ ├── HybridQueryWeight.java │ │ ├── HybridScoreBlockBoundaryPropagator.java │ │ ├── HybridScorerSupplier.java │ │ ├── HybridSubQueryScorer.java │ │ ├── ModelInferenceQueryBuilder.java │ │ ├── NeuralKNNQuery.java │ │ ├── NeuralKNNQueryBuilder.java │ │ ├── NeuralQueryBuilder.java │ │ ├── NeuralSparseQueryBuilder.java │ │ ├── NeuralSparseQueryTwoPhaseInfo.java │ │ ├── dto │ │ │ ├── NeuralQueryBuildStage.java │ │ │ └── NeuralQueryTargetFieldConfig.java │ │ ├── ext │ │ │ └── RerankSearchExtBuilder.java │ │ ├── parser │ │ │ └── NeuralQueryParser.java │ │ └── visitor │ │ │ └── NeuralSearchQueryVisitor.java │ │ ├── rest │ │ └── 
RestNeuralStatsAction.java │ │ ├── search │ │ ├── HitsThresholdChecker.java │ │ ├── HybridDisiWrapper.java │ │ ├── collector │ │ │ ├── HybridLeafCollector.java │ │ │ ├── HybridSearchCollector.java │ │ │ ├── HybridTopFieldDocSortCollector.java │ │ │ ├── HybridTopScoreDocCollector.java │ │ │ ├── PagingFieldCollector.java │ │ │ └── SimpleFieldCollector.java │ │ ├── lucene │ │ │ └── MultiLeafFieldComparator.java │ │ ├── query │ │ │ ├── HybridAggregationProcessor.java │ │ │ ├── HybridCollectorManager.java │ │ │ ├── HybridQueryFieldDocComparator.java │ │ │ ├── HybridQueryPhaseSearcher.java │ │ │ ├── HybridQueryScoreDocsMerger.java │ │ │ ├── TopDocsMerger.java │ │ │ └── exception │ │ │ │ └── HybridSearchRescoreQueryException.java │ │ └── util │ │ │ ├── HybridSearchResultFormatUtil.java │ │ │ └── HybridSearchSortUtil.java │ │ ├── settings │ │ ├── NeuralSearchSettings.java │ │ └── NeuralSearchSettingsAccessor.java │ │ ├── stats │ │ ├── NeuralStatsInput.java │ │ ├── common │ │ │ ├── StatName.java │ │ │ ├── StatSnapshot.java │ │ │ └── StatType.java │ │ ├── events │ │ │ ├── EventStat.java │ │ │ ├── EventStatName.java │ │ │ ├── EventStatType.java │ │ │ ├── EventStatsManager.java │ │ │ ├── TimestampedEventStat.java │ │ │ └── TimestampedEventStatSnapshot.java │ │ └── info │ │ │ ├── CountableInfoStatSnapshot.java │ │ │ ├── InfoStatName.java │ │ │ ├── InfoStatType.java │ │ │ ├── InfoStatsManager.java │ │ │ └── SettableInfoStatSnapshot.java │ │ ├── transport │ │ ├── NeuralStatsAction.java │ │ ├── NeuralStatsNodeRequest.java │ │ ├── NeuralStatsNodeResponse.java │ │ ├── NeuralStatsRequest.java │ │ ├── NeuralStatsResponse.java │ │ └── NeuralStatsTransportAction.java │ │ └── util │ │ ├── HybridQueryUtil.java │ │ ├── NeuralQueryValidationUtil.java │ │ ├── NeuralSearchClusterUtil.java │ │ ├── PipelineServiceUtil.java │ │ ├── ProcessorDocumentUtils.java │ │ ├── RetryUtil.java │ │ ├── SemanticMLModelUtils.java │ │ ├── SemanticMappingUtils.java │ │ ├── TokenWeightUtil.java │ │ └── prune │ │ 
├── PruneType.java │ │ └── PruneUtils.java └── plugin-metadata │ └── plugin-security.policy ├── test ├── java │ └── org │ │ └── opensearch │ │ └── neuralsearch │ │ ├── NeuralSearchIT.java │ │ ├── NeuralSearchTests.java │ │ ├── ValidateDependentPluginInstallationIT.java │ │ ├── common │ │ └── VectorUtilTests.java │ │ ├── constants │ │ └── TestCommonConstants.java │ │ ├── executors │ │ └── HybridQueryExecutorIT.java │ │ ├── highlight │ │ ├── QueryTextExtractorTests.java │ │ ├── SemanticHighlighterEngineTests.java │ │ ├── SemanticHighlighterIT.java │ │ └── SemanticHighlighterTests.java │ │ ├── mapper │ │ └── SemanticFieldMapperTests.java │ │ ├── mappingtransformer │ │ ├── SemanticInfoConfigBuilderTests.java │ │ └── SemanticMappingTransformerTests.java │ │ ├── ml │ │ └── MLCommonsClientAccessorTests.java │ │ ├── plugin │ │ └── NeuralSearchTests.java │ │ ├── processor │ │ ├── AbstractScoreHybridizationProcessorTests.java │ │ ├── CompoundTopDocsTests.java │ │ ├── ExplanationResponseProcessorTests.java │ │ ├── InferenceProcessorTestCase.java │ │ ├── InferenceProcessorTests.java │ │ ├── NeuralQueryEnricherProcessorIT.java │ │ ├── NeuralQueryEnricherProcessorTests.java │ │ ├── NeuralSparseTwoPhaseProcessorIT.java │ │ ├── NeuralSparseTwoPhaseProcessorTests.java │ │ ├── NormalizationProcessorIT.java │ │ ├── NormalizationProcessorTests.java │ │ ├── NormalizationProcessorWorkflowTests.java │ │ ├── RRFProcessorIT.java │ │ ├── RRFProcessorTests.java │ │ ├── ScoreCombinationIT.java │ │ ├── ScoreCombinationTechniqueTests.java │ │ ├── ScoreNormalizationIT.java │ │ ├── ScoreNormalizationTechniqueTests.java │ │ ├── SparseEncodingProcessIT.java │ │ ├── SparseEncodingProcessorTests.java │ │ ├── TextChunkingProcessorIT.java │ │ ├── TextChunkingProcessorTests.java │ │ ├── TextEmbeddingProcessorIT.java │ │ ├── TextEmbeddingProcessorTests.java │ │ ├── TextImageEmbeddingProcessorIT.java │ │ ├── TextImageEmbeddingProcessorTests.java │ │ ├── chunker │ │ │ ├── ChunkerFactoryTests.java │ │ │ ├── 
ChunkerParameterParserTests.java │ │ │ ├── DelimiterChunkerTests.java │ │ │ └── FixedTokenLengthChunkerTests.java │ │ ├── combination │ │ │ ├── ArithmeticMeanScoreCombinationTechniqueTests.java │ │ │ ├── BaseScoreCombinationTechniqueTests.java │ │ │ ├── GeometricMeanScoreCombinationTechniqueTests.java │ │ │ ├── HarmonicMeanScoreCombinationTechniqueTests.java │ │ │ ├── RRFScoreCombinationTechniqueTests.java │ │ │ ├── ScoreCombinationFactoryTests.java │ │ │ └── ScoreNormalizationUtilTests.java │ │ ├── dto │ │ │ └── SemanticFieldInfoTests.java │ │ ├── explain │ │ │ └── ExplanationUtilsTests.java │ │ ├── factory │ │ │ ├── ExplanationResponseProcessorFactoryTests.java │ │ │ ├── NormalizationProcessorFactoryTests.java │ │ │ ├── RRFProcessorFactoryTests.java │ │ │ ├── RerankProcessorFactoryTests.java │ │ │ ├── SemanticFieldProcessorFactoryTests.java │ │ │ ├── SparseEncodingEmbeddingProcessorFactoryTests.java │ │ │ ├── TextChunkingProcessorFactoryTests.java │ │ │ └── TextImageEmbeddingProcessorFactoryTests.java │ │ ├── normalization │ │ │ ├── L2ScoreNormalizationTechniqueTests.java │ │ │ ├── MinMaxScoreNormalizationTechniqueTests.java │ │ │ ├── RRFNormalizationTechniqueTests.java │ │ │ ├── ScoreNormalizationFactoryTests.java │ │ │ ├── ScoreNormalizationUtilTests.java │ │ │ └── ZScoreNormalizationTechniqueTests.java │ │ ├── optimization │ │ │ ├── TextEmbeddingInferenceFilterTests.java │ │ │ └── TextImageEmbeddingInferenceFilterTests.java │ │ ├── rerank │ │ │ ├── ByFieldRerankProcessorIT.java │ │ │ ├── ByFieldRerankProcessorTests.java │ │ │ ├── MLOpenSearchRerankProcessorIT.java │ │ │ └── MLOpenSearchRerankProcessorTests.java │ │ ├── semantic │ │ │ └── SemanticFieldProcessorTests.java │ │ └── util │ │ │ └── ChunkUtilsTests.java │ │ ├── query │ │ ├── HybridBulkScorerTests.java │ │ ├── HybridQueryAggregationsIT.java │ │ ├── HybridQueryBuilderTests.java │ │ ├── HybridQueryDocIdStreamTests.java │ │ ├── HybridQueryExplainIT.java │ │ ├── HybridQueryFilterIT.java │ │ ├── 
HybridQueryIT.java │ │ ├── HybridQueryInnerHitsIT.java │ │ ├── HybridQueryPostFilterIT.java │ │ ├── HybridQueryScorerTests.java │ │ ├── HybridQuerySortIT.java │ │ ├── HybridQueryTests.java │ │ ├── HybridQueryWeightTests.java │ │ ├── HybridScoreBlockBoundaryPropagatorTests.java │ │ ├── HybridScorerSupplierTests.java │ │ ├── HybridSubQueryScorerTests.java │ │ ├── NeuralKNNQueryBuilderTests.java │ │ ├── NeuralKNNQueryTests.java │ │ ├── NeuralQueryBuilderBuilderTests.java │ │ ├── NeuralQueryBuilderRewriteTests.java │ │ ├── NeuralQueryBuilderTests.java │ │ ├── NeuralQueryIT.java │ │ ├── NeuralSparseQueryBuilderTests.java │ │ ├── NeuralSparseQueryIT.java │ │ ├── NeuralSparseQueryTwoPhaseInfoTests.java │ │ ├── OpenSearchQueryTestCase.java │ │ ├── aggregation │ │ │ ├── BaseAggregationsWithHybridQueryIT.java │ │ │ ├── BucketAggregationsWithHybridQueryIT.java │ │ │ ├── MetricAggregationsWithHybridQueryIT.java │ │ │ └── PipelineAggregationsWithHybridQueryIT.java │ │ ├── ext │ │ │ └── RerankSearchExtBuilderTests.java │ │ └── visitor │ │ │ └── NeuralSearchQueryVisitorTests.java │ │ ├── rest │ │ ├── RestNeuralStatsActionIT.java │ │ └── RestNeuralStatsActionTests.java │ │ ├── search │ │ ├── HitsThresholdCheckerTests.java │ │ ├── HybridDisiWrapperTests.java │ │ ├── collector │ │ │ ├── HybridCollectorTestCase.java │ │ │ ├── HybridTopFieldDocSortCollectorTests.java │ │ │ └── HybridTopScoreDocCollectorTests.java │ │ ├── query │ │ │ ├── HybridAggregationProcessorTests.java │ │ │ ├── HybridCollectorManagerTests.java │ │ │ ├── HybridQueryPhaseSearcherTests.java │ │ │ ├── HybridQueryScoreDocsMergerTests.java │ │ │ └── TopDocsMergerTests.java │ │ └── util │ │ │ └── HybridSearchResultFormatUtilTests.java │ │ ├── stats │ │ ├── NeuralStatsInputTests.java │ │ ├── events │ │ │ ├── EventStatNameTests.java │ │ │ ├── EventStatsManagerTests.java │ │ │ ├── TimestampedEventStatSnapshotTests.java │ │ │ └── TimestampedEventStatTests.java │ │ └── info │ │ │ ├── CountableInfoStatSnapshotTests.java │ │ │ 
├── InfoStatNameTests.java │ │ │ ├── InfoStatsManagerTests.java │ │ │ └── SettableInfoStatSnapshotTests.java │ │ ├── transport │ │ ├── NeuralStatsResponseTests.java │ │ └── NeuralStatsTransportActionTests.java │ │ └── util │ │ ├── HybridQueryUtilTests.java │ │ ├── NeuralSearchClusterUtilTests.java │ │ ├── PipelineServiceUtilTests.java │ │ ├── ProcessorDocumentUtilsTests.java │ │ ├── ProcessorUtilsTests.java │ │ ├── SemanticFieldMapperTestUtil.java │ │ ├── SemanticMLModelUtilsTests.java │ │ ├── SemanticMappingUtilsTests.java │ │ ├── TokenWeightUtilTests.java │ │ └── prune │ │ ├── PruneTypeTests.java │ │ └── PruneUtilsTests.java └── resources │ ├── highlight │ └── UploadSentenceHighlightingModelRequestBody.json │ ├── mapper │ └── mappingWithNestedSemanticFields.json │ ├── mappingtransformer │ └── transformedMappingMultipleSemanticFields.json │ ├── processor │ ├── CreateModelGroupRequestBody.json │ ├── IndexMappings.json │ ├── NeuralSparseTwoPhaseAndNeuralEnrichProcessorConfiguration.json │ ├── NeuralSparseTwoPhaseProcessorConfiguration.json │ ├── PipelineConfiguration.json │ ├── PipelineConfigurationWithBatchSize.json │ ├── PipelineConfigurationWithBatchSizeWithSkipExisting.json │ ├── PipelineConfigurationWithNestedFieldsMapping.json │ ├── PipelineConfigurationWithNestedFieldsMappingWithSkipExisting.json │ ├── PipelineConfigurationWithSkipExisting.json │ ├── PipelineForTextImageEmbeddingProcessorConfiguration.json │ ├── PipelineForTextImageEmbeddingWithSkipExistingProcessorConfiguration.json │ ├── ReRankByFieldPipelineConfiguration.json │ ├── RerankMLOpenSearchPipelineConfiguration.json │ ├── SearchRequestPipelineConfiguration.json │ ├── SparseEncodingIndexMappings.json │ ├── SparseEncodingPipelineConfiguration.json │ ├── SparseEncodingPipelineConfigurationWithPrune.json │ ├── SparseEncodingPipelineConfigurationWithSkipExisting.json │ ├── UploadModelRequestBody.json │ ├── UploadSparseEncodingModelRequestBody.json │ ├── UploadTextSimilarityModelRequestBody.json │ ├── 
bulk_item_template.json │ ├── chunker │ │ ├── PipelineForCascadedChunker.json │ │ ├── PipelineForDelimiterChunker.json │ │ ├── PipelineForFixedTokenLengthChunkerWithLetterTokenizer.json │ │ ├── PipelineForFixedTokenLengthChunkerWithLowercaseTokenizer.json │ │ ├── PipelineForFixedTokenLengthChunkerWithStandardTokenizer.json │ │ ├── TextChunkingIndexSettings.json │ │ ├── TextChunkingTestDocument.json │ │ └── TextChunkingTestLongDocument.json │ ├── ingest_bulk.json │ ├── ingest_doc1.json │ ├── ingest_doc2.json │ ├── ingest_doc3.json │ ├── ingest_doc4.json │ ├── ingest_doc5.json │ ├── semantic │ │ ├── ingest_doc1.json │ │ ├── ingest_doc2.json │ │ ├── ingest_doc3.json │ │ ├── ingested_doc1.json │ │ ├── ingested_doc2.json │ │ ├── ingested_doc3.json │ │ └── invalid_ingest_doc.json │ ├── update_doc1.json │ ├── update_doc2.json │ ├── update_doc3.json │ ├── update_doc4.json │ └── update_doc5.json │ └── util │ └── ProcessorDocumentUtils.json └── testFixtures └── java └── org └── opensearch └── neuralsearch ├── BaseNeuralSearchIT.java ├── OpenSearchSecureRestTestCase.java └── util ├── AggregationsTestUtils.java ├── BatchIngestionUtils.java ├── NeuralSearchClusterTestUtils.java └── TestUtils.java /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This should match the owning team set up in https://github.com/orgs/opensearch-project/teams 2 | * @heemin32 @navneet1v @VijayanB @vamshin @jmazanec15 @naveentatikonda @junqiu-lei @martin-gaievski @sean-zheng-amazon @model-collapse @zane-neo @vibrantvarun @zhichao-aws @yuye-aws @minalsha 3 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | [Describe what this change achieves] 3 | 4 | ### Related Issues 5 | Resolves #[Issue number to be closed when this PR is merged] 6 | 7 | 8 | ### Check List 9 | - [ ] New 
functionality includes testing. 10 | - [ ] New functionality has been documented. 11 | - [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). 12 | - [ ] Commits are signed per the DCO using `--signoff`. 13 | - [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). 14 | 15 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 16 | For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/neural-search/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 17 | -------------------------------------------------------------------------------- /.github/draft-release-notes-config.yml: -------------------------------------------------------------------------------- 1 | # The overall template of the release notes 2 | template: | 3 | Compatible with OpenSearch (**set version here**). 
4 | $CHANGES 5 | 6 | # Setting the formatting and sorting for the release notes body 7 | name-template: Version (set version here) 8 | change-template: '* $TITLE (#$NUMBER)' 9 | sort-by: merged_at 10 | sort-direction: ascending 11 | replacers: 12 | - search: '##' 13 | replace: '###' 14 | 15 | # Organizing the tagged PRs into categories 16 | categories: 17 | - title: 'Breaking Changes' 18 | labels: 19 | - 'Breaking Changes' 20 | - title: 'Features' 21 | labels: 22 | - 'Features' 23 | - title: 'Enhancements' 24 | labels: 25 | - 'Enhancements' 26 | - title: 'Bug Fixes' 27 | labels: 28 | - 'Bug Fixes' 29 | - title: 'Infrastructure' 30 | labels: 31 | - 'Infrastructure' 32 | - title: 'Documentation' 33 | labels: 34 | - 'Documentation' 35 | - title: 'Maintenance' 36 | labels: 37 | - 'Maintenance' 38 | - title: 'Refactoring' 39 | labels: 40 | - 'Refactoring' 41 | -------------------------------------------------------------------------------- /.github/workflows/add-untriaged.yml: -------------------------------------------------------------------------------- 1 | name: Apply 'untriaged' label during issue lifecycle 2 | 3 | on: 4 | issues: 5 | types: [opened, reopened, transferred] 6 | 7 | jobs: 8 | apply-label: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/github-script@v6 12 | with: 13 | script: | 14 | github.rest.issues.addLabels({ 15 | issue_number: context.issue.number, 16 | owner: context.repo.owner, 17 | repo: context.repo.repo, 18 | labels: ['untriaged'] 19 | }) 20 | -------------------------------------------------------------------------------- /.github/workflows/auto-release.yml: -------------------------------------------------------------------------------- 1 | name: Releases 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | steps: 14 | - name: GitHub App token 15 | id: github_app_token 16 | uses: tibdex/github-app-token@v1.5.0 17 | with: 18 | app_id: ${{ 
secrets.APP_ID }} 19 | private_key: ${{ secrets.APP_PRIVATE_KEY }} 20 | installation_id: 22958780 21 | - name: Get tag 22 | id: tag 23 | uses: dawidd6/action-get-tag@v1 24 | - uses: actions/checkout@v2 25 | - uses: ncipollo/release-action@v1 26 | with: 27 | github_token: ${{ steps.github_app_token.outputs.token }} 28 | bodyFile: release-notes/opensearch-neural-search.release-notes-${{steps.tag.outputs.tag}}.md 29 | -------------------------------------------------------------------------------- /.github/workflows/backport.yml: -------------------------------------------------------------------------------- 1 | name: Backport 2 | on: 3 | pull_request_target: 4 | types: 5 | - closed 6 | - labeled 7 | 8 | jobs: 9 | backport: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | name: Backport 15 | steps: 16 | - name: GitHub App token 17 | id: github_app_token 18 | uses: tibdex/github-app-token@v1.5.0 19 | with: 20 | app_id: ${{ secrets.APP_ID }} 21 | private_key: ${{ secrets.APP_PRIVATE_KEY }} 22 | installation_id: 22958780 23 | 24 | - name: Backport 25 | uses: VachaShah/backport@v1.1.4 26 | with: 27 | github_token: ${{ steps.github_app_token.outputs.token }} 28 | branch_name: backport/backport-${{ github.event.number }} 29 | -------------------------------------------------------------------------------- /.github/workflows/backwards_compatibility_tests_workflow.yml: -------------------------------------------------------------------------------- 1 | name: Backwards Compatibility Tests NeuralSearch 2 | on: 3 | push: 4 | branches: 5 | - "*" 6 | - "feature/**" 7 | pull_request: 8 | branches: 9 | - "*" 10 | - "feature/**" 11 | 12 | jobs: 13 | Restart-Upgrade-BWCTests-NeuralSearch: 14 | strategy: 15 | matrix: 16 | java: [ 21, 23 ] 17 | os: [ubuntu-latest] 18 | bwc_version : [ "2.9.0","2.10.0","2.11.0","2.12.0","2.13.0","2.14.0","2.15.0","2.16.0","2.17.0","2.18.0","2.19.0","2.20.0-SNAPSHOT","3.0.0" ] 19 | opensearch_version : [ 
"3.1.0-SNAPSHOT" ] 20 | 21 | name: NeuralSearch Restart-Upgrade BWC Tests 22 | runs-on: ${{ matrix.os }} 23 | env: 24 | BWC_VERSION_RESTART_UPGRADE: ${{ matrix.bwc_version }} 25 | 26 | steps: 27 | - name: Checkout neural-search 28 | uses: actions/checkout@v1 29 | 30 | - name: Setup Java ${{ matrix.java }} 31 | uses: actions/setup-java@v1 32 | with: 33 | java-version: ${{ matrix.java }} 34 | 35 | - name: Run NeuralSearch Restart-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}} 36 | run: | 37 | echo "Running restart-upgrade backwards compatibility tests ..." 38 | ./gradlew :qa:restart-upgrade:testAgainstNewCluster -D'tests.bwc.version=${{ matrix.bwc_version }}' 39 | 40 | Rolling-Upgrade-BWCTests-NeuralSearch: 41 | strategy: 42 | matrix: 43 | java: [21, 23] 44 | os: [ubuntu-latest] 45 | bwc_version: [ "2.20.0-SNAPSHOT","3.0.0" ] 46 | opensearch_version: [ "3.1.0-SNAPSHOT" ] 47 | 48 | name: NeuralSearch Rolling-Upgrade BWC Tests 49 | runs-on: ${{ matrix.os }} 50 | env: 51 | BWC_VERSION_ROLLING_UPGRADE: ${{ matrix.bwc_version }} 52 | 53 | steps: 54 | - name: Checkout neural-search 55 | uses: actions/checkout@v1 56 | 57 | - name: Setup Java ${{ matrix.java }} 58 | uses: actions/setup-java@v1 59 | with: 60 | java-version: ${{ matrix.java }} 61 | 62 | - name: Run NeuralSearch Rolling-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}} 63 | run: | 64 | echo "Running rolling-upgrade backwards compatibility tests ..." 
65 | ./gradlew :qa:rolling-upgrade:testRollingUpgrade -D'tests.bwc.version=${{ matrix.bwc_version }}' 66 | -------------------------------------------------------------------------------- /.github/workflows/changelog_verifier.yml: -------------------------------------------------------------------------------- 1 | name: "Changelog Verifier" 2 | on: 3 | pull_request: 4 | types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] 5 | 6 | jobs: 7 | # Enforces the update of a changelog file on every pull request 8 | verify-changelog: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | with: 13 | token: ${{ secrets.GITHUB_TOKEN }} 14 | ref: ${{ github.event.pull_request.head.sha }} 15 | 16 | - uses: dangoslen/changelog-enforcer@v3 17 | with: 18 | skipLabels: "autocut, skip-changelog" 19 | -------------------------------------------------------------------------------- /.github/workflows/check-workflow-events.yml: -------------------------------------------------------------------------------- 1 | name: Check Workflow Events 2 | on: 3 | pull_request: 4 | 5 | jobs: 6 | check-workflow-events: 7 | runs-on: ubuntu-latest 8 | name: Check Workflow Events 9 | steps: 10 | - uses: actions/checkout@v4 11 | - name: Check Workflow Events 12 | working-directory: .github/workflows 13 | run: | 14 | set +e 15 | EVENT_COUNT=0 16 | for file_found in `ls | grep .ym`; do 17 | yq -r e '.on | keys | .[0]' $file_found | grep -q pull_request_target 18 | EVENT_FOUND=$? 19 | 20 | if [ "$EVENT_FOUND" = 0 ] && [ "$file_found" != "backport.yml" ] && [ "$file_found" != "copy-linked-issue-labels.yml" ]; then 21 | EVENT_COUNT=$(( EVENT_COUNT+1 )) 22 | echo "'$file_found' workflow file contains 'pull_request_target' event, please remove!" 
23 | fi 24 | done 25 | 26 | if [ "$EVENT_COUNT" != 0 ]; then 27 | exit 1 28 | fi 29 | -------------------------------------------------------------------------------- /.github/workflows/copy-linked-issue-labels.yml: -------------------------------------------------------------------------------- 1 | name: Copy labels from linked issues 2 | on: 3 | pull_request_target: 4 | types: [opened, edited, review_requested, synchronize, reopened, ready_for_review] 5 | 6 | jobs: 7 | copy-issue-labels: 8 | if: github.repository == 'opensearch-project/neural-search' 9 | runs-on: ubuntu-latest 10 | permissions: 11 | issues: read 12 | contents: read 13 | pull-requests: write 14 | steps: 15 | - name: copy-issue-labels 16 | uses: michalvankodev/copy-issue-labels@v1.3.0 17 | with: 18 | repo-token: ${{ secrets.GITHUB_TOKEN }} 19 | labels-to-exclude: | 20 | untriaged 21 | triaged 22 | -------------------------------------------------------------------------------- /.github/workflows/delete_backport_branch.yml: -------------------------------------------------------------------------------- 1 | name: Delete merged branch of the backport PRs 2 | on: 3 | pull_request: 4 | types: 5 | - closed 6 | 7 | jobs: 8 | delete-branch: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | contents: write 12 | if: github.repository == 'opensearch-project/neural-search' && startsWith(github.event.pull_request.head.ref,'backport/') 13 | steps: 14 | - name: Delete merged branch 15 | uses: actions/github-script@v7 16 | with: 17 | script: | 18 | github.rest.git.deleteRef({ 19 | owner: context.repo.owner, 20 | repo: context.repo.repo, 21 | ref: `heads/${context.payload.pull_request.head.ref}`, 22 | }) 23 | -------------------------------------------------------------------------------- /.github/workflows/draft-release-notes-workflow.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | 
update_release_draft: 10 | name: Update draft release notes 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Update draft release notes 14 | uses: release-drafter/release-drafter@v5 15 | with: 16 | config-name: draft-release-notes-config.yml 17 | name: Version (set here) 18 | tag: (None) 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | -------------------------------------------------------------------------------- /.github/workflows/links.yml: -------------------------------------------------------------------------------- 1 | name: Link Checker 2 | on: 3 | push: 4 | branches: [ main ] 5 | pull_request: 6 | branches: [ main ] 7 | 8 | jobs: 9 | linkchecker: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: lychee Link Checker 16 | id: lychee 17 | uses: lycheeverse/lychee-action@master 18 | with: 19 | args: --accept=200,403,429 **/*.html **/*.md **/*.txt **/*.json 20 | env: 21 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 22 | - name: Fail if there were link errors 23 | run: exit ${{ steps.lychee.outputs.exit_code }} 24 | -------------------------------------------------------------------------------- /.github/workflows/maven-publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish snapshots to maven 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - 'main' 8 | - '[0-9]+.[0-9]+' 9 | - '[0-9]+.x' 10 | 11 | jobs: 12 | build-and-publish-snapshots: 13 | runs-on: ubuntu-latest 14 | 15 | permissions: 16 | id-token: write 17 | contents: write 18 | 19 | steps: 20 | - uses: actions/setup-java@v3 21 | with: 22 | distribution: temurin # Temurin is a distribution of adoptium 23 | java-version: 21 24 | - uses: actions/checkout@v3 25 | - uses: aws-actions/configure-aws-credentials@v1 26 | with: 27 | role-to-assume: ${{ secrets.PUBLISH_SNAPSHOTS_ROLE }} 28 | aws-region: us-east-1 29 | - name: publish snapshots to maven 30 | run: | 31 | export 
SONATYPE_USERNAME=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-username --query SecretString --output text) 32 | export SONATYPE_PASSWORD=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-password --query SecretString --output text) 33 | echo "::add-mask::$SONATYPE_USERNAME" 34 | echo "::add-mask::$SONATYPE_PASSWORD" 35 | ./gradlew publishPluginZipPublicationToSnapshotsRepository 36 | -------------------------------------------------------------------------------- /.github/workflows/test_aggregations.yml: -------------------------------------------------------------------------------- 1 | name: Run Additional Tests for Neural Search 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' # every night 5 | push: 6 | branches: 7 | - "*" 8 | - "feature/**" 9 | pull_request: 10 | branches: 11 | - "*" 12 | - "feature/**" 13 | jobs: 14 | Get-CI-Image-Tag: 15 | uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main 16 | with: 17 | product: opensearch 18 | 19 | Check-neural-search-linux: 20 | needs: Get-CI-Image-Tag 21 | strategy: 22 | matrix: 23 | java: [21, 23] 24 | os: [ubuntu-latest] 25 | 26 | name: Integ Tests Linux 27 | runs-on: ${{ matrix.os }} 28 | container: 29 | # using the same image which is used by opensearch-build team to build the OpenSearch Distribution 30 | # this image tag is subject to change as more dependencies and updates will arrive over time 31 | image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} 32 | # need to switch to root so that github actions can install runner binary on container without permission issues. 
33 | options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} 34 | 35 | 36 | steps: 37 | - name: Run start commands 38 | run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} 39 | 40 | - name: Checkout neural-search 41 | uses: actions/checkout@v4 42 | 43 | - name: Setup Java ${{ matrix.java }} 44 | uses: actions/setup-java@v4 45 | with: 46 | distribution: 'temurin' 47 | java-version: ${{ matrix.java }} 48 | 49 | - name: Run tests 50 | run: | 51 | chown -R 1000:1000 `pwd` 52 | su `id -un 1000` -c "./gradlew ':integTest' -Dtest_aggs=true --tests \"org.opensearch.neuralsearch.query.aggregation.*IT\"" 53 | 54 | Check-neural-search-windows: 55 | strategy: 56 | matrix: 57 | java: [23] 58 | os: [windows-latest] 59 | 60 | name: Integ Tests Windows 61 | runs-on: ${{ matrix.os }} 62 | 63 | steps: 64 | - name: Checkout neural-search 65 | uses: actions/checkout@v4 66 | 67 | - name: Setup Java ${{ matrix.java }} 68 | uses: actions/setup-java@v4 69 | with: 70 | distribution: 'temurin' 71 | java-version: ${{ matrix.java }} 72 | 73 | - name: Run tests 74 | run: | 75 | ./gradlew ':integTest' -Dtest_aggs=true --tests "org.opensearch.neuralsearch.query.aggregation.*IT" 76 | -------------------------------------------------------------------------------- /.github/workflows/test_security.yml: -------------------------------------------------------------------------------- 1 | name: Test neural-search on Secure Cluster 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' # every night 5 | push: 6 | branches: 7 | - "*" 8 | - "feature/**" 9 | pull_request: 10 | branches: 11 | - "*" 12 | - "feature/**" 13 | 14 | jobs: 15 | Get-CI-Image-Tag: 16 | uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main 17 | with: 18 | product: opensearch 19 | 20 | integ-test-with-security-linux: 21 | strategy: 22 | matrix: 23 | java: [21, 23] 24 | 25 | name: Run Integration Tests on Linux 26 | runs-on: ubuntu-latest 27 | needs: Get-CI-Image-Tag 28 | container: 
29 | # using the same image which is used by opensearch-build team to build the OpenSearch Distribution 30 | # this image tag is subject to change as more dependencies and updates will arrive over time 31 | image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} 32 | # need to switch to root so that github actions can install runner binary on container without permission issues. 33 | options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} 34 | 35 | steps: 36 | - name: Run start commands 37 | run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} 38 | 39 | - name: Checkout neural-search 40 | uses: actions/checkout@v4 41 | with: 42 | submodules: true 43 | 44 | - name: Setup Java ${{ matrix.java }} 45 | uses: actions/setup-java@v4 46 | with: 47 | distribution: 'temurin' 48 | java-version: ${{ matrix.java }} 49 | 50 | - name: Run tests 51 | # switching the user, as OpenSearch cluster can only be started as root/Administrator on linux-deb/linux-rpm/windows-zip. 
52 | run: | 53 | chown -R 1000:1000 `pwd` 54 | su `id -un 1000` -c "whoami && java -version && ./gradlew integTest -Dsecurity.enabled=true" 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # intellij files 2 | .idea/ 3 | *.iml 4 | *.ipr 5 | *.iws 6 | *.log 7 | build-idea/ 8 | out/ 9 | 10 | # eclipse files 11 | .classpath 12 | .project 13 | .settings 14 | 15 | # gradle stuff 16 | .gradle/ 17 | build/ 18 | bin/ 19 | 20 | # vscode stuff 21 | .vscode/ 22 | 23 | # osx stuff 24 | .DS_Store 25 | 26 | # git stuff 27 | .gitattributes 28 | -------------------------------------------------------------------------------- /.idea/copyright/SPDX_ALv2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/runConfigurations/DebugNeuralSearch.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | -------------------------------------------------------------------------------- /.idea/runConfigurations/Run_Neural_Search.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 16 | 18 | true 19 | true 20 | false 21 | 22 | 23 | -------------------------------------------------------------------------------- /.idea/runConfigurations/Run_With_Debug_Port.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 17 | 19 | true 20 | true 21 | false 22 | 23 | 24 | -------------------------------------------------------------------------------- /.whitesource: 
-------------------------------------------------------------------------------- 1 | { 2 | "scanSettings": { 3 | "configMode": "AUTO", 4 | "configExternalURL": "", 5 | "projectToken": "", 6 | "baseBranches": [] 7 | }, 8 | "checkRunSettings": { 9 | "vulnerableCheckRunConclusionLevel": "failure", 10 | "displayMode": "diff", 11 | "useMendCheckNames": true 12 | }, 13 | "issueSettings": { 14 | "minSeverityLevel": "LOW", 15 | "issueType": "DEPENDENCY" 16 | }, 17 | "remediateSettings": { 18 | "workflowRules": { 19 | "enabled": true 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /ADMINS.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | This document explains who the admins are (see below), what they do in this repo, and how they should be doing it. If you're interested in becoming a maintainer, see [MAINTAINERS](MAINTAINERS.md). If you're interested in contributing, see [CONTRIBUTING](CONTRIBUTING.md). 4 | 5 | ## Current Admins 6 | 7 | | Admin | GitHub ID | Affiliation | 8 | | --------------- | --------------------------------------- | ----------- | 9 | | Charlotte | [CEHENKLE](https://github.com/CEHENKLE) | Amazon | 10 | 11 | ## Admin Responsibilities 12 | 13 | As an admin you own stewardship of the repository and its settings. Admins have [admin-level permissions on a repository](https://docs.github.com/en/organizations/managing-access-to-your-organizations-repositories/repository-permission-levels-for-an-organization). Use those privileges to serve the community and protect the repository as follows. 14 | 15 | ### Prioritize Security 16 | 17 | Security is your number one priority. Manage security keys and safeguard access to the repository. 18 | 19 | Note that this repository is monitored and supported 24/7 by Amazon Security, see [Reporting a Vulnerability](SECURITY.md) for details. 
20 | 21 | ### Enforce Code of Conduct 22 | 23 | Act on [CODE_OF_CONDUCT](CODE_OF_CONDUCT.md) violations by revoking access, and blocking malicious actors. 24 | 25 | ### Adopt Organizational Best Practices 26 | 27 | Adopt organizational best practices, work in the open, and collaborate with other admins by opening issues before making process changes. Prefer consistency, and avoid diverging from practices in the opensearch-project organization. 28 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | [![Build and Test Neural Search](https://github.com/opensearch-project/neural-search/actions/workflows/CI.yml/badge.svg)](https://github.com/opensearch-project/neural-search/actions/workflows/CI.yml) 4 | [![codecov](https://codecov.io/gh/opensearch-project/neural-search/branch/main/graph/badge.svg?token=PYQO2GW39S)](https://codecov.io/gh/opensearch-project/neural-search) 5 | [![Documentation](https://img.shields.io/badge/doc-reference-blue)](https://opensearch.org/docs/latest/search-plugins/neural-search/) 6 | [![Chat](https://img.shields.io/badge/chat-on%20forums-blue)](https://forum.opensearch.org) 7 | ![PRs welcome!](https://img.shields.io/badge/PRs-welcome!-success) 8 | 9 | ## OpenSearch Neural Search 10 | **OpenSearch Neural Search** is an OpenSearch plugin that adds dense neural retrieval into the OpenSearch ecosystem. 11 | The plugin provides the capability for indexing documents and doing neural search on the indexed documents. 12 | 13 | ## Project Resources 14 | 15 | * [Project Website](https://opensearch.org/) 16 | * [Downloads](https://opensearch.org/downloads.html). 17 | * [Documentation](https://opensearch.org/docs/) 18 | * Need help? Try [Forums](https://discuss.opendistrocommunity.dev/) 19 | * [Project Principles](https://opensearch.org/#principles) 20 | * [Contributing to OpenSearch](CONTRIBUTING.md) 21 | * [Maintainer Responsibilities](MAINTAINERS.md) 22 | * [Release Management](RELEASING.md) 23 | * [Admin Responsibilities](ADMINS.md) 24 | * [Security](SECURITY.md) 25 | * [Code of Conduct](#code-of-conduct) 26 | * [License](#license) 27 | * [Copyright](#copyright) 28 | 29 | ## Code of Conduct 30 | 31 | This project has adopted the [Amazon Open Source Code of Conduct](CODE_OF_CONDUCT.md). 
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq), or contact [opensource-codeofconduct@amazon.com](mailto:opensource-codeofconduct@amazon.com) with any additional questions or comments. 32 | 33 | ## License 34 | 35 | This project is licensed under the [Apache v2.0 License](LICENSE). 36 | 37 | ## Copyright 38 | 39 | Copyright OpenSearch Contributors. See [NOTICE](NOTICE) for details. 40 | -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | - [Overview](#overview) 2 | - [Branching](#branching) 3 | - [Release Branching](#release-branching) 4 | - [Feature Branches](#feature-branches) 5 | - [Release Labels](#release-labels) 6 | - [Releasing](#releasing) 7 | 8 | ## Overview 9 | 10 | This document explains the release strategy for artifacts in this organization. 11 | 12 | ## Branching 13 | 14 | ### Release Branching 15 | 16 | Given the current major release of 1.0, projects in this organization maintain the following active branches. 17 | 18 | * **main**: The next _major_ release. This is the branch where all merges take place and code moves fast. 19 | * **1.x**: The next _minor_ release. Once a change is merged into `main`, decide whether to backport it to `1.x`. 20 | * **1.0**: The _current_ release. In between minor releases, only hotfixes (e.g. security) are backported to `1.0`. 21 | 22 | Label PRs with the next major version label (e.g. `2.0.0`) and merge changes into `main`. Label PRs that you believe need to be backported as `1.x` and `1.0`. Backport PRs by checking out the versioned branch, cherry-picking changes, and opening a PR against each target backport branch. 23 | 24 | ### Feature Branches 25 | 26 | Do not create branches in the upstream repo; use your fork, with the exception of long-lasting feature branches that require active collaboration from multiple developers. 
Name feature branches `feature/<feature-name>`. Once the work is merged to `main`, please make sure to delete the feature branch. 27 | 28 | ## Release Labels 29 | 30 | Repositories create consistent release labels, such as `v1.0.0`, `v1.1.0` and `v2.0.0`, as well as `patch` and `backport`. Use release labels to target an issue or a PR for a given release. See [MAINTAINERS](MAINTAINERS.md#triage-open-issues) for more information on triaging issues. 31 | 32 | ## Releasing 33 | 34 | The release process is standard across repositories in this org and is run by a release manager volunteering from amongst [MAINTAINERS](MAINTAINERS.md). 35 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Reporting a Vulnerability 2 | 3 | If you discover a potential security issue in this project, we ask that you notify OpenSearch Security directly via email to security@opensearch.org. Please do **not** create a public GitHub issue. 
4 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | --- 2 | coverage: 3 | precision: 2 4 | round: down 5 | range: '70...90' 6 | status: 7 | project: 8 | default: 9 | target: auto 10 | threshold: 5% 11 | -------------------------------------------------------------------------------- /formatter/license-header.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright OpenSearch Contributors 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | # The BWC version here should always be the latest opensearch version set in 7 | # https://github.com/opensearch-project/OpenSearch/blob/main/libs/core/src/main/java/org/opensearch/Version.java . 8 | # Wire compatibility of OpenSearch works such that a 3.x version is compatible with the 2.(latest-major) version. 9 | # Therefore, to run the rolling-upgrade BWC test on a local machine, the BWC version here should be set to 2.(latest-major). 
10 | systemProp.bwc.version=3.1.0-SNAPSHOT 11 | systemProp.bwc.bundle.version=3.0.0 12 | 13 | # For fixing Spotless check with Java 17 14 | org.gradle.jvmargs=--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \ 15 | --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \ 16 | --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \ 17 | --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \ 18 | --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED 19 | -------------------------------------------------------------------------------- /gradle/formatting.gradle: -------------------------------------------------------------------------------- 1 | allprojects { 2 | project.apply plugin: "com.diffplug.spotless" 3 | spotless { 4 | java { 5 | // Normally this isn't necessary, but we have Java sources in 6 | // non-standard places 7 | target '**/*.java' 8 | 9 | removeUnusedImports() 10 | eclipse().withP2Mirrors(Map.of("https://download.eclipse.org/", "https://mirror.umd.edu/eclipse/")).configFile rootProject.file('formatter/formatterConfig.xml') 11 | trimTrailingWhitespace() 12 | endWithNewline(); 13 | 14 | custom 'Refuse wildcard imports', { 15 | // Wildcard imports can't be resolved; fail the build 16 | if (it =~ /\s+import .*\*;/) { 17 | throw new AssertionError("Do not use wildcard imports. 
'spotlessApply' cannot resolve this issue.") 18 | } 19 | } 20 | } 21 | format 'misc', { 22 | target '*.md', '**/*.gradle', '**/*.json', '**/*.yaml', '**/*.yml', '**/*.svg', '**/*.properties' 23 | 24 | trimTrailingWhitespace() 25 | endWithNewline() 26 | } 27 | format("license", { 28 | licenseHeaderFile("${rootProject.file("formatter/license-header.txt")}", "package "); 29 | target("src/*/java/**/*.java","qa/*/java/**/*.java") 30 | }) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensearch-project/neural-search/a6669e4cc5f69b56e6eb00105b49e71599692a48/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright OpenSearch Contributors 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | distributionBase=GRADLE_USER_HOME 7 | distributionPath=wrapper/dists 8 | distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6 9 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip 10 | networkTimeout=10000 11 | validateDistributionUrl=true 12 | zipStoreBase=GRADLE_USER_HOME 13 | zipStorePath=wrapper/dists 14 | -------------------------------------------------------------------------------- /lombok.config: -------------------------------------------------------------------------------- 1 | config.stopBubbling = true 2 | lombok.addLombokGeneratedAnnotation = true 3 | lombok.nonNull.exceptionType = JDK 4 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/ChunkingIndexSettings.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "settings":{ 3 | "default_pipeline": "%s", 4 | "number_of_shards": 3, 5 | "number_of_replicas": 1 6 | }, 7 | "mappings": { 8 | "properties": { 9 | "body": { 10 | "type": "text" 11 | }, 12 | "body_chunk": { 13 | "type": "text" 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/CreateModelGroupRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "description": "This is a public model group" 4 | } 5 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/IndexMappingMultipleShard.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "index": { 4 | "knn": true, 5 | "refresh_interval": "30s", 6 | "default_pipeline": "%s" 7 | }, 8 | "number_of_shards": 3, 9 | "number_of_replicas": 1 10 | }, 11 | "mappings": { 12 | "properties": { 13 | "passage_embedding": { 14 | "type": "knn_vector", 15 | "dimension": 768, 16 | "method": { 17 | "name": "hnsw", 18 | "space_type": "l2", 19 | "engine": "lucene", 20 | "parameters": { 21 | "ef_construction": 128, 22 | "m": 24 23 | } 24 | } 25 | }, 26 | "passage_text": { 27 | "type": "text" 28 | }, 29 | "passage_image": { 30 | "type": "text" 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/IndexMappingSingleShard.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "index": { 4 | "knn": true, 5 | "refresh_interval": "30s", 6 | "default_pipeline": "%s" 7 | }, 8 | "number_of_shards": 1, 9 | "number_of_replicas": 0 10 | }, 11 | "mappings": { 12 | "properties": { 13 | 
"passage_embedding": { 14 | "type": "knn_vector", 15 | "dimension": 768, 16 | "method": { 17 | "name": "hnsw", 18 | "space_type": "l2", 19 | "engine": "lucene", 20 | "parameters": { 21 | "ef_construction": 128, 22 | "m": 24 23 | } 24 | } 25 | }, 26 | "passage_text": { 27 | "type": "text" 28 | }, 29 | "passage_image": { 30 | "type": "text" 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_sparse_two_phase_processor": { 5 | "tag": "neural-sparse", 6 | "description": "This processor is making two-phase rescorer.", 7 | "enabled": true, 8 | "two_phase_parameter": { 9 | "prune_ratio": %f, 10 | "expansion_rate": %f, 11 | "max_window_size": %d 12 | } 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/PipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "field_map": { 8 | "passage_text": "passage_embedding" 9 | } 10 | } 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An sparse encoding ingest pipeline", 3 | "processors": [ 4 | { 5 | "sparse_encoding": { 6 | "model_id": "%s", 7 | "field_map": { 8 | "passage_text": "passage_embedding" 9 | } 10 | } 11 | } 12 | ] 13 | } 14 | 
-------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example fixed token length chunker pipeline with standard tokenizer", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "fixed_token_length": { 11 | "token_limit": 10, 12 | "tokenizer": "standard" 13 | } 14 | } 15 | } 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/PipelineForTextImageProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text image embedding pipeline", 3 | "processors": [ 4 | { 5 | "text_image_embedding": { 6 | "model_id": "%s", 7 | "embedding": "passage_embedding", 8 | "field_map": { 9 | "text": "passage_text", 10 | "image": "passage_image" 11 | } 12 | } 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/SearchRequestPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_query_enricher": { 5 | "tag": "tag1", 6 | "description": "This processor is going to restrict to publicly visible documents", 7 | "default_model_id": "%s" 8 | } 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/SparseIndexMappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "default_pipeline": "%s", 4 | "number_of_shards": 3, 5 | "number_of_replicas": 1 6 
| }, 7 | "mappings": { 8 | "properties": { 9 | "passage_embedding": { 10 | "type": "rank_features" 11 | }, 12 | "passage_text": { 13 | "type": "text" 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/UploadModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "traced_small_model", 3 | "version": "1.0.0", 4 | "model_format": "TORCH_SCRIPT", 5 | "function_name": "TEXT_EMBEDDING", 6 | "model_task_type": "text_embedding", 7 | "model_content_hash_value": "e13b74006290a9d0f58c1376f9629d4ebc05a0f9385f40db837452b167ae9021", 8 | "model_group_id": "%s", 9 | "model_config": { 10 | "model_type": "bert", 11 | "embedding_dimension": 768, 12 | "framework_type": "sentence_transformers", 13 | "all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}" 14 | }, 15 | "url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true" 16 | } 17 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/UploadSparseEncodingModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tokenize-idf-0915", 3 | "version": "1.0.0", 4 | "function_name": "SPARSE_TOKENIZE", 5 | "description": "test model", 6 | "model_format": "TORCH_SCRIPT", 7 | "model_group_id": "%s", 8 | "model_content_hash_value": "b345e9e943b62c405a8dd227ef2c46c84c5ff0a0b71b584be9132b37bce91a9a", 9 | "url": "https://github.com/opensearch-project/ml-commons/raw/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/sparse_encoding/sparse_demo.zip" 10 | } 11 | 
-------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/ChunkingIndexSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings":{ 3 | "default_pipeline": "%s", 4 | "number_of_shards": 3, 5 | "number_of_replicas": 1 6 | }, 7 | "mappings": { 8 | "properties": { 9 | "body": { 10 | "type": "text" 11 | }, 12 | "body_chunk": { 13 | "type": "text" 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/CreateModelGroupRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "description": "This is a public model group" 4 | } 5 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/IndexMappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "index": { 4 | "knn": true, 5 | "refresh_interval": "30s", 6 | "default_pipeline": "%s" 7 | }, 8 | "number_of_shards": 3, 9 | "number_of_replicas": 1 10 | }, 11 | "mappings": { 12 | "properties": { 13 | "passage_embedding": { 14 | "type": "knn_vector", 15 | "dimension": 768, 16 | "method": { 17 | "name": "hnsw", 18 | "space_type": "l2", 19 | "engine": "lucene", 20 | "parameters": { 21 | "ef_construction": 128, 22 | "m": 24 23 | } 24 | } 25 | }, 26 | "passage_text": { 27 | "type": "text" 28 | }, 29 | "passage_image": { 30 | "type": "text" 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | 
"neural_sparse_two_phase_processor": { 5 | "tag": "neural-sparse", 6 | "description": "This processor is making two-phase rescorer.", 7 | "enabled": true, 8 | "two_phase_parameter": { 9 | "prune_ratio": %f, 10 | "expansion_rate": %f, 11 | "max_window_size": %d 12 | } 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/PipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "field_map": { 8 | "passage_text": "passage_embedding" 9 | } 10 | } 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An sparse encoding ingest pipeline", 3 | "processors": [ 4 | { 5 | "sparse_encoding": { 6 | "model_id": "%s", 7 | "batch_size": "%d", 8 | "field_map": { 9 | "passage_text": "passage_embedding" 10 | } 11 | } 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example fixed token length chunker pipeline with standard tokenizer", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "fixed_token_length": { 11 | "token_limit": 10, 12 | "tokenizer": "standard" 13 | } 14 | } 15 | } 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- 
/qa/rolling-upgrade/src/test/resources/processor/PipelineForTextImageProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text image embedding pipeline", 3 | "processors": [ 4 | { 5 | "text_image_embedding": { 6 | "model_id": "%s", 7 | "embedding": "passage_embedding", 8 | "field_map": { 9 | "text": "passage_text", 10 | "image": "passage_image" 11 | } 12 | } 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/SearchRequestPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_query_enricher": { 5 | "tag": "tag1", 6 | "description": "This processor is going to restrict to publicly visible documents", 7 | "default_model_id": "%s" 8 | } 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/SparseIndexMappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "default_pipeline": "%s", 4 | "number_of_shards": 3, 5 | "number_of_replicas": 1 6 | }, 7 | "mappings": { 8 | "properties": { 9 | "passage_embedding": { 10 | "type": "rank_features" 11 | }, 12 | "passage_text": { 13 | "type": "text" 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/UploadModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "traced_small_model", 3 | "version": "1.0.0", 4 | "model_format": "TORCH_SCRIPT", 5 | "function_name": "TEXT_EMBEDDING", 6 | "model_task_type": "text_embedding", 7 | "model_content_hash_value": 
"e13b74006290a9d0f58c1376f9629d4ebc05a0f9385f40db837452b167ae9021", 8 | "model_group_id": "%s", 9 | "model_config": { 10 | "model_type": "bert", 11 | "embedding_dimension": 768, 12 | "framework_type": "sentence_transformers", 13 | "all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}" 14 | }, 15 | "url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true" 16 | } 17 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/UploadSparseEncodingModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tokenize-idf-0915", 3 | "version": "1.0.0", 4 | "function_name": "SPARSE_TOKENIZE", 5 | "description": "test model", 6 | "model_format": "TORCH_SCRIPT", 7 | "model_group_id": "%s", 8 | "model_content_hash_value": "b345e9e943b62c405a8dd227ef2c46c84c5ff0a0b71b584be9132b37bce91a9a", 9 | "url": "https://github.com/opensearch-project/ml-commons/raw/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/sparse_encoding/sparse_demo.zip" 10 | } 11 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.10.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.10.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.10.0 4 | 5 | ### Features 6 | * Improved Hybrid Search relevancy by Score Normalization and Combination ([#241](https://github.com/opensearch-project/neural-search/pull/241/)) 7 | 8 | ### Enhancements 9 | * Changed format for hybrid query results to a single list of scores with delimiter ([#259](https://github.com/opensearch-project/neural-search/pull/259)) 
10 | * Added validations for score combination weights in Hybrid Search ([#265](https://github.com/opensearch-project/neural-search/pull/265)) 11 | * Made hybrid search active by default ([#274](https://github.com/opensearch-project/neural-search/pull/274)) 12 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.11.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.11.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.11.0 4 | 5 | ### Features 6 | * Support sparse semantic retrieval by introducing `sparse_encoding` ingest processor and query builder ([#333](https://github.com/opensearch-project/neural-search/pull/333)) 7 | * Enabled support for applying default modelId in neural search query ([#337](https://github.com/opensearch-project/neural-search/pull/337) 8 | ### Bug Fixes 9 | * Fixed exception in Hybrid Query for one shard and multiple node ([#396](https://github.com/opensearch-project/neural-search/pull/396)) 10 | ### Maintenance 11 | * Consumed latest changes from core, use QueryPhaseSearcherWrapper as parent class for Hybrid QPS ([#356](https://github.com/opensearch-project/neural-search/pull/356)) 12 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.12.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.12.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.12.0 4 | 5 | ### Features 6 | - Add rerank processor interface and ml-commons reranker ([#494](https://github.com/opensearch-project/neural-search/pull/494)) 7 | ### Bug Fixes 8 | - Fixing multiple issues reported in #497 ([#524](https://github.com/opensearch-project/neural-search/pull/524)) 9 | - Fix Flaky test reported in #433 ([#533](https://github.com/opensearch-project/neural-search/pull/533)) 10 | - Enable 
support for default model id on HybridQueryBuilder ([#541](https://github.com/opensearch-project/neural-search/pull/541)) 11 | - Fix Flaky test reported in #384 ([#559](https://github.com/opensearch-project/neural-search/pull/559)) 12 | - Add validations for reranker requests per #555 ([#562](https://github.com/opensearch-project/neural-search/pull/562)) 13 | ### Infrastructure 14 | - BWC tests for Neural Search ([#515](https://github.com/opensearch-project/neural-search/pull/515)) 15 | - Github action to run integ tests in secure opensearch cluster ([#535](https://github.com/opensearch-project/neural-search/pull/535)) 16 | - BWC tests for Multimodal search, Hybrid Search and Neural Sparse Search ([#533](https://github.com/opensearch-project/neural-search/pull/533)) 17 | - Distribution bundle bwc tests ([#579])(https://github.com/opensearch-project/neural-search/pull/579) 18 | ### Maintenance 19 | - Update spotless and eclipse dependencies ([#589](https://github.com/opensearch-project/neural-search/pull/589)) 20 | ### Refactoring 21 | - Added spotless check in the build ([#515](https://github.com/opensearch-project/neural-search/pull/515)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.13.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.13.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.13.0 4 | 5 | ### Features 6 | - Implement document chunking processor with fixed token length and delimiter algorithm ([#607](https://github.com/opensearch-project/neural-search/pull/607/)) 7 | - Enabled support for applying default modelId in neural sparse query ([#614](https://github.com/opensearch-project/neural-search/pull/614) 8 | ### Enhancements 9 | - Adding aggregations in hybrid query ([#630](https://github.com/opensearch-project/neural-search/pull/630)) 10 | - Support for post filter in hybrid query 
([#633](https://github.com/opensearch-project/neural-search/pull/633)) 11 | ### Bug Fixes 12 | - Fix runtime exceptions in hybrid query for case when sub-query scorer return TwoPhase iterator that is incompatible with DISI iterator ([#624](https://github.com/opensearch-project/neural-search/pull/624)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.14.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.14.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.14.0 4 | 5 | ### Features 6 | * Support k-NN radial search parameters in neural search([#697](https://github.com/opensearch-project/neural-search/pull/697)) 7 | ### Enhancements 8 | * BWC tests for text chunking processor ([#661](https://github.com/opensearch-project/neural-search/pull/661)) 9 | * Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663)) 10 | * Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670)) 11 | * Allowing query by raw tokens in neural_sparse query ([#693](https://github.com/opensearch-project/neural-search/pull/693)) 12 | * Removed stream.findFirst implementation to use more native iteration implement to improve hybrid query latencies by 35% ([#706](https://github.com/opensearch-project/neural-search/pull/706)) 13 | * Removed map of subquery to subquery index in favor of storing index as part of disi wrapper to improve hybrid query latencies by 20% ([#711](https://github.com/opensearch-project/neural-search/pull/711)) 14 | * Avoid change max_chunk_limit exceed exception in text chunking processor ([#717](https://github.com/opensearch-project/neural-search/pull/717)) 15 | ### Bug Fixes 16 | * Fix async actions are left in neural_sparse query 
([#438](https://github.com/opensearch-project/neural-search/pull/438)) 17 | * Fix typo for sparse encoding processor factory([#578](https://github.com/opensearch-project/neural-search/pull/578)) 18 | * Add non-null check for queryBuilder in NeuralQueryEnricherProcessor ([#615](https://github.com/opensearch-project/neural-search/pull/615)) 19 | * Add max_token_score field placeholder in NeuralSparseQueryBuilder to fix the rolling-upgrade from 2.x nodes bwc tests. ([#696](https://github.com/opensearch-project/neural-search/pull/696)) 20 | * Fix multi node "no such index" error in text chunking processor. ([#713](https://github.com/opensearch-project/neural-search/pull/713)) 21 | ### Infrastructure 22 | * Adding integration tests for scenario of hybrid query with aggregations ([#632](https://github.com/opensearch-project/neural-search/pull/632)) 23 | ### Maintenance 24 | * Update bwc tests for neural_query_enricher neural_sparse search ([#652](https://github.com/opensearch-project/neural-search/pull/652)) 25 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.15.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.15.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.15.0 4 | 5 | ### Features 6 | * Speed up NeuralSparseQuery by two-phase using a custom search pipeline.([#646](https://github.com/opensearch-project/neural-search/issues/646)) 7 | * Support batchExecute in TextEmbeddingProcessor and SparseEncodingProcessor ([#743](https://github.com/opensearch-project/neural-search/issues/743)) 8 | ### Enhancements 9 | * Pass empty doc collector instead of top docs collector to improve hybrid query latencies by 20% ([#731](https://github.com/opensearch-project/neural-search/pull/731)) 10 | * Optimize parameter parsing in text chunking processor ([#733](https://github.com/opensearch-project/neural-search/pull/733)) 11 | * Use lazy 
initialization for priority queue of hits and scores to improve latencies by 20% ([#746](https://github.com/opensearch-project/neural-search/pull/746)) 12 | * Optimize max score calculation in the Query Phase of the Hybrid Search ([765](https://github.com/opensearch-project/neural-search/pull/765)) 13 | * Implement parallel execution of sub-queries for hybrid search ([#749](https://github.com/opensearch-project/neural-search/pull/749)) 14 | ### Bug Fixes 15 | * Total hit count fix in Hybrid Query ([756](https://github.com/opensearch-project/neural-search/pull/756)) 16 | * Fix map type validation issue in multiple pipeline processors ([#661](https://github.com/opensearch-project/neural-search/pull/661)) 17 | ### Infrastructure 18 | * Disable memory circuit breaker for integ tests ([#770](https://github.com/opensearch-project/neural-search/pull/770)) 19 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.16.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.16.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.16.0 4 | 5 | ### Features 6 | - Enable sorting and search_after features in Hybrid Search [#827](https://github.com/opensearch-project/neural-search/pull/827) 7 | ### Enhancements 8 | - InferenceProcessor inherits from AbstractBatchingProcessor to support sub batching in processor [#820](https://github.com/opensearch-project/neural-search/pull/820) 9 | - Adds dynamic knn query parameters efsearch and nprobes [#814](https://github.com/opensearch-project/neural-search/pull/814/) 10 | - Enable '.' 
for nested field in text embedding processor ([#811](https://github.com/opensearch-project/neural-search/pull/811)) 11 | - Enhance syntax for nested mapping in destination fields([#841](https://github.com/opensearch-project/neural-search/pull/841)) 12 | ### Bug Fixes 13 | - Fix function names and comments in the gradle file for BWC tests ([#795](https://github.com/opensearch-project/neural-search/pull/795/files)) 14 | - Fix for missing HybridQuery results when concurrent segment search is enabled ([#800](https://github.com/opensearch-project/neural-search/pull/800)) 15 | ### Infrastructure 16 | - Add BWC for batch ingestion ([#769](https://github.com/opensearch-project/neural-search/pull/769)) 17 | - Add backward test cases for neural sparse two phase processor ([#777](https://github.com/opensearch-project/neural-search/pull/777)) 18 | - Fix CI for JDK upgrade towards 21 ([#835](https://github.com/opensearch-project/neural-search/pull/835)) 19 | - Maven publishing workflow by upgrade jdk to 21 ([#837](https://github.com/opensearch-project/neural-search/pull/837)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.17.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.17.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.17.0 4 | 5 | ### Enhancements 6 | - Adds rescore parameter support ([#885](https://github.com/opensearch-project/neural-search/pull/885)) 7 | ### Bug Fixes 8 | - Removing code to cut search results of hybrid search in the priority queue ([#867](https://github.com/opensearch-project/neural-search/pull/867)) 9 | - Fixed merge logic in hybrid query for multiple shards case ([#877](https://github.com/opensearch-project/neural-search/pull/877)) 10 | ### Infrastructure 11 | - Update batch related tests to use batch_size in processor & refactor BWC version check 
([#852](https://github.com/opensearch-project/neural-search/pull/852)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.18.0.0.md: -------------------------------------------------------------------------------- 1 | 2 | ## Version 2.18.0.0 Release Notes 3 | 4 | Compatible with OpenSearch 2.18.0 5 | 6 | ### Features 7 | - Introduces ByFieldRerankProcessor for second level reranking on documents ([#932](https://github.com/opensearch-project/neural-search/pull/932)) 8 | ### Bug Fixes 9 | - Fixed incorrect document order for nested aggregations in hybrid query ([#956](https://github.com/opensearch-project/neural-search/pull/956)) 10 | ### Enhancements 11 | - Implement `ignore_missing` field in text chunking processors ([#907](https://github.com/opensearch-project/neural-search/pull/907)) 12 | - Added rescorer in hybrid query ([#917](https://github.com/opensearch-project/neural-search/pull/917)) 13 | 14 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.19.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.19.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.19.0 4 | 5 | ### Features 6 | * Pagination in Hybrid query ([#1048](https://github.com/opensearch-project/neural-search/pull/1048)) 7 | * Implement Reciprocal Rank Fusion score normalization/combination technique in hybrid query ([#874](https://github.com/opensearch-project/neural-search/pull/874)) 8 | ### Bug Fixes 9 | * Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998)) 10 | * Fix bug where ingested document has list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040)) 11 | * Fixed document source and score field mismatch in sorted hybrid queries 
([#1043](https://github.com/opensearch-project/neural-search/pull/1043)) 12 | * Update NeuralQueryBuilder doEquals() and doHashCode() to cater the missing parameters information ([#1045](https://github.com/opensearch-project/neural-search/pull/1045)). 13 | * Fix bug where embedding is missing when ingested document has "." in field name, and mismatches fieldMap config ([#1062](https://github.com/opensearch-project/neural-search/pull/1062)) 14 | ### Enhancements 15 | * Explainability in hybrid query ([#970](https://github.com/opensearch-project/neural-search/pull/970)) 16 | * Support new knn query parameter expand_nested ([#1013](https://github.com/opensearch-project/neural-search/pull/1013)) 17 | * Implement pruning for neural sparse ingestion pipeline and two phase search processor ([#988](https://github.com/opensearch-project/neural-search/pull/988)) 18 | * Support empty string for fields in text embedding processor ([#1041](https://github.com/opensearch-project/neural-search/pull/1041)) 19 | * Optimize ML inference connection retry logic ([#1054](https://github.com/opensearch-project/neural-search/pull/1054)) 20 | * Support for builder constructor in Neural Query Builder ([#1047](https://github.com/opensearch-project/neural-search/pull/1047)) 21 | * Validate Disjunction query to avoid having nested hybrid query ([#1127](https://github.com/opensearch-project/neural-search/pull/1127)) 22 | ### Maintenance 23 | * Add reindex integration tests for ingest processors ([#1075](https://github.com/opensearch-project/neural-search/pull/1075)) 24 | * Fix github CI by adding eclipse dependency in formatting.gradle ([#1079](https://github.com/opensearch-project/neural-search/pull/1079)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.4.1.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.4.1.0 Release Notes 2 | 3 | Compatible with 
OpenSearch 2.4.1 4 | 5 | ### Bug Fixes 6 | 7 | * Change the behavior when embedding fields are not present ([#72](https://github.com/opensearch-project/neural-search/pull/72)) 8 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.5.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.5.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.5.0 4 | 5 | ### Enhancements 6 | 7 | * Add filter option for query type ([#88](https://github.com/opensearch-project/neural-search/pull/88)) 8 | * Add retry mechanism for neural search inference ([#91](https://github.com/opensearch-project/neural-search/pull/91)) 9 | * Enable core branching strategy and make Neural Plugin as extensible plugin. ([#87](https://github.com/opensearch-project/neural-search/pull/87)) 10 | 11 | ### Documentation 12 | 13 | * Update MAINTAINERS.md format ([#95](https://github.com/opensearch-project/neural-search/pull/95)) 14 | * Use short-form MAINTAINERS.md ([#84](https://github.com/opensearch-project/neural-search/pull/84)) 15 | 16 | ### Refactoring 17 | 18 | * Remove unused MLPredict Transport action from src ([#94](https://github.com/opensearch-project/neural-search/pull/94)) 19 | 20 | ### Maintenance 21 | 22 | * Increment version to 2.5.0-SNAPSHOT ([#76](https://github.com/opensearch-project/neural-search/pull/76)) 23 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.6.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.6.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.6.0 4 | 5 | ### Maintenance 6 | 7 | * Increment version to 2.6.0-SNAPSHOT ([#117](https://github.com/opensearch-project/neural-search/pull/117)) 8 | -------------------------------------------------------------------------------- 
/release-notes/opensearch-neural-search.release-notes-2.7.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.7.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.7.0 4 | 5 | ### Infrastructure 6 | 7 | * Add GHA to publish to maven repository ([#237](https://github.com/opensearch-project/neural-search/pull/130)) 8 | * Add reflection dependency ([#136](https://github.com/opensearch-project/neural-search/pull/136)) 9 | * Add CHANGELOG ([#135](https://github.com/opensearch-project/neural-search/pull/135)) 10 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.8.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.8.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.8.0 4 | 5 | ### Infrastructure 6 | 7 | * Bump gradle version to 8.1.1 ([#169](https://github.com/opensearch-project/neural-search/pull/169)) 8 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.9.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.9.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.9.0 4 | 5 | ### Maintenance 6 | Increment version to 2.9.0-SNAPSHOT ([#191](https://github.com/opensearch-project/neural-search/pull/191)) 7 | 8 | ### Bug Fixes 9 | Fix update document with knnn_vector size not matching issue ([#208](https://github.com/opensearch-project/neural-search/pull/208)) 10 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-3.0.0.0-alpha1.md: -------------------------------------------------------------------------------- 1 | ## Version 3.0.0.0-alpha1 Release Notes 2 | 3 | Compatible with OpenSearch 3.0.0-alpha1 4 | 5 | ### Enhancements 6 | - Set neural-search 
plugin 3.0.0 baseline JDK version to JDK-21 ([#838](https://github.com/opensearch-project/neural-search/pull/838)) 7 | - Support different embedding types in model's response ([#1007](https://github.com/opensearch-project/neural-search/pull/1007)) 8 | ### Bug Fixes 9 | - Fix a bug to unflatten the doc with list of map with multiple entries correctly ([#1204](https://github.com/opensearch-project/neural-search/pull/1204)). 10 | ### Infrastructure 11 | - [3.0] Update neural-search for OpenSearch 3.0 compatibility ([#1141](https://github.com/opensearch-project/neural-search/pull/1141)) 12 | ### Refactoring 13 | - Encapsulate KNNQueryBuilder creation within NeuralKNNQueryBuilder ([#1183](https://github.com/opensearch-project/neural-search/pull/1183)) 14 | ### Documentation 15 | - Adding code guidelines ([#502](https://github.com/opensearch-project/neural-search/pull/502)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-3.0.0.0-beta1.md: -------------------------------------------------------------------------------- 1 | ## Version 3.0.0.0-beta1 Release Notes 2 | 3 | Compatible with OpenSearch 3.0.0-beta1 4 | 5 | ### Features 6 | - Lower bound for min-max normalization technique in hybrid query ([#1195](https://github.com/opensearch-project/neural-search/pull/1195)) 7 | - Support filter function for HybridQueryBuilder and NeuralQueryBuilder ([#1206](https://github.com/opensearch-project/neural-search/pull/1206)) 8 | - Add Z Score normalization technique ([#1224](https://github.com/opensearch-project/neural-search/pull/1224)) 9 | - Support semantic sentence highlighter ([#1193](https://github.com/opensearch-project/neural-search/pull/1193)) 10 | - Optimize embedding generation in Text Embedding Processor ([#1191](https://github.com/opensearch-project/neural-search/pull/1191)) 11 | - Optimize embedding generation in Sparse Encoding Processor 
([#1246](https://github.com/opensearch-project/neural-search/pull/1246)) 12 | - Optimize embedding generation in Text/Image Embedding Processor ([#1249](https://github.com/opensearch-project/neural-search/pull/1249)) 13 | - Inner hits support with hybrid query ([#1253](https://github.com/opensearch-project/neural-search/pull/1253)) 14 | - Support custom tags in semantic highlighter ([#1254](https://github.com/opensearch-project/neural-search/pull/1254)) 15 | - Add stats API ([#1256](https://github.com/opensearch-project/neural-search/pull/1256)) 16 | 17 | ### Bug Fixes 18 | - Remove validations for unmapped fields (text and image) in TextImageEmbeddingProcessor ([#1230](https://github.com/opensearch-project/neural-search/pull/1230)) 19 | 20 | ### Infrastructure 21 | - [3.0] Update neural-search for OpenSearch 3.0 beta compatibility ([#1245](https://github.com/opensearch-project/neural-search/pull/1245)) 22 | -------------------------------------------------------------------------------- /repositories.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | repositories { 7 | mavenLocal() 8 | maven { url "https://aws.oss.sonatype.org/content/repositories/snapshots" } 9 | mavenCentral() 10 | maven { url "https://plugins.gradle.org/m2/" } 11 | } 12 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * The settings file is used to specify which projects to include in your build. 5 | * 6 | * Detailed information about configuring a multi-project build in Gradle can be found 7 | * in the user manual at https://docs.gradle.org/7.5.1/userguide/multi_project_builds.html 8 | * This project uses @Incubating APIs which are subject to change. 
/**
 * Utility class for working with vectors.
 */
public final class VectorUtil {

    // Utility class: not meant to be instantiated or extended.
    private VectorUtil() {}

    /**
     * Converts a vector represented as a list to a primitive float array.
     *
     * @param vectorAsList {@link List} of {@link Float}'s representing the vector; must be non-null
     * @return array of floats produced from the input list; empty array for an empty list
     */
    public static float[] vectorAsListToArray(final List<Float> vectorAsList) {
        final float[] vector = new float[vectorAsList.size()];
        for (int i = 0; i < vectorAsList.size(); i++) {
            vector[i] = vectorAsList.get(i).floatValue();
        }
        return vector;
    }
}
Actions like create index and legacy create/update index template will have the
 * mapping properties under a _doc key.
 */
public static final String DOC = "_doc";

/**
 * Name for properties. An object field will define subfields as properties.
 */
public static final String PROPERTIES = "properties";

/**
 * Separator in a field path.
 */
public static final String PATH_SEPARATOR = ".";
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/constants/SemanticFieldConstants.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.constants;

/**
 * Constants for the mapping parameters of the semantic field.
 */
public class SemanticFieldConstants {
    /**
     * Name of the model id parameter. We use this key to define the id of the ML model that we will use for the
     * semantic field.
     */
    public static final String MODEL_ID = "model_id";

    /**
     * Name of the search model id parameter. We use this key to define the id of the ML model that we will use to
     * inference the query text during the search. If this parameter is not defined we will use the model_id instead.
     */
    public static final String SEARCH_MODEL_ID = "search_model_id";

    /**
     * Name of the raw field type parameter. We use this key to define the field type for the raw data. It will control
     * how to store and query the raw data.
     */
    public static final String RAW_FIELD_TYPE = "raw_field_type";

    /**
     * Name of the semantic info field name parameter. We use this key to define a custom field name for the
     * semantic info.
     */
    public static final String SEMANTIC_INFO_FIELD_NAME = "semantic_info_field_name";

    /**
     * Default suffix for the semantic info field name. It will be used to construct the field name of the
     * semantic info.
     */
    public static final String DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX = "_semantic_info";

    /**
     * Name of the field to control if we should do chunking for the semantic field. By default, the chunking is
     * disabled to not downgrade the search performance.
     */
    public static final String CHUNKING = "chunking";

    /**
     * Name of the field for the search analyzer parameter. With this field set up, the user does not have to
     * specify it during query time.
     */
    public static final String SEMANTIC_FIELD_SEARCH_ANALYZER = "semantic_field_search_analyzer";
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/executors/HybridQueryExecutorCollector.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.executors;

import lombok.AccessLevel;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Synchronized;

import java.util.Optional;
import java.util.function.Function;

/**
 * {@link HybridQueryExecutorCollector} is a generic Collector used by Hybrid Search Query during
 * Query phase to parallelize sub query's action to improve latency
 *
 * @param <I> type of the input shared by all collectors created by {@code newCollector}
 * @param <R> type of the result produced by the collect action
 */
@RequiredArgsConstructor(staticName = "newCollector", access = AccessLevel.PACKAGE)
public final class HybridQueryExecutorCollector<I, R> {

    // will be used as input for all instances of collector generated by newCollector method,
    // if it is required for collect operation
    private final I param;

    // getResult should only be called after collector's collect method is invoked.
    @Getter(onMethod_ = { @Synchronized })
    private Optional<R> result = Optional.empty();

    /**
     * Called once for every time an action has to be performed on this Collector
     * @param action function that will be executed and result will be stored at result.
     */
    @Synchronized
    public void collect(Function<I, R> action) {
        result = Optional.ofNullable(action.apply(param));
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/executors/HybridQueryExecutorCollectorManager.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.executors;

/**
 * {@link HybridQueryExecutorCollectorManager} is responsible for creating new {@link HybridQueryExecutorCollector} instances
 *
 * @param <C> concrete type of the collector produced by this manager
 */
public interface HybridQueryExecutorCollectorManager<C extends HybridQueryExecutorCollector> {
    /**
     * Return a new Collector instance that extends {@link HybridQueryExecutor}.
     * This will be used during Hybrid Search when sub queries wants to execute part of
     * operation that is independent of each other that can be parallelized to improve
     * the performance.
     * @return HybridQueryExecutorCollector
     */
    C newCollector();
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/BooleanQueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;

import lombok.extern.log4j.Log4j2;

/**
 * Extractor for boolean queries. Collects the extractable text of every
 * non-prohibited clause and joins the pieces with single spaces.
 */
@Log4j2
public class BooleanQueryTextExtractor implements QueryTextExtractor {

    private final QueryTextExtractorRegistry registry;

    public BooleanQueryTextExtractor(QueryTextExtractorRegistry registry) {
        this.registry = registry;
    }

    @Override
    public String extractQueryText(Query query, String fieldName) {
        BooleanQuery booleanQuery = toQueryType(query, BooleanQuery.class);

        StringBuilder sb = new StringBuilder();

        for (BooleanClause clause : booleanQuery.clauses()) {
            // Skip MUST_NOT clauses as they represent negative terms
            if (clause.isProhibited()) {
                continue;
            }

            try {
                String clauseText = registry.extractQueryText(clause.query(), fieldName);
                if (clauseText.isEmpty() == false) {
                    if (sb.isEmpty() == false) {
                        sb.append(" ");
                    }
                    sb.append(clauseText);
                }
            } catch (IllegalArgumentException e) {
                // Best-effort extraction: an unsupported clause type should not fail the whole query
                log.warn("Failed to extract text from clause {}: {}", clause, e.getMessage(), e);
            }
        }

        return sb.toString();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/HybridQueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.Query;
import org.opensearch.neuralsearch.query.HybridQuery;

import java.util.LinkedHashSet;
import java.util.Set;

/**
 * Extractor for hybrid queries that combines text from all sub-queries
 */
public class HybridQueryTextExtractor implements QueryTextExtractor {

    private final QueryTextExtractorRegistry registry;

    public HybridQueryTextExtractor(QueryTextExtractorRegistry registry) {
        this.registry = registry;
    }

    @Override
    public String extractQueryText(Query query, String fieldName) {
        HybridQuery hybridQuery = toQueryType(query, HybridQuery.class);

        // LinkedHashSet avoids duplicates while preserving sub-query order,
        // so the joined result is deterministic (a plain HashSet would not be)
        Set<String> queryTexts = new LinkedHashSet<>();

        // Extract text from each sub-query
        for (Query subQuery : hybridQuery.getSubQueries()) {
            String extractedText = registry.extractQueryText(subQuery, fieldName);
            if (extractedText != null && extractedText.isEmpty() == false) {
                queryTexts.add(extractedText);
            }
        }

        // Join with spaces
        return String.join(" ", queryTexts).trim();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/NestedQueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.Query;
import org.opensearch.index.search.OpenSearchToParentBlockJoinQuery;

/**
 * Extractor for nested queries, which arrive as a parent block join query.
 * Delegates to the registry to extract text from the wrapped child query.
 */
public class NestedQueryTextExtractor implements QueryTextExtractor {
    private final QueryTextExtractorRegistry registry;

    public NestedQueryTextExtractor(QueryTextExtractorRegistry registry) {
        this.registry = registry;
    }

    @Override
    public String extractQueryText(Query query, String fieldName) {
        OpenSearchToParentBlockJoinQuery blockJoinQuery = toQueryType(query, OpenSearchToParentBlockJoinQuery.class);
        return registry.extractQueryText(blockJoinQuery.getChildQuery(), fieldName);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/NeuralQueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.Query;
import org.opensearch.neuralsearch.query.NeuralKNNQuery;

/**
 * Extractor for neural queries
 */
public class NeuralQueryTextExtractor implements QueryTextExtractor {

    @Override
    public String extractQueryText(Query query, String fieldName) {
        NeuralKNNQuery neuralQuery = toQueryType(query, NeuralKNNQuery.class);
        return neuralQuery.getOriginalQueryText();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/QueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.Query;

import java.util.Locale;

/**
 * Interface for extracting query text from different query types
 */
public interface QueryTextExtractor {
    /**
     * Converts a query to the expected type, throwing an exception if the type doesn't match
     *
     * @param query The query to convert
     * @param expectedType The expected query type
     * @param <T> concrete query type to cast to
     * @return The query cast to the expected type
     * @throws IllegalArgumentException if the query is not of the expected type
     */
    default <T extends Query> T toQueryType(Query query, Class<T> expectedType) {
        if (!expectedType.isInstance(query)) {
            throw new IllegalArgumentException(
                String.format(Locale.ROOT, "Expected %s but got %s", expectedType.getSimpleName(), query.getClass().getSimpleName())
            );
        }
        return expectedType.cast(query);
    }

    /**
     * Extracts text from a query for highlighting
     *
     * @param query The query to extract text from
     * @param fieldName The name of the field being highlighted
     * @return The extracted query text
     */
    String extractQueryText(Query query, String fieldName);
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/TermQueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

/**
 * Extractor for term queries
 */
public class TermQueryTextExtractor implements QueryTextExtractor {

    @Override
    public String extractQueryText(Query query, String fieldName) {
        TermQuery termQuery = toQueryType(query, TermQuery.class);

        Term term = termQuery.getTerm();
        // Only include terms from the field we're highlighting
        if (fieldName.equals(term.field())) {
            return term.text();
        }
        return "";
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/mapper/dto/SemanticParameters.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.mapper.dto;

import lombok.Builder;
import lombok.Getter;

/**
 * A DTO to hold all the semantic parameters.
 */
@Getter
@Builder
public class SemanticParameters {
    private final String modelId;
    private final String searchModelId;
    private final String rawFieldType;
    private final String semanticInfoFieldName;
    private final Boolean chunkingEnabled;
    private final String semanticFieldSearchAnalyzer;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/InferenceRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import java.util.List;

import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.Setter;
import lombok.experimental.SuperBuilder;

/**
 * Base abstract class for inference requests.
 * This class contains common fields and behaviors shared across different types of inference requests.
 */
@SuperBuilder
@NoArgsConstructor
@Getter
@Setter
public abstract class InferenceRequest {
    /**
     * Unique identifier for the model to be used for inference.
     * This field is required and cannot be null.
     */
    @NonNull
    private String modelId;
    /**
     * List of targetResponseFilters to be applied.
     * Defaults to ["sentence_embedding"] if not specified.
     */
    @Builder.Default
    private List<String> targetResponseFilters = List.of("sentence_embedding");
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/MapInferenceRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import java.util.Map;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.experimental.SuperBuilder;

/**
 * Implementation of InferenceRequest for inputObjects based inference requests.
 * Use this class when the input data consists of key-value pairs.
 *
 * @see InferenceRequest
 */
@SuperBuilder
@NoArgsConstructor
@Getter
@Setter
public class MapInferenceRequest extends InferenceRequest {
    private Map<String, String> inputObjects;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/NormalizationExecuteDTO.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;
import org.opensearch.search.fetch.FetchSearchResult;
import org.opensearch.search.query.QuerySearchResult;

import java.util.List;
import java.util.Optional;

/**
 * DTO object to hold data in NormalizationProcessorWorkflow class
 * in NormalizationProcessorWorkflow.
 */
@AllArgsConstructor
@Builder
@Getter
public class NormalizationExecuteDTO {
    @NonNull
    private List<QuerySearchResult> querySearchResults;
    @NonNull
    private Optional<FetchSearchResult> fetchSearchResultOptional;
    @NonNull
    private ScoreNormalizationTechnique normalizationTechnique;
    @NonNull
    private ScoreCombinationTechnique combinationTechnique;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflowExecuteRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import org.opensearch.action.search.SearchPhaseContext;
import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;
import org.opensearch.search.fetch.FetchSearchResult;
import org.opensearch.search.pipeline.PipelineProcessingContext;
import org.opensearch.search.query.QuerySearchResult;

import java.util.List;
import java.util.Optional;

/**
 * DTO class to hold request parameters for normalization and combination
 */
@Builder
@AllArgsConstructor
@Getter
public class NormalizationProcessorWorkflowExecuteRequest {
    final List<QuerySearchResult> querySearchResults;
    final Optional<FetchSearchResult> fetchSearchResultOptional;
    final ScoreNormalizationTechnique normalizationTechnique;
    final ScoreCombinationTechnique combinationTechnique;
    boolean explain;
    final PipelineProcessingContext pipelineProcessingContext;
    final SearchPhaseContext searchPhaseContext;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/NormalizeScoresDTO.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;

import java.util.List;

/**
 * DTO object to hold data required for score normalization.
 */
@AllArgsConstructor
@Builder
@Getter
public class NormalizeScoresDTO {
    @NonNull
    private List<CompoundTopDocs> queryTopDocs;
    @NonNull
    private ScoreNormalizationTechnique normalizationTechnique;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/SearchShard.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.AllArgsConstructor;
import lombok.Value;
import org.opensearch.search.SearchShardTarget;

/**
 * DTO class to store index, shardId and nodeId for a search shard.
 */
@Value
@AllArgsConstructor
public class SearchShard {
    String index;
    int shardId;
    String nodeId;

    /**
     * Create SearchShard from SearchShardTarget
     * @param searchShardTarget target shard to copy index, shard id and node id from
     * @return SearchShard
     */
    public static SearchShard createSearchShard(final SearchShardTarget searchShardTarget) {
        return new SearchShard(searchShardTarget.getIndex(), searchShardTarget.getShardId().id(), searchShardTarget.getNodeId());
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/SimilarityInferenceRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.NoArgsConstructor;
import lombok.Getter;
import lombok.Setter;
import lombok.experimental.SuperBuilder;

/**
 * Implementation of InferenceRequest for similarity based text inference requests.
 *
 * @see TextInferenceRequest
 */
@SuperBuilder
@NoArgsConstructor
@Getter
@Setter
public class SimilarityInferenceRequest extends TextInferenceRequest {
    private String queryText;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/TechniqueCompatibilityCheckDTO.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;

/**
 * DTO object to hold data required for validation.
 */
@AllArgsConstructor
@Builder
@Getter
public class TechniqueCompatibilityCheckDTO {
    @NonNull
    private ScoreCombinationTechnique scoreCombinationTechnique;
    @NonNull
    private ScoreNormalizationTechnique scoreNormalizationTechnique;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/TextInferenceRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import java.util.List;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.experimental.SuperBuilder;

/**
 * Implementation of InferenceRequest for inputTexts based inference requests.
 * Use this class when the input data consists of list of strings.
 *
 * @see InferenceRequest
 */
@SuperBuilder
@NoArgsConstructor
@Getter
@Setter
public class TextInferenceRequest extends InferenceRequest {
    private List<String> inputTexts; // on which inference needs to happen
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.chunker;

import com.google.common.collect.ImmutableMap;

import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;

/**
 * A factory to create different chunking algorithm objects.
 */
public final class ChunkerFactory {

    private ChunkerFactory() {} // no instance of this factory class

    // maps algorithm name -> constructor taking the chunker's configuration parameters
    private static final Map<String, Function<Map<String, Object>, Chunker>> CHUNKERS_CONSTRUCTORS = ImmutableMap.of(
        FixedTokenLengthChunker.ALGORITHM_NAME,
        FixedTokenLengthChunker::new,
        DelimiterChunker.ALGORITHM_NAME,
        DelimiterChunker::new
    );

    /** Set of supported chunker algorithm types */
    public static Set<String> CHUNKER_ALGORITHMS = CHUNKERS_CONSTRUCTORS.keySet();

    /**
     * Creates a new Chunker instance based on the specified type and parameters.
     *
     * @param type the type of chunker to create
     * @param parameters configuration parameters for the chunker
     * @return a new Chunker instance configured with the given parameters
     */
    public static Chunker create(final String type, final Map<String, Object> parameters) {
        Function<Map<String, Object>, Chunker> chunkerConstructionFunction = CHUNKERS_CONSTRUCTORS.get(type);
        // chunkerConstructionFunction is not null because we have validated the type in text chunking processor
        Objects.requireNonNull(chunkerConstructionFunction);
        return chunkerConstructionFunction.apply(parameters);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/combination/CombineScoresDto.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.combination;

import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import org.apache.lucene.search.Sort;
import org.opensearch.common.Nullable;
import org.opensearch.neuralsearch.processor.CompoundTopDocs;
import org.opensearch.search.query.QuerySearchResult;

/**
 * DTO object to hold data required for Score Combination.
 */
@AllArgsConstructor
@Builder
@Getter
public class CombineScoresDto {
    @NonNull
    private List<CompoundTopDocs> queryTopDocs;
    @NonNull
    private ScoreCombinationTechnique scoreCombinationTechnique;
    @NonNull
    private List<QuerySearchResult> querySearchResults;
    @Nullable
    private Sort sort;
    private int fromValueForSingleShard;
    private boolean isSingleShard;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/combination/ScoreCombinationTechnique.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.combination;

/**
 * Abstracts combination of scores in query search results.
 */
public interface ScoreCombinationTechnique {

    /**
     * Defines combination function specific to this technique
     * @param scores array of collected original scores
     * @return combined score
     */
    float combine(final float[] scores);

    /**
     * Returns the name of the combination technique.
     * @return technique name
     */
    String techniqueName();
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/explain/CombinedExplanationDetails.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.explain;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;

/**
 * DTO class to hold explain details for normalization and combination
 */
@AllArgsConstructor
@Builder
@Getter
public class CombinedExplanationDetails {
    private ExplanationDetails normalizationExplanations;
    private ExplanationDetails combinationExplanations;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/explain/DocIdAtSearchShard.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.explain;

import lombok.Value;
import org.opensearch.neuralsearch.processor.SearchShard;

/**
 * DTO class to store docId and search shard for a query.
 * Used in {@link org.opensearch.neuralsearch.processor.NormalizationProcessorWorkflow} to normalize scores across shards.
 */
@Value
public class DocIdAtSearchShard {
    int docId;
    SearchShard searchShard;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/explain/ExplainableTechnique.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.explain;

import org.opensearch.neuralsearch.processor.CompoundTopDocs;

import java.util.List;
import java.util.Map;

/**
 * Abstracts explanation of score combination or normalization technique.
 */
public interface ExplainableTechnique {

    String GENERIC_DESCRIPTION_OF_TECHNIQUE = "generic score processing technique";

    /**
     * Returns a string with general description of the technique
     */
    default String describe() {
        return GENERIC_DESCRIPTION_OF_TECHNIQUE;
    }

    /**
     * Returns a map with explanation for each document id
     * @param queryTopDocs collection of CompoundTopDocs for each shard result
     * @return map of document per shard and corresponding explanation object
     */
    default Map<DocIdAtSearchShard, ExplanationDetails> explain(final List<CompoundTopDocs> queryTopDocs) {
        return Map.of();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/explain/ExplanationDetails.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.explain;

import lombok.AllArgsConstructor;
import lombok.Value;
import org.apache.commons.lang3.tuple.Pair;

import java.util.List;

/**
 * DTO class to store value and description for explain details.
 * Used in {@link org.opensearch.neuralsearch.processor.NormalizationProcessorWorkflow} to normalize scores across shards.
 */
@Value
@AllArgsConstructor
public class ExplanationDetails {
    int docId;
    List<Pair<Float, String>> scoreDetails;

    public ExplanationDetails(List<Pair<Float, String>> scoreDetails) {
        // pass docId as -1 to match docId in SearchHit
        // https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/search/SearchHit.java#L170
        this(-1, scoreDetails);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/explain/ExplanationPayload.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.explain;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;

import java.util.Map;

/**
 * DTO class to hold explain details for normalization and combination
 */
@AllArgsConstructor
@Builder
@Getter
public class ExplanationPayload {
    private final Map<PayloadType, Object> explainPayload;

    public enum PayloadType {
        NORMALIZATION_PROCESSOR
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/factory/ExplanationResponseProcessorFactory.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.factory;

import org.opensearch.neuralsearch.processor.ExplanationResponseProcessor;
import org.opensearch.search.pipeline.Processor;
import org.opensearch.search.pipeline.SearchResponseProcessor;

import java.util.Map;

/**
 * Factory class for creating ExplanationResponseProcessor
 */
public class ExplanationResponseProcessorFactory implements Processor.Factory<SearchResponseProcessor> {

    @Override
    public SearchResponseProcessor create(
        Map<String, Processor.Factory<SearchResponseProcessor>> processorFactories,
        String tag,
        String description,
        boolean ignoreFailure,
        Map<String, Object> config,
        Processor.PipelineContext pipelineContext
    ) throws Exception {
        return new ExplanationResponseProcessor(description, tag, ignoreFailure);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/highlight/SentenceHighlightingRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.highlight;

import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.experimental.SuperBuilder;
import org.opensearch.neuralsearch.processor.InferenceRequest;

/**
 * Implementation of InferenceRequest for sentence highlighting inference requests.
 * This class handles the question and context parameters needed for highlighting.
 *
 * @see InferenceRequest
 */
@SuperBuilder
@NoArgsConstructor
@Getter
@Setter
public class SentenceHighlightingRequest extends InferenceRequest {
    /**
     * The question to be answered from the context
     */
    private String question;

    /**
     * The context text in which to find the answer
     */
    private String context;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/normalization/ScoreNormalizationTechnique.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.normalization;

import org.opensearch.neuralsearch.processor.NormalizeScoresDTO;

/**
 * Abstracts normalization of scores in query search results.
 */
public interface ScoreNormalizationTechnique {

    /**
     * Performs score normalization based on input normalization technique.
     * Mutates input object by updating normalized scores.
     * @param normalizeScoresDTO is a data transfer object that contains queryTopDocs
     * original query results from multiple shards and multiple sub-queries, ScoreNormalizationTechnique,
     * and nullable rankConstant that is only used in RRF technique
     */
    void normalize(final NormalizeScoresDTO normalizeScoresDTO);

    /**
     * Returns the name of the normalization technique.
     * @return technique name
     */
    String techniqueName();
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/rerank/RerankType.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.rerank;

import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import lombok.Getter;

/**
 * enum for distinguishing various reranking methods
 */
public enum RerankType {

    ML_OPENSEARCH("ml_opensearch"),
    BY_FIELD("by_field");

    @Getter
    private final String label;

    RerankType(String label) {
        this.label = label;
    }

    // immutable reverse lookup from label string to enum constant
    private static final Map<String, RerankType> LABEL_MAP;
    static {
        Map<String, RerankType> labelMap = new HashMap<>();
        for (RerankType type : RerankType.values()) {
            labelMap.put(type.getLabel(), type);
        }
        LABEL_MAP = Collections.unmodifiableMap(labelMap);
    }

    /**
     * Construct a RerankType from the label
     * @param label label of a RerankType
     * @return RerankType represented by the label
     */
    public static RerankType from(final String label) {
        RerankType ans = LABEL_MAP.get(label);
        if (ans == null) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Wrong rerank type name: %s", label));
        }
        return ans;
    }

    public static Map<String, RerankType> labelMap() {
        return LABEL_MAP;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/rerank/context/ContextSourceFetcher.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.rerank.context;

import java.util.Map;

import org.opensearch.action.search.SearchRequest;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.core.action.ActionListener;

/**
 * Interface that gets context from some source and puts it in a map
 * for a reranking processor to use
 */
public interface ContextSourceFetcher {

    /**
     * Fetch the information needed in order to rerank.
     * That could be as simple as grabbing a field from the search request or
     * as complicated as a lookup to some external service
     * @param searchRequest the search query
     * @param searchResponse the search results, in case they're relevant
     * @param listener be async
     */
    void fetchContext(
        final SearchRequest searchRequest,
        final SearchResponse searchResponse,
        final ActionListener<Map<String, Object>> listener
    );

    /**
     * Get the name of the contextSourceFetcher.
This will be used as the field 35 | * name in the context config for the pipeline 36 | * @return Name of the fetcher 37 | */ 38 | String getName(); 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/HybridQueryContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | import lombok.Builder; 8 | import lombok.Getter; 9 | 10 | /** 11 | * Class that holds the low level information of hybrid query in the form of context 12 | */ 13 | @Builder 14 | @Getter 15 | public class HybridQueryContext { 16 | private Integer paginationDepth; 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/HybridSubQueryScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | import lombok.Data; 8 | import org.apache.lucene.search.Scorable; 9 | 10 | import java.io.IOException; 11 | import java.util.Arrays; 12 | 13 | /** 14 | * Scorer implementation for Hybrid Query. 
This object is light and expected to be re-used between different doc ids 15 | */ 16 | @Data 17 | public class HybridSubQueryScorer extends Scorable { 18 | // array of scores from all sub-queries for a single doc id 19 | private final float[] subQueryScores; 20 | // array of min competitive scores, score is shard level 21 | private final float[] minScores; 22 | 23 | public HybridSubQueryScorer(int numOfSubQueries) { 24 | this.minScores = new float[numOfSubQueries]; 25 | this.subQueryScores = new float[numOfSubQueries]; 26 | } 27 | 28 | @Override 29 | public float score() throws IOException { 30 | // for scenarios when scorer is needed (like in aggregations) for one doc id return sum of sub-query scores 31 | float totalScore = 0.0f; 32 | for (float score : subQueryScores) { 33 | totalScore += score; 34 | } 35 | return totalScore; 36 | } 37 | 38 | /** 39 | * Reset sub-query scores to 0.0f so this scorer can be reused for next doc id 40 | */ 41 | public void resetScores() { 42 | Arrays.fill(subQueryScores, 0.0f); 43 | } 44 | 45 | public int getNumOfSubQueries() { 46 | return subQueryScores.length; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/ModelInferenceQueryBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | /** 8 | * Query builders which calls ml-commons API to do model inference. 9 | * The model inference result is used for search on target field. 10 | */ 11 | public interface ModelInferenceQueryBuilder { 12 | /** 13 | * Get the model id used by ml-commons model inference. Return null if the model id is absent. 14 | */ 15 | public String modelId(); 16 | 17 | /** 18 | * Set a new model id for the query builder. 
19 | */ 20 | public ModelInferenceQueryBuilder modelId(String modelId); 21 | 22 | /** 23 | * Get the field name for search. 24 | */ 25 | public String fieldName(); 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/NeuralKNNQuery.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | import lombok.Getter; 8 | import lombok.RequiredArgsConstructor; 9 | import org.apache.lucene.search.IndexSearcher; 10 | import org.apache.lucene.search.Query; 11 | import org.apache.lucene.search.QueryVisitor; 12 | import org.apache.lucene.search.ScoreMode; 13 | import org.apache.lucene.search.Weight; 14 | 15 | import java.io.IOException; 16 | import java.util.Objects; 17 | 18 | /** 19 | * Wraps KNN Lucene query to support neural search extensions. 20 | * Delegates core operations to the underlying KNN query. 
21 | */ 22 | @Getter 23 | @RequiredArgsConstructor 24 | public class NeuralKNNQuery extends Query { 25 | private final Query knnQuery; 26 | private final String originalQueryText; 27 | 28 | @Override 29 | public String toString(String field) { 30 | return knnQuery.toString(field); 31 | } 32 | 33 | @Override 34 | public void visit(QueryVisitor visitor) { 35 | // Delegate the visitor to the underlying KNN query 36 | knnQuery.visit(visitor); 37 | } 38 | 39 | @Override 40 | public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { 41 | // Delegate weight creation to the underlying KNN query 42 | return knnQuery.createWeight(searcher, scoreMode, boost); 43 | } 44 | 45 | @Override 46 | public Query rewrite(IndexSearcher indexSearcher) throws IOException { 47 | Query rewritten = knnQuery.rewrite(indexSearcher); 48 | if (rewritten == knnQuery) { 49 | return this; 50 | } 51 | return new NeuralKNNQuery(rewritten, originalQueryText); 52 | } 53 | 54 | @Override 55 | public boolean equals(Object other) { 56 | if (this == other) return true; 57 | if (other == null || getClass() != other.getClass()) return false; 58 | NeuralKNNQuery that = (NeuralKNNQuery) other; 59 | return Objects.equals(knnQuery, that.knnQuery) && Objects.equals(originalQueryText, that.originalQueryText); 60 | } 61 | 62 | @Override 63 | public int hashCode() { 64 | return Objects.hash(knnQuery, originalQueryText); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/dto/NeuralQueryBuildStage.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query.dto; 6 | 7 | public enum NeuralQueryBuildStage { 8 | FROM_X_CONTENT, 9 | REWRITE 10 | } 11 | 
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/dto/NeuralQueryTargetFieldConfig.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.query.dto;

import lombok.Builder;
import lombok.Data;

/**
 * Resolved configuration of a neural query's target field (semantic-field metadata, model ids, paths).
 */
@Data
@Builder
public class NeuralQueryTargetFieldConfig {
    private final Boolean isSemanticField;
    private final Boolean isUnmappedField;
    private final String searchModelId;
    private final String embeddingFieldType;
    private final String embeddingFieldPath;
    private final String chunksPath;
    private final Boolean chunkingEnabled;
    private final String semanticFieldSearchAnalyzer;
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/HitsThresholdChecker.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search;

import java.util.Locale;

import org.apache.lucene.search.ScoreMode;

import lombok.Getter;

/**
 * Abstracts algorithm that allows early termination for the search flow if number of hits reached
 * certain threshold
 */
public class HitsThresholdChecker {
    private int hitCount;
    @Getter
    private final int totalHitsThreshold;

    public HitsThresholdChecker(int totalHitsThreshold) {
        if (totalHitsThreshold < 0) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "totalHitsThreshold must be >= 0, got %d", totalHitsThreshold));
        }
        this.totalHitsThreshold = totalHitsThreshold;
    }

    public void incrementHitCount() {
        ++hitCount;
    }

    public boolean isThresholdReached() {
        return hitCount >= getTotalHitsThreshold();
    }

    public ScoreMode scoreMode() {
        return ScoreMode.TOP_SCORES;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/HybridDisiWrapper.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search;

import lombok.Getter;
import org.apache.lucene.search.DisiWrapper;
import org.apache.lucene.search.Scorer;

/**
 * Wrapper for DisiWrapper, saves state of sub-queries for performance reasons
 */
@Getter
public class HybridDisiWrapper extends DisiWrapper {
    // index of disi wrapper sub-query object when its part of the hybrid query
    private final int subQueryIndex;

    public HybridDisiWrapper(Scorer scorer, int subQueryIndex) {
        super(scorer, false);
        this.subQueryIndex = subQueryIndex;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/collector/HybridLeafCollector.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search.collector;

import lombok.AccessLevel;
import lombok.Getter;
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorable;
import org.opensearch.neuralsearch.query.HybridSubQueryScorer;

import java.io.IOException;
import java.util.Objects;

/**
 * The abstract class for hybrid query leaf collector
 */
@Log4j2
public abstract class HybridLeafCollector implements LeafCollector {
    @Getter(AccessLevel.PACKAGE)
    HybridSubQueryScorer compoundQueryScorer;

    @Override
    public void setScorer(Scorable scorer) throws IOException {
        if (scorer instanceof HybridSubQueryScorer) {
            compoundQueryScorer = (HybridSubQueryScorer) scorer;
        } else {
            // scorer may wrap the hybrid scorer (e.g. for aggregations); search its children recursively
            compoundQueryScorer = getHybridQueryScorer(scorer);
            if (Objects.isNull(compoundQueryScorer)) {
                log.error("cannot find scorer of type HybridQueryScorer in a hierarchy of scorer {}", scorer);
            }
        }
    }

    private HybridSubQueryScorer getHybridQueryScorer(final Scorable scorer) throws IOException {
        if (Objects.isNull(scorer)) {
            return null;
        }
        if (scorer instanceof HybridSubQueryScorer) {
            return (HybridSubQueryScorer) scorer;
        }
        for (Scorable.ChildScorable childScorable : scorer.getChildren()) {
            HybridSubQueryScorer hybridQueryScorer = getHybridQueryScorer(childScorable.child());
            if (Objects.nonNull(hybridQueryScorer)) {
                log.debug("found hybrid query scorer, it's child of scorer {}", childScorable.child().getClass().getSimpleName());
                return hybridQueryScorer;
            }
        }
        return null;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/collector/HybridSearchCollector.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search.collector;

import java.util.List;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.TopDocs;

/**
 * Common interface class for Hybrid search collectors
 */
public interface HybridSearchCollector extends Collector {
    /**
     * @return List of topDocs which contains topDocs of individual subqueries.
     */
    List<TopDocs> topDocs();

    /**
     * @return count of total hits per shard
     */
    int getTotalHits();

    /**
     * @return maxScore found on a shard
     */
    float getMaxScore();
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/collector/SimpleFieldCollector.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search.collector;

import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Sort;
import org.opensearch.neuralsearch.search.HitsThresholdChecker;

/*
 SimpleFieldCollector collects the sorted results at the shard level for every individual query.
 It collects the list of TopFieldDocs.
*/
public final class SimpleFieldCollector extends HybridTopFieldDocSortCollector {

    public SimpleFieldCollector(int numHits, HitsThresholdChecker hitsThresholdChecker, Sort sort) {
        super(numHits, hitsThresholdChecker, sort, null);
    }

    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) {
        docBase = context.docBase;

        return new HybridTopDocSortLeafCollector() {
            @Override
            public void collect(int doc) throws IOException {
                if (Objects.isNull(compoundQueryScorer)) {
                    throw new IllegalArgumentException("scorers are null for all sub-queries in hybrid query");
                }
                float[] subScoresByQuery = compoundQueryScorer.getSubQueryScores();
                initializePriorityQueuesWithComparators(context, subScoresByQuery.length);
                incrementTotalHitCount();
                for (int i = 0; i < subScoresByQuery.length; i++) {
                    float score = subScoresByQuery[i];
                    // if score is 0.0 there is no hits for that sub-query
                    if (score == 0) {
                        continue;
                    }
                    maxScore = Math.max(score, maxScore);
                    if (queueFull[i]) {
                        if (thresholdCheck(doc, i)) {
                            return;
                        }
                        collectCompetitiveHit(doc, i);
                    } else {
                        collectedHits[i]++;
                        collectHit(doc, collectedHits[i], i, score);
                    }
                }
            }
        };
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryFieldDocComparator.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search.query;

import java.util.Comparator;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SortField;

/**
 * Comparator class that compares two field docs as per the sorting criteria
 */
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
class HybridQueryFieldDocComparator implements Comparator<FieldDoc> {
    final SortField[] sortFields;
    final FieldComparator[] comparators;
    final int[] reverseMul;
    final Comparator<ScoreDoc> tieBreaker;

    public HybridQueryFieldDocComparator(SortField[] sortFields, Comparator<ScoreDoc> tieBreaker) {
        this.sortFields = sortFields;
        this.tieBreaker = tieBreaker;
        comparators = new FieldComparator[sortFields.length];
        reverseMul = new int[sortFields.length];
        for (int compIDX = 0; compIDX < sortFields.length; compIDX++) {
            final SortField sortField = sortFields[compIDX];
            // one-slot comparator is enough: we only use compareValues, never the collect path
            comparators[compIDX] = sortField.getComparator(1, Pruning.NONE);
            reverseMul[compIDX] = sortField.getReverse() ? -1 : 1;
        }
    }

    @Override
    public int compare(final FieldDoc firstFD, final FieldDoc secondFD) {
        for (int compIDX = 0; compIDX < comparators.length; compIDX++) {
            final FieldComparator comp = comparators[compIDX];

            final int cmp = reverseMul[compIDX] * comp.compareValues(firstFD.fields[compIDX], secondFD.fields[compIDX]);

            if (cmp != 0) {
                return cmp;
            }
        }
        // all sort fields equal; fall back to the tie breaker
        return tieBreakCompare(firstFD, secondFD, tieBreaker);
    }

    private int tieBreakCompare(ScoreDoc firstDoc, ScoreDoc secondDoc, Comparator<ScoreDoc> tieBreaker) {
        assert tieBreaker != null;
        int value = tieBreaker.compare(firstDoc, secondDoc);
        return value;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/query/exception/HybridSearchRescoreQueryException.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search.query.exception;

import org.opensearch.OpenSearchException;

/**
 * Exception thrown when there is an issue with the hybrid search rescore query.
 */
public class HybridSearchRescoreQueryException extends OpenSearchException {

    public HybridSearchRescoreQueryException(Throwable cause) {
        super("rescore failed for hybrid query", cause);
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/settings/NeuralSearchSettings.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.settings;

import org.opensearch.common.settings.Setting;

import lombok.AccessLevel;
import lombok.NoArgsConstructor;

/**
 * Class defines settings specific to neural-search plugin
 */
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public final class NeuralSearchSettings {

    /**
     * Limits the number of document fields that can be passed to the reranker.
     */
    public static final Setting<Integer> RERANKER_MAX_DOC_FIELDS = Setting.intSetting(
        "plugins.neural_search.reranker_max_document_fields",
        50,
        Setting.Property.NodeScope
    );

    /**
     * Enables or disables the Stats API and event stat collection.
     * If API is called when stats are disabled, the response will 403.
     * Event stat increment calls are also treated as no-ops.
     */
    public static final Setting<Boolean> NEURAL_STATS_ENABLED = Setting.boolSetting(
        "plugins.neural_search.stats_enabled",
        false,
        Setting.Property.NodeScope,
        Setting.Property.Dynamic
    );
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/settings/NeuralSearchSettingsAccessor.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.settings;

import lombok.Getter;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.stats.events.EventStatsManager;

/**
 * Class handles exposing settings related to neural search and manages callbacks when the settings change
 */
public class NeuralSearchSettingsAccessor {
    // volatile: read by search threads, written by the cluster settings update callback
    @Getter
    private volatile boolean isStatsEnabled;

    /**
     * Constructor, registers callbacks to update settings
     * @param clusterService
     * @param settings
     */
    public NeuralSearchSettingsAccessor(ClusterService clusterService, Settings settings) {
        isStatsEnabled = NeuralSearchSettings.NEURAL_STATS_ENABLED.get(settings);
        registerSettingsCallbacks(clusterService);
    }

    private void registerSettingsCallbacks(ClusterService clusterService) {
        clusterService.getClusterSettings().addSettingsUpdateConsumer(NeuralSearchSettings.NEURAL_STATS_ENABLED, value -> {
            // If stats are being toggled off, clear and reset all stats
            if (isStatsEnabled && (value == false)) {
                EventStatsManager.instance().reset();
            }
            isStatsEnabled = value;
        });
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/stats/common/StatName.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.common;

/**
 * Interface for objects that hold stat name, path, and type information.
 * The stat name is used as the unique identifier for the stat. It can be used as a request parameter for user filtering.
 */
public interface StatName {
    /**
     * Gets the name of the stat. These must be unique to support user request stat filtering.
     * @return the name of the stat
     */
    String getNameString();

    /**
     * Gets the path of the stat in dot notation.
     * The path must be unique and avoid collisions with other stat names.
     * @return the path of the stat
     */
    String getFullPath();

    /**
     * The type of the stat
     * @return the stat type
     */
    StatType getStatType();
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/common/StatSnapshot.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.common;

import org.opensearch.core.xcontent.ToXContent;
import org.opensearch.core.xcontent.ToXContentFragment;
import org.opensearch.core.xcontent.XContentBuilder;

import java.io.IOException;

/**
 * A serializable snapshot of a stat at a given point in time.
 * Holds stat values, type, and metadata for processing and returning across rest layer.
 * These are not meant to be persisted.
 * @param <T> The type of the value of the stat
 */
public interface StatSnapshot<T> extends ToXContentFragment {
    /**
     * Field name of the stat_type in XContent
     */
    String STAT_TYPE_FIELD = "stat_type";

    /**
     * Field name of the value in XContent
     */
    String VALUE_FIELD = "value";

    /**
     * Gets the raw value of the stat, excluding any metadata
     * @return the raw stat value
     */
    T getValue();

    /**
     * Converts to fields xContent, including stat metadata
     *
     * @param builder XContentBuilder
     * @param params Params
     * @return XContentBuilder
     * @throws IOException thrown by builder for invalid field
     */
    XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException;
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/common/StatType.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.common;

/**
 * Interface for the type of stat. Used for stat type metadata
 */
public interface StatType {

    /**
     * Get the name of the stat type containing info about the type and how to process it
     * @return name of the stat type
     */
    String getTypeString();
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/events/EventStat.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.events;

import org.opensearch.neuralsearch.stats.common.StatSnapshot;

/**
 * Interface for event stats. These contain logic to store and update ongoing event information.
 */
public interface EventStat {
    /**
     * Returns a single point in time value associated with the stat. Typically a counter.
     * @return the value of the stat
     */
    long getValue();

    /**
     * Returns a snapshot of the stat. Used to cross transport layer/rest layer
     * @return the snapshot of the stat
     */
    StatSnapshot<?> getStatSnapshot();

    /**
     * Increments the stat
     */
    void increment();

    /**
     * Resets the stat value
     */
    void reset();
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/events/EventStatType.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.events;

import org.opensearch.neuralsearch.stats.common.StatType;

import java.util.Locale;

/**
 * Enum for different kinds of event stat types to track
 */
public enum EventStatType implements StatType {
    TIMESTAMPED_EVENT_COUNTER;

    /**
     * Gets the name of the stat type, the enum name in lowercase
     * @return the name of the stat type
     */
    public String getTypeString() {
        return name().toLowerCase(Locale.ROOT);
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/info/CountableInfoStatSnapshot.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.info;

import org.opensearch.core.xcontent.ToXContent;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.neuralsearch.stats.common.StatSnapshot;

import java.io.IOException;
import java.util.concurrent.atomic.LongAdder;

/**
 * A countable stat snapshot for info stats.
 * Can be updated in place
 */
public class CountableInfoStatSnapshot implements StatSnapshot<Long> {
    private LongAdder adder;
    private InfoStatName statName;

    /**
     * Creates a new stat snapshot
     * @param statName the name of the stat it corresponds to
     */
    public CountableInfoStatSnapshot(InfoStatName statName) {
        this.statName = statName;
        this.adder = new LongAdder();
    }

    /**
     * Gets the counter value
     * @return the counter value
     */
    public Long getValue() {
        return adder.longValue();
    }

    /**
     * Increment the counter by a given delta
     * @param delta the amount to increment by
     */
    public void incrementBy(Long delta) {
        adder.add(delta);
    }

    /**
     * Converts to fields xContent, including stat metadata
     *
     * @param builder XContentBuilder
     * @param params Params
     * @return XContentBuilder
     * @throws IOException thrown by builder for invalid field
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
        builder.startObject();
        builder.field(StatSnapshot.VALUE_FIELD, getValue());
        builder.field(StatSnapshot.STAT_TYPE_FIELD, statName.getStatType().getTypeString());
        builder.endObject();
        return builder;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/info/InfoStatType.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.info;

import org.opensearch.neuralsearch.stats.common.StatType;

import java.util.Locale;

/**
 * Enum for different kinds of info stat types to track
 */
public enum InfoStatType implements StatType {
    INFO_COUNTER,
    INFO_STRING,
    INFO_BOOLEAN;

    /**
     * Gets the name of the stat type, the enum name in lowercase
     * @return the name of the stat type
     */
    public String getTypeString() {
        return name().toLowerCase(Locale.ROOT);
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/info/SettableInfoStatSnapshot.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.info;

import lombok.Getter;
import lombok.Setter;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.neuralsearch.stats.common.StatSnapshot;

import java.io.IOException;

/**
 * A settable info snapshot used to track Strings, booleans, or other simple serializable objects
 * These are meant to be constructed, set, and serialized, not for long storage in memory
 * @param <T> the type of the value to set
 */
public class SettableInfoStatSnapshot<T> implements StatSnapshot<T> {
    @Getter
    @Setter
    private T value;

    private InfoStatName statName;

    /**
     * Creates a new stat snapshot with default null value
     * @param statName the associated stat name
     */
    public SettableInfoStatSnapshot(InfoStatName statName) {
        this.statName = statName;
        this.value = null;
    }

    /**
     * Creates a new stat snapshot for a given value
     * @param statName the associated stat name
     * @param value the initial value to set
     */
    public SettableInfoStatSnapshot(InfoStatName statName, T value) {
        this.statName = statName;
        this.value = value;
    }

    /**
     * Converts to fields xContent, including stat metadata
     *
     * @param builder XContentBuilder
     * @param params Params
     * @return XContentBuilder
     * @throws IOException thrown by builder for invalid field
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        builder.field(StatSnapshot.VALUE_FIELD, getValue());
        builder.field(StatSnapshot.STAT_TYPE_FIELD, statName.getStatType().getTypeString());
        builder.endObject();
        return builder;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/transport/NeuralStatsAction.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.transport;

import org.opensearch.action.ActionType;
import org.opensearch.core.common.io.stream.Writeable;

/**
 * NeuralStatsAction class
 */
public class NeuralStatsAction extends ActionType<NeuralStatsResponse> {

    public static final NeuralStatsAction INSTANCE = new NeuralStatsAction();
    public static final String NAME = "cluster:admin/neural_stats_action";

    /**
     * Constructor
     */
    private NeuralStatsAction() {
        super(NAME, NeuralStatsResponse::new);
    }

    @Override
    public Writeable.Reader<NeuralStatsResponse> getResponseReader() {
        return NeuralStatsResponse::new;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/transport/NeuralStatsNodeRequest.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.transport;

import lombok.Getter;
import
/**
 * NeuralStatsNodeRequest represents the request to an individual node.
 * It wraps the cluster-level {@link NeuralStatsRequest} so every node receives the same parameters.
 */
public class NeuralStatsNodeRequest extends TransportRequest {
    // Cluster-level stats request this node-level request was fanned out from
    @Getter
    private NeuralStatsRequest request;

    /**
     * Empty constructor used by the transport layer
     */
    public NeuralStatsNodeRequest() {
        super();
    }

    /**
     * Deserialization constructor
     *
     * @param in input stream
     * @throws IOException in case of I/O errors
     */
    public NeuralStatsNodeRequest(StreamInput in) throws IOException {
        super(in);
        request = new NeuralStatsRequest(in);
    }

    /**
     * Constructor wrapping a cluster-level request
     *
     * @param request NeuralStatsRequest
     */
    public NeuralStatsNodeRequest(NeuralStatsRequest request) {
        this.request = request;
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        // write parent fields first, then the wrapped request — must mirror the stream constructor
        super.writeTo(out);
        request.writeTo(out);
    }
}
NeuralStatsRequest extends BaseNodesRequest { 20 | 21 | /** 22 | * Key indicating all stats should be retrieved 23 | */ 24 | @Getter 25 | private final NeuralStatsInput neuralStatsInput; 26 | 27 | /** 28 | * Empty constructor needed for NeuralStatsTransportAction 29 | */ 30 | public NeuralStatsRequest() { 31 | super((String[]) null); 32 | this.neuralStatsInput = new NeuralStatsInput(); 33 | } 34 | 35 | /** 36 | * Constructor 37 | * 38 | * @param in input stream 39 | * @throws IOException in case of I/O errors 40 | */ 41 | public NeuralStatsRequest(StreamInput in) throws IOException { 42 | super(in); 43 | this.neuralStatsInput = new NeuralStatsInput(in); 44 | } 45 | 46 | /** 47 | * Constructor 48 | * 49 | * @param nodeIds NodeIDs from which to retrieve stats 50 | */ 51 | public NeuralStatsRequest(String[] nodeIds, NeuralStatsInput neuralStatsInput) { 52 | super(nodeIds); 53 | this.neuralStatsInput = neuralStatsInput; 54 | } 55 | 56 | @Override 57 | public void writeTo(StreamOutput out) throws IOException { 58 | super.writeTo(out); 59 | neuralStatsInput.writeTo(out); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/util/HybridQueryUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.util; 6 | 7 | import lombok.AccessLevel; 8 | import lombok.NoArgsConstructor; 9 | import org.apache.lucene.search.BooleanQuery; 10 | import org.apache.lucene.search.Query; 11 | import org.opensearch.index.search.NestedHelper; 12 | import org.opensearch.neuralsearch.query.HybridQuery; 13 | import org.opensearch.search.internal.SearchContext; 14 | 15 | import java.util.Objects; 16 | 17 | /** 18 | * Utility class for anything related to hybrid query 19 | */ 20 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 21 | public class 
HybridQueryUtil { 22 | 23 | /** 24 | * This method validates whether the query object is an instance of hybrid query 25 | */ 26 | public static boolean isHybridQuery(final Query query, final SearchContext searchContext) { 27 | if (query instanceof HybridQuery 28 | || (Objects.nonNull(searchContext.parsedQuery()) && searchContext.parsedQuery().query() instanceof HybridQuery)) { 29 | return true; 30 | } 31 | return false; 32 | } 33 | 34 | private static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) { 35 | return searchContext.mapperService().hasNested() && new NestedHelper(searchContext.mapperService()).mightMatchNestedDocs(query); 36 | } 37 | 38 | private static boolean isWrappedHybridQuery(final Query query) { 39 | return query instanceof BooleanQuery 40 | && ((BooleanQuery) query).clauses().stream().anyMatch(clauseQuery -> clauseQuery.query() instanceof HybridQuery); 41 | } 42 | 43 | private static boolean hasAliasFilter(final Query query, final SearchContext searchContext) { 44 | return Objects.nonNull(searchContext.aliasFilter()); 45 | } 46 | 47 | /** 48 | * This method checks whether hybrid query is wrapped under boolean query object 49 | */ 50 | public static boolean isHybridQueryWrappedInBooleanQuery(final SearchContext searchContext, final Query query) { 51 | return ((hasAliasFilter(query, searchContext) || hasNestedFieldOrNestedDocs(query, searchContext)) 52 | && isWrappedHybridQuery(query) 53 | && !((BooleanQuery) query).clauses().isEmpty()); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/util/PipelineServiceUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.util; 6 | 7 | import com.google.common.annotations.VisibleForTesting; 8 | import 
lombok.extern.log4j.Log4j2; 9 | import org.opensearch.cluster.service.ClusterService; 10 | import org.opensearch.ingest.IngestService; 11 | import org.opensearch.search.pipeline.PipelineConfiguration; 12 | import org.opensearch.search.pipeline.SearchPipelineService; 13 | 14 | import java.util.List; 15 | import java.util.Map; 16 | import java.util.stream.Collectors; 17 | 18 | /** 19 | * Class abstracts information related to ingest and search pipelines 20 | */ 21 | @Log4j2 22 | public class PipelineServiceUtil { 23 | private ClusterService clusterService; 24 | 25 | /** 26 | * Constructor 27 | * @param clusterService 28 | */ 29 | public PipelineServiceUtil(ClusterService clusterService) { 30 | this.clusterService = clusterService; 31 | } 32 | 33 | /** 34 | * Returns list of search pipeline configs 35 | * @return list of search pipeline configs 36 | */ 37 | public List> getSearchPipelineConfigs() { 38 | List> pipelineConfigs = getSearchPipelines().stream() 39 | .map(PipelineConfiguration::getConfigAsMap) 40 | .collect(Collectors.toList()); 41 | 42 | return pipelineConfigs; 43 | } 44 | 45 | /** 46 | * Returns list of ingest pipeline configs 47 | * @return list of ingest pipeline configs 48 | */ 49 | public List> getIngestPipelineConfigs() { 50 | List> pipelineConfigs = getIngestPipelines().stream() 51 | .map(org.opensearch.ingest.PipelineConfiguration::getConfigAsMap) 52 | .collect(Collectors.toList()); 53 | 54 | return pipelineConfigs; 55 | } 56 | 57 | @VisibleForTesting 58 | protected List getIngestPipelines() { 59 | return IngestService.getPipelines(clusterService.state()); 60 | } 61 | 62 | @VisibleForTesting 63 | protected List getSearchPipelines() { 64 | return SearchPipelineService.getPipelines(clusterService.state()); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/util/prune/PruneType.java: 
/**
 * Enum representing different types of prune methods for sparse vectors
 */
public enum PruneType {
    NONE("none"),
    TOP_K("top_k"),
    ALPHA_MASS("alpha_mass"),
    MAX_RATIO("max_ratio"),
    ABS_VALUE("abs_value");

    private final String value;
    // Reverse lookup table from string value to enum constant, built once at class load
    private static final Map<String, PruneType> VALUE_MAP = Arrays.stream(values())
        .collect(Collectors.toUnmodifiableMap(status -> status.value, Function.identity()));

    PruneType(String value) {
        this.value = value;
    }

    /**
     * @return the string representation of this prune type
     */
    public String getValue() {
        return value;
    }

    /**
     * Get PruneType from string value
     *
     * @param value string representation of prune type; null or empty maps to {@link #NONE}
     * @return corresponding PruneType enum
     * @throws IllegalArgumentException if value doesn't match any prune type
     */
    public static PruneType fromString(final String value) {
        // plain stdlib null/empty check instead of legacy commons-lang StringUtils.isEmpty
        if (value == null || value.isEmpty()) {
            return NONE;
        }
        PruneType type = VALUE_MAP.get(value);
        if (type == null) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Unknown prune type: %s", value));
        }
        return type;
    }
}
java.lang.reflect.ReflectPermission "suppressAccessChecks"; 6 | permission java.lang.RuntimePermission "setContextClassLoader"; 7 | 8 | }; 9 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/NeuralSearchIT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch; 6 | 7 | import java.io.IOException; 8 | 9 | import org.apache.hc.core5.http.ParseException; 10 | import org.apache.hc.core5.http.io.entity.EntityUtils; 11 | import org.junit.Assert; 12 | import org.opensearch.client.Request; 13 | import org.opensearch.client.Response; 14 | import org.opensearch.rest.RestRequest; 15 | 16 | public class NeuralSearchIT extends OpenSearchSecureRestTestCase { 17 | private static final String NEURAL_SEARCH_PLUGIN_NAME = "neural-search"; 18 | 19 | public void testNeuralSearchPluginInstalled() throws IOException, ParseException { 20 | final Request request = new Request(RestRequest.Method.GET.name(), String.join("/", "_cat", "plugins")); 21 | final Response response = client().performRequest(request); 22 | assertOK(response); 23 | 24 | final String responseBody = EntityUtils.toString(response.getEntity()); 25 | Assert.assertNotNull(responseBody); 26 | Assert.assertTrue(responseBody.contains(NEURAL_SEARCH_PLUGIN_NAME)); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/NeuralSearchTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch; 6 | 7 | import org.opensearch.knn.common.KNNConstants; 8 | import org.opensearch.knn.index.engine.KNNEngine; 9 | import 
org.opensearch.test.OpenSearchTestCase; 10 | 11 | public class NeuralSearchTests extends OpenSearchTestCase { 12 | 13 | public void testValidateKNNDependency() { 14 | assertEquals(KNNConstants.LUCENE_NAME, KNNEngine.LUCENE.getName()); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/common/VectorUtilTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.common; 6 | 7 | import java.util.Collections; 8 | import java.util.List; 9 | 10 | import org.opensearch.test.OpenSearchTestCase; 11 | 12 | public class VectorUtilTests extends OpenSearchTestCase { 13 | 14 | public void testVectorAsListToArray() { 15 | List vectorAsList_withThreeElements = List.of(1.3f, 2.5f, 3.5f); 16 | float[] vectorAsArray_withThreeElements = VectorUtil.vectorAsListToArray(vectorAsList_withThreeElements); 17 | 18 | assertEquals(vectorAsList_withThreeElements.size(), vectorAsArray_withThreeElements.length); 19 | for (int i = 0; i < vectorAsList_withThreeElements.size(); i++) { 20 | assertEquals(vectorAsList_withThreeElements.get(i).floatValue(), vectorAsArray_withThreeElements[i], 0.0f); 21 | } 22 | 23 | List vectorAsList_withNoElements = Collections.emptyList(); 24 | float[] vectorAsArray_withNoElements = VectorUtil.vectorAsListToArray(vectorAsList_withNoElements); 25 | assertEquals(0, vectorAsArray_withNoElements.length); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/constants/TestCommonConstants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.constants; 6 | 7 | 
import java.util.List; 8 | import java.util.Map; 9 | 10 | import lombok.AccessLevel; 11 | import lombok.NoArgsConstructor; 12 | import org.opensearch.neuralsearch.processor.MapInferenceRequest; 13 | import org.opensearch.neuralsearch.processor.SimilarityInferenceRequest; 14 | import org.opensearch.neuralsearch.processor.TextInferenceRequest; 15 | 16 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 17 | public class TestCommonConstants { 18 | public static final String MODEL_ID = "modeId"; 19 | public static final List TARGET_RESPONSE_FILTERS = List.of("sentence_embedding"); 20 | public static final Float[] PREDICT_VECTOR_ARRAY = new Float[] { 2.0f, 3.0f }; 21 | public static final List SENTENCES_LIST = List.of("it is sunny today", "roses are red"); 22 | public static final Map SENTENCES_MAP = Map.of("inputText", "Text query", "inputImage", "base641234567890"); 23 | 24 | public static final String QUERY_TEST = "is it sunny"; 25 | 26 | public static final TextInferenceRequest TEXT_INFERENCE_REQUEST = TextInferenceRequest.builder() 27 | .modelId(MODEL_ID) 28 | .inputTexts(SENTENCES_LIST) 29 | .build(); 30 | 31 | public static final MapInferenceRequest MAP_INFERENCE_REQUEST = MapInferenceRequest.builder() 32 | .modelId(MODEL_ID) 33 | .inputObjects(SENTENCES_MAP) 34 | .build(); 35 | 36 | public static final SimilarityInferenceRequest SIMILARITY_INFERENCE_REQUEST = SimilarityInferenceRequest.builder() 37 | .modelId(MODEL_ID) 38 | .inputTexts(SENTENCES_LIST) 39 | .queryText(QUERY_TEST) 40 | .build(); 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/executors/HybridQueryExecutorIT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.executors; 6 | 7 | import org.apache.hc.core5.http.ParseException; 8 | import 
org.apache.hc.core5.http.io.entity.EntityUtils; 9 | import org.junit.Assert; 10 | import org.opensearch.client.Request; 11 | import org.opensearch.client.Response; 12 | import org.opensearch.neuralsearch.OpenSearchSecureRestTestCase; 13 | import org.opensearch.rest.RestRequest; 14 | 15 | import java.io.IOException; 16 | 17 | import static org.opensearch.neuralsearch.executors.HybridQueryExecutor.getThreadPoolName; 18 | 19 | public class HybridQueryExecutorIT extends OpenSearchSecureRestTestCase { 20 | 21 | public void testHybridQueryExecutorThreadIsInitialized() throws IOException, ParseException { 22 | final Request request = new Request(RestRequest.Method.GET.name(), String.join("/", "_cat", "thread_pool", getThreadPoolName())); 23 | final Response response = client().performRequest(request); 24 | assertOK(response); 25 | 26 | final String responseBody = EntityUtils.toString(response.getEntity()); 27 | Assert.assertNotNull(responseBody); 28 | Assert.assertTrue(responseBody.contains(getThreadPoolName())); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactoryTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.processor.chunker; 6 | 7 | import org.mockito.Mock; 8 | import org.opensearch.index.analysis.AnalysisRegistry; 9 | import org.opensearch.test.OpenSearchTestCase; 10 | 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | 14 | import static org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker.ANALYSIS_REGISTRY_FIELD; 15 | 16 | public class ChunkerFactoryTests extends OpenSearchTestCase { 17 | 18 | @Mock 19 | private AnalysisRegistry analysisRegistry; 20 | 21 | public void testCreate_FixedTokenLength() { 22 | Chunker chunker = 
ChunkerFactory.create(FixedTokenLengthChunker.ALGORITHM_NAME, createChunkParameters()); 23 | assertNotNull(chunker); 24 | assert (chunker instanceof FixedTokenLengthChunker); 25 | } 26 | 27 | public void testCreate_Delimiter() { 28 | Chunker chunker = ChunkerFactory.create(DelimiterChunker.ALGORITHM_NAME, createChunkParameters()); 29 | assertNotNull(chunker); 30 | assert (chunker instanceof DelimiterChunker); 31 | } 32 | 33 | public void testCreate_Invalid() { 34 | String invalidChunkerName = "Invalid Chunker Algorithm"; 35 | assertThrows(NullPointerException.class, () -> ChunkerFactory.create(invalidChunkerName, createChunkParameters())); 36 | } 37 | 38 | private Map createChunkParameters() { 39 | Map parameters = new HashMap<>(); 40 | parameters.put(ANALYSIS_REGISTRY_FIELD, analysisRegistry); 41 | return parameters; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/query/HybridSubQueryScorerTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | import org.opensearch.test.OpenSearchTestCase; 8 | 9 | public class HybridSubQueryScorerTests extends OpenSearchTestCase { 10 | 11 | private static final int NUM_SUB_QUERIES = 2; 12 | 13 | public void testGetSubQueryScores_whenInitialized_thenReturnCorrectSize() { 14 | HybridSubQueryScorer scorer = new HybridSubQueryScorer(NUM_SUB_QUERIES); 15 | float[] scores = scorer.getSubQueryScores(); 16 | 17 | assertEquals(NUM_SUB_QUERIES, scores.length); 18 | assertEquals(NUM_SUB_QUERIES, scorer.getNumOfSubQueries()); 19 | } 20 | 21 | public void testResetScores_whenScoresSet_thenAllScoresZero() { 22 | HybridSubQueryScorer scorer = new HybridSubQueryScorer(NUM_SUB_QUERIES); 23 | float[] scores = scorer.getSubQueryScores(); 24 | scores[0] = 0.5f; 25 | 
scores[1] = 1.0f; 26 | 27 | scorer.resetScores(); 28 | 29 | // verify all scores are reset to 0 30 | for (float score : scorer.getSubQueryScores()) { 31 | assertEquals(0.0f, score, 0.0f); 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/query/NeuralKNNQueryTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | import org.apache.lucene.search.Query; 8 | import org.opensearch.test.OpenSearchTestCase; 9 | 10 | import java.io.IOException; 11 | 12 | import static org.mockito.ArgumentMatchers.any; 13 | import static org.mockito.ArgumentMatchers.anyFloat; 14 | import static org.mockito.Mockito.mock; 15 | import static org.mockito.Mockito.verify; 16 | import static org.mockito.Mockito.when; 17 | 18 | public class NeuralKNNQueryTests extends OpenSearchTestCase { 19 | 20 | public void testNeuralKNNQuery() throws IOException { 21 | Query mockKnnQuery = mock(Query.class); 22 | String originalQueryText = "test query"; 23 | NeuralKNNQuery query = new NeuralKNNQuery(mockKnnQuery, originalQueryText); 24 | 25 | // Test toString 26 | when(mockKnnQuery.toString("field")).thenReturn("test_query"); 27 | assertEquals("toString should delegate to underlying query", "test_query", query.toString("field")); 28 | 29 | // Test createWeight 30 | when(mockKnnQuery.createWeight(any(), any(), anyFloat())).thenReturn(null); 31 | query.createWeight(null, null, 1.0f); 32 | verify(mockKnnQuery).createWeight(any(), any(), anyFloat()); 33 | 34 | // Test equals and hashCode 35 | NeuralKNNQuery query2 = new NeuralKNNQuery(mockKnnQuery, originalQueryText); 36 | assertEquals("Same underlying query should be equal", query, query2); 37 | assertEquals("Same underlying query should have same hash code", query.hashCode(), 
public class NeuralKNNQueryTests extends OpenSearchTestCase {

    /**
     * Verifies NeuralKNNQuery delegates toString/createWeight to the wrapped query,
     * and that equals/hashCode incorporate both the wrapped query and the original query text.
     */
    public void testNeuralKNNQuery() throws IOException {
        Query mockKnnQuery = mock(Query.class);
        String originalQueryText = "test query";
        NeuralKNNQuery query = new NeuralKNNQuery(mockKnnQuery, originalQueryText);

        // Test toString — must delegate to the wrapped query
        when(mockKnnQuery.toString("field")).thenReturn("test_query");
        assertEquals("toString should delegate to underlying query", "test_query", query.toString("field"));

        // Test createWeight — must forward to the wrapped query
        when(mockKnnQuery.createWeight(any(), any(), anyFloat())).thenReturn(null);
        query.createWeight(null, null, 1.0f);
        verify(mockKnnQuery).createWeight(any(), any(), anyFloat());

        // Test equals and hashCode — same wrapped query + same text means equal
        NeuralKNNQuery query2 = new NeuralKNNQuery(mockKnnQuery, originalQueryText);
        assertEquals("Same underlying query should be equal", query, query2);
        assertEquals("Same underlying query should have same hash code", query.hashCode(), query2.hashCode());

        // Test originalQueryText getter
        assertEquals("Original query text should match", originalQueryText, query.getOriginalQueryText());

        // Test not equals with different originalQueryText — text participates in equality
        NeuralKNNQuery query3 = new NeuralKNNQuery(mockKnnQuery, "different query");
        assertNotEquals("Different original query text should not be equal", query, query3);
        assertNotEquals("Different original query text should have different hash code", query.hashCode(), query3.hashCode());
    }
}
| public void testTrackThreshold_whenTrackThresholdSet_thenSuccessful() { 30 | HitsThresholdChecker hitsThresholdChecker = new HitsThresholdChecker(Integer.MAX_VALUE); 31 | assertEquals(ScoreMode.TOP_SCORES, hitsThresholdChecker.scoreMode()); 32 | assertFalse(hitsThresholdChecker.isThresholdReached()); 33 | hitsThresholdChecker.incrementHitCount(); 34 | assertFalse(hitsThresholdChecker.isThresholdReached()); 35 | IntStream.rangeClosed(1, 5).forEach((checker) -> hitsThresholdChecker.incrementHitCount()); 36 | assertFalse(hitsThresholdChecker.isThresholdReached()); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/search/HybridDisiWrapperTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.search; 6 | 7 | import org.apache.lucene.search.DocIdSetIterator; 8 | import org.apache.lucene.search.Scorer; 9 | import org.opensearch.neuralsearch.query.OpenSearchQueryTestCase; 10 | 11 | import static org.mockito.Mockito.mock; 12 | import static org.mockito.Mockito.when; 13 | 14 | public class HybridDisiWrapperTests extends OpenSearchQueryTestCase { 15 | 16 | public void testSubQueryIndex_whenCreateNewInstanceAndSetIndex_thenSuccessful() { 17 | Scorer scorer = mock(Scorer.class); 18 | DocIdSetIterator docIdSetIterator = mock(DocIdSetIterator.class); 19 | when(scorer.iterator()).thenReturn(docIdSetIterator); 20 | int subQueryIndex = 2; 21 | HybridDisiWrapper hybridDisiWrapper = new HybridDisiWrapper(scorer, subQueryIndex); 22 | assertEquals(2, hybridDisiWrapper.getSubQueryIndex()); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/search/collector/HybridCollectorTestCase.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.search.collector; 6 | 7 | import org.apache.lucene.search.LeafCollector; 8 | import org.opensearch.neuralsearch.query.HybridSubQueryScorer; 9 | import org.opensearch.neuralsearch.query.OpenSearchQueryTestCase; 10 | 11 | import java.io.IOException; 12 | import java.util.List; 13 | 14 | /** 15 | * Base class for HybridCollector test cases 16 | */ 17 | public class HybridCollectorTestCase extends OpenSearchQueryTestCase { 18 | /** 19 | * Collect docs and scores for each sub-query scorer and add them to the leaf collector 20 | * @param scorer HybridSubQueryScorer object 21 | * @param scores1 List of scores for the first sub-query 22 | * @param leafCollector LeafCollector object 23 | * @param subQueryIndex Index of the sub-query 24 | * @param docsIds Array of document IDs 25 | * @throws IOException 26 | */ 27 | void collectDocsAndScores( 28 | HybridSubQueryScorer scorer, 29 | List scores1, 30 | LeafCollector leafCollector, 31 | int subQueryIndex, 32 | int[] docsIds 33 | ) throws IOException { 34 | for (int i = 0; i < docsIds.length; i++) { 35 | scorer.getSubQueryScores()[subQueryIndex] = scores1.get(i); 36 | leafCollector.collect(docsIds[i]); 37 | scorer.resetScores(); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/stats/info/CountableInfoStatSnapshotTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.stats.info; 6 | 7 | import org.opensearch.common.xcontent.json.JsonXContent; 8 | import org.opensearch.core.xcontent.ToXContent; 9 | import 
org.opensearch.core.xcontent.XContentBuilder; 10 | import org.opensearch.neuralsearch.stats.common.StatSnapshot; 11 | import org.opensearch.test.OpenSearchTestCase; 12 | 13 | import java.io.IOException; 14 | import java.util.Map; 15 | 16 | import static org.opensearch.neuralsearch.util.TestUtils.xContentBuilderToMap; 17 | 18 | public class CountableInfoStatSnapshotTests extends OpenSearchTestCase { 19 | private static final InfoStatName STAT_NAME = InfoStatName.TEXT_EMBEDDING_PROCESSORS; 20 | 21 | public void test_increment() { 22 | CountableInfoStatSnapshot snapshot = new CountableInfoStatSnapshot(STAT_NAME); 23 | assertEquals(0L, snapshot.getValue().longValue()); 24 | snapshot.incrementBy(5L); 25 | assertEquals(5L, snapshot.getValue().longValue()); 26 | snapshot.incrementBy(3L); 27 | assertEquals(8L, snapshot.getValue().longValue()); 28 | } 29 | 30 | public void test_toXContent() throws IOException { 31 | CountableInfoStatSnapshot snapshot = new CountableInfoStatSnapshot(STAT_NAME); 32 | snapshot.incrementBy(8675309L); 33 | 34 | XContentBuilder builder = JsonXContent.contentBuilder(); 35 | snapshot.toXContent(builder, ToXContent.EMPTY_PARAMS); 36 | 37 | Map responseMap = xContentBuilderToMap(builder); 38 | 39 | assertEquals(8675309, responseMap.get(StatSnapshot.VALUE_FIELD)); 40 | assertEquals(STAT_NAME.getStatType().getTypeString(), responseMap.get(StatSnapshot.STAT_TYPE_FIELD)); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/stats/info/SettableInfoStatSnapshotTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.stats.info; 6 | 7 | import org.opensearch.common.xcontent.json.JsonXContent; 8 | import org.opensearch.core.xcontent.ToXContent; 9 | import org.opensearch.core.xcontent.XContentBuilder; 
10 | import org.opensearch.neuralsearch.stats.common.StatSnapshot; 11 | import org.opensearch.test.OpenSearchTestCase; 12 | 13 | import java.io.IOException; 14 | import java.util.Map; 15 | 16 | import static org.opensearch.neuralsearch.util.TestUtils.xContentBuilderToMap; 17 | 18 | public class SettableInfoStatSnapshotTests extends OpenSearchTestCase { 19 | 20 | private static final InfoStatName STAT_NAME = InfoStatName.CLUSTER_VERSION; 21 | private static final String SETTABLE_VALUE = "test-value"; 22 | 23 | public void test_constructorWithoutValue() { 24 | SettableInfoStatSnapshot snapshot = new SettableInfoStatSnapshot<>(STAT_NAME); 25 | assertNull(snapshot.getValue()); 26 | } 27 | 28 | public void test_constructorWithValue() { 29 | SettableInfoStatSnapshot snapshot = new SettableInfoStatSnapshot<>(STAT_NAME, SETTABLE_VALUE); 30 | assertEquals(SETTABLE_VALUE, snapshot.getValue()); 31 | } 32 | 33 | public void test_setValueUpdates() { 34 | SettableInfoStatSnapshot snapshot = new SettableInfoStatSnapshot<>(STAT_NAME); 35 | snapshot.setValue("new-value"); 36 | assertEquals("new-value", snapshot.getValue()); 37 | } 38 | 39 | public void test_toXContent() throws IOException { 40 | SettableInfoStatSnapshot snapshot = new SettableInfoStatSnapshot<>(STAT_NAME, SETTABLE_VALUE); 41 | XContentBuilder builder = JsonXContent.contentBuilder(); 42 | snapshot.toXContent(builder, ToXContent.EMPTY_PARAMS); 43 | 44 | Map responseMap = xContentBuilderToMap(builder); 45 | 46 | assertEquals(SETTABLE_VALUE, responseMap.get(StatSnapshot.VALUE_FIELD)); 47 | assertEquals(STAT_NAME.getStatType().getTypeString(), responseMap.get(StatSnapshot.STAT_TYPE_FIELD)); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/util/PipelineServiceUtilTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * 
SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.util; 6 | 7 | import org.opensearch.cluster.service.ClusterService; 8 | import org.opensearch.test.OpenSearchTestCase; 9 | 10 | import java.util.Collections; 11 | import java.util.List; 12 | import java.util.Map; 13 | 14 | import static org.mockito.Mockito.doReturn; 15 | import static org.mockito.Mockito.mock; 16 | import static org.mockito.Mockito.spy; 17 | import static org.mockito.Mockito.times; 18 | import static org.mockito.Mockito.verify; 19 | 20 | public class PipelineServiceUtilTests extends OpenSearchTestCase { 21 | public void test_getIngestPipelineConfigs_returnsEmptyList() { 22 | ClusterService mockClusterService = mock(ClusterService.class); 23 | PipelineServiceUtil utilSpy = spy(new PipelineServiceUtil(mockClusterService)); 24 | 25 | doReturn(Collections.emptyList()).when(utilSpy).getIngestPipelines(); 26 | 27 | List> configs = utilSpy.getIngestPipelineConfigs(); 28 | 29 | verify(utilSpy, times(1)).getIngestPipelines(); 30 | assertTrue(configs.isEmpty()); 31 | } 32 | 33 | public void test_getSearchPipelineConfigs_returnsEmptyList() { 34 | ClusterService mockClusterService = mock(ClusterService.class); 35 | PipelineServiceUtil utilSpy = spy(new PipelineServiceUtil(mockClusterService)); 36 | 37 | doReturn(Collections.emptyList()).when(utilSpy).getSearchPipelines(); 38 | 39 | List> configs = utilSpy.getSearchPipelineConfigs(); 40 | 41 | verify(utilSpy, times(1)).getSearchPipelines(); 42 | assertTrue(configs.isEmpty()); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/util/prune/PruneTypeTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.util.prune; 6 | 7 | import org.opensearch.test.OpenSearchTestCase; 8 | 
9 | public class PruneTypeTests extends OpenSearchTestCase { 10 | public void testGetValue() { 11 | assertEquals("none", PruneType.NONE.getValue()); 12 | assertEquals("top_k", PruneType.TOP_K.getValue()); 13 | assertEquals("alpha_mass", PruneType.ALPHA_MASS.getValue()); 14 | assertEquals("max_ratio", PruneType.MAX_RATIO.getValue()); 15 | assertEquals("abs_value", PruneType.ABS_VALUE.getValue()); 16 | } 17 | 18 | public void testFromString() { 19 | assertEquals(PruneType.NONE, PruneType.fromString("none")); 20 | assertEquals(PruneType.NONE, PruneType.fromString(null)); 21 | assertEquals(PruneType.NONE, PruneType.fromString("")); 22 | assertEquals(PruneType.TOP_K, PruneType.fromString("top_k")); 23 | assertEquals(PruneType.ALPHA_MASS, PruneType.fromString("alpha_mass")); 24 | assertEquals(PruneType.MAX_RATIO, PruneType.fromString("max_ratio")); 25 | assertEquals(PruneType.ABS_VALUE, PruneType.fromString("abs_value")); 26 | 27 | IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> PruneType.fromString("test_value")); 28 | assertEquals("Unknown prune type: test_value", exception.getMessage()); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/test/resources/highlight/UploadSentenceHighlightingModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sentence_highlighting_qa_model", 3 | "version": "1.0.0", 4 | "function_name": "QUESTION_ANSWERING", 5 | "description": "Sentence highlighting question answering model for testing", 6 | "model_format": "TORCH_SCRIPT", 7 | "model_group_id": "%s", 8 | "model_content_hash_value": "15e97d44ca59f6cd3e977398e38a9cea401eb87f360b92ca9dd8b30afd41f926", 9 | "url": "https://github.com/opensearch-project/ml-commons/blob/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/question_answering/sentence_highlighting_qa_model_pt.zip?raw=true", 10 | "model_config": { 11 
| "model_type": "sentence_highlighting", 12 | "framework_type": "huggingface_transformers" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/test/resources/mapper/mappingWithNestedSemanticFields.json: -------------------------------------------------------------------------------- 1 | { 2 | "products": { 3 | "type": "nested", 4 | "properties": { 5 | "product_description": { 6 | "type": "semantic", 7 | "model_id": "dummy model id" 8 | }, 9 | "price": { 10 | "type": "number" 11 | } 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/test/resources/mappingtransformer/transformedMappingMultipleSemanticFields.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "properties": { 4 | "inter_field": { 5 | "properties": { 6 | "semantic_field_1": { 7 | "model_id": "textEmbeddingModelId", 8 | "type": "semantic", 9 | "chunking": true 10 | }, 11 | "semantic_field_1_semantic_info": { 12 | "properties": { 13 | "chunks": { 14 | "type": "nested", 15 | "properties": { 16 | "embedding": { 17 | "type": "knn_vector", 18 | "method": { 19 | "space_type": "l2", 20 | "name": "hnsw" 21 | }, 22 | "dimension": 768 23 | }, 24 | "text": { 25 | "type": "text" 26 | } 27 | } 28 | }, 29 | "model": { 30 | "properties": { 31 | "id": { 32 | "type": "text", 33 | "index": false 34 | }, 35 | "type": { 36 | "type": "text", 37 | "index": false 38 | }, 39 | "name": { 40 | "type": "text", 41 | "index": false 42 | } 43 | } 44 | } 45 | } 46 | } 47 | } 48 | }, 49 | "semantic_field_2": { 50 | "model_id": "sparseModelId", 51 | "type": "semantic", 52 | "semantic_info_field_name": "custom_semantic_info_field" 53 | }, 54 | "custom_semantic_info_field": { 55 | "properties": { 56 | "embedding": { 57 | "type": "rank_features" 58 | }, 59 | "model": { 60 | "properties": { 61 | "id": { 62 | "type": "text", 63 | "index": false 64 | }, 65 | "type": { 66 | "type": 
"text", 67 | "index": false 68 | }, 69 | "name": { 70 | "type": "text", 71 | "index": false 72 | } 73 | } 74 | } 75 | } 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/test/resources/processor/CreateModelGroupRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "description": "This is a public model group" 4 | } 5 | -------------------------------------------------------------------------------- /src/test/resources/processor/NeuralSparseTwoPhaseAndNeuralEnrichProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_sparse_two_phase_processor": { 5 | "tag": "neural-sparse", 6 | "description": "This processor is making two-phase rescorer.", 7 | "enabled": true, 8 | "two_phase_parameter": { 9 | "prune_ratio": %f, 10 | "expansion_rate": %f, 11 | "max_window_size": %d 12 | } 13 | } 14 | }, 15 | { 16 | "neural_query_enricher": { 17 | "tag": "tag1", 18 | "description": "This processor is going to set the default model id.", 19 | "default_model_id": "%s" 20 | } 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_sparse_two_phase_processor": { 5 | "tag": "neural-sparse", 6 | "description": "This processor is making two-phase rescorer.", 7 | "enabled": true, 8 | "two_phase_parameter": { 9 | "prune_ratio": %f, 10 | "expansion_rate": %f, 11 | "max_window_size": %d 12 | } 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfiguration.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "batch_size": "%d", 8 | "field_map": { 9 | "title": "title_knn", 10 | "favor_list": "favor_list_knn", 11 | "favorites": { 12 | "game": "game_knn", 13 | "movie": "movie_knn" 14 | }, 15 | "nested_passages": { 16 | "text": "embedding" 17 | } 18 | } 19 | } 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfigurationWithBatchSize.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "drop": { 6 | "if": "ctx.text.contains('drop')" 7 | } 8 | }, 9 | { 10 | "fail": { 11 | "if": "ctx.text.contains('fail')", 12 | "message": "fail" 13 | } 14 | }, 15 | { 16 | "text_embedding": { 17 | "model_id": "%s", 18 | "batch_size": 2, 19 | "field_map": { 20 | "title": "title_knn", 21 | "favor_list": "favor_list_knn", 22 | "favorites": { 23 | "game": "game_knn", 24 | "movie": "movie_knn" 25 | }, 26 | "nested_passages": { 27 | "text": "embedding" 28 | } 29 | } 30 | } 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfigurationWithBatchSizeWithSkipExisting.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "drop": { 6 | "if": "ctx.text.contains('drop')" 7 | } 8 | }, 9 | { 10 | "fail": { 11 | "if": "ctx.text.contains('fail')", 12 | "message": "fail" 13 | } 14 | }, 15 | { 16 | "text_embedding": { 17 | "model_id": "%s", 18 | "batch_size": 2, 19 | "field_map": { 20 | "title": "title_knn", 21 | "favor_list": "favor_list_knn", 22 | 
"favorites": { 23 | "game": "game_knn", 24 | "movie": "movie_knn" 25 | }, 26 | "nested_passages": { 27 | "text": "embedding" 28 | } 29 | }, 30 | "skip_existing": true 31 | } 32 | } 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfigurationWithNestedFieldsMapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "field_map": { 8 | "title": "title_knn", 9 | "favor_list": "favor_list_knn", 10 | "favorites": { 11 | "game": "game_knn", 12 | "movie": "movie_knn" 13 | }, 14 | "nested_passages.level_2.level_3_text": "level_3_container.level_3_embedding" 15 | } 16 | } 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfigurationWithNestedFieldsMappingWithSkipExisting.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "field_map": { 8 | "title": "title_knn", 9 | "favor_list": "favor_list_knn", 10 | "favorites": { 11 | "game": "game_knn", 12 | "movie": "movie_knn" 13 | }, 14 | "nested_passages.level_2.level_3_text": "level_3_container.level_3_embedding" 15 | }, 16 | "skip_existing": true 17 | } 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfigurationWithSkipExisting.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for optimized inference call", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "batch_size": "%d", 8 | "field_map": { 9 
| "title": "title_knn", 10 | "favor_list": "favor_list_knn", 11 | "favorites": { 12 | "game": "game_knn", 13 | "movie": "movie_knn" 14 | }, 15 | "nested_passages": { 16 | "text": "embedding" 17 | } 18 | }, 19 | "skip_existing": true 20 | } 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineForTextImageEmbeddingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text image embedding pipeline", 3 | "processors": [ 4 | { 5 | "text_image_embedding": { 6 | "model_id": "%s", 7 | "embedding": "passage_embedding", 8 | "field_map": { 9 | "text": "passage_text", 10 | "image": "passage_image" 11 | } 12 | } 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineForTextImageEmbeddingWithSkipExistingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text image embedding pipeline", 3 | "processors": [ 4 | { 5 | "text_image_embedding": { 6 | "model_id": "%s", 7 | "embedding": "passage_embedding", 8 | "field_map": { 9 | "text": "passage_text", 10 | "image": "passage_image" 11 | }, 12 | "skip_existing": true 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /src/test/resources/processor/ReRankByFieldPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Pipeline for reranking ByField", 3 | "response_processors": [ 4 | { 5 | "rerank": { 6 | "by_field": { 7 | "target_field": "%s", 8 | "remove_target_field": "%s", 9 | "keep_previous_score": "%s" 10 | } 11 | } 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- 
/src/test/resources/processor/RerankMLOpenSearchPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Pipeline for reranking with a cross encoder", 3 | "response_processors": [ 4 | { 5 | "rerank": { 6 | "ml_opensearch": { 7 | "model_id": "%s" 8 | }, 9 | "context": { 10 | "document_fields": ["text_representation"] 11 | } 12 | } 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /src/test/resources/processor/SearchRequestPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_query_enricher": { 5 | "tag": "tag1", 6 | "description": "This processor is going to restrict to publicly visible documents", 7 | "default_model_id": "%s" 8 | } 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /src/test/resources/processor/SparseEncodingIndexMappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings":{ 3 | "default_pipeline": "pipeline-sparse-encoding" 4 | }, 5 | "mappings": { 6 | "properties": { 7 | "title_sparse": { 8 | "type": "rank_features" 9 | }, 10 | "favor_list_sparse": { 11 | "type": "nested", 12 | "properties":{ 13 | "sparse_encoding":{ 14 | "type": "rank_features" 15 | } 16 | } 17 | }, 18 | "favorites.game_sparse": { 19 | "type": "rank_features" 20 | }, 21 | "favorites.movie_sparse": { 22 | "type": "rank_features" 23 | } 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/test/resources/processor/SparseEncodingPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example sparse Encoding pipeline", 3 | "processors" : [ 4 | { 5 | "sparse_encoding": { 6 | "model_id": "%s", 7 | 
"batch_size": "%d", 8 | "field_map": { 9 | "title": "title_sparse", 10 | "favor_list": "favor_list_sparse", 11 | "favorites": { 12 | "game": "game_sparse", 13 | "movie": "movie_sparse" 14 | } 15 | } 16 | } 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /src/test/resources/processor/SparseEncodingPipelineConfigurationWithPrune.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example sparse Encoding pipeline", 3 | "processors" : [ 4 | { 5 | "sparse_encoding": { 6 | "model_id": "%s", 7 | "batch_size": "%d", 8 | "prune_type": "max_ratio", 9 | "prune_ratio": 0.8, 10 | "field_map": { 11 | "title": "title_sparse", 12 | "favor_list": "favor_list_sparse", 13 | "favorites": { 14 | "game": "game_sparse", 15 | "movie": "movie_sparse" 16 | } 17 | } 18 | } 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /src/test/resources/processor/SparseEncodingPipelineConfigurationWithSkipExisting.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example sparse Encoding pipeline", 3 | "processors" : [ 4 | { 5 | "sparse_encoding": { 6 | "model_id": "%s", 7 | "batch_size": "%d", 8 | "field_map": { 9 | "title": "title_sparse", 10 | "favor_list": "favor_list_sparse", 11 | "favorites": { 12 | "game": "game_sparse", 13 | "movie": "movie_sparse" 14 | } 15 | }, 16 | "skip_existing": true 17 | } 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /src/test/resources/processor/UploadModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "traced_small_model", 3 | "version": "1.0.0", 4 | "model_format": "TORCH_SCRIPT", 5 | "function_name": "TEXT_EMBEDDING", 6 | "model_task_type": "text_embedding", 7 | "model_content_hash_value": 
"e13b74006290a9d0f58c1376f9629d4ebc05a0f9385f40db837452b167ae9021", 8 | "model_group_id": "%s", 9 | "model_config": { 10 | "model_type": "bert", 11 | "embedding_dimension": 768, 12 | "framework_type": "sentence_transformers", 13 | "all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}" 14 | }, 15 | "url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true" 16 | } 17 | -------------------------------------------------------------------------------- /src/test/resources/processor/UploadSparseEncodingModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "amazon/neural-sparse/opensearch-neural-sparse-tokenizer-v1", 3 | "version": "1.0.1", 4 | "model_group_id": "%s", 5 | "model_format": "TORCH_SCRIPT" 6 | } 7 | -------------------------------------------------------------------------------- /src/test/resources/processor/UploadTextSimilarityModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ms-marco-TinyBERT-L-2-v2", 3 | "version": "1.0.0", 4 | "function_name": "TEXT_SIMILARITY", 5 | "description": "test model", 6 | "model_format": "TORCH_SCRIPT", 7 | "model_group_id": "%s", 8 | "model_content_hash_value": "90e39a926101d1a4e542aade0794319404689b12acfd5d7e65c03d91c668b5cf", 9 | "model_config": { 10 | "model_type": "bert", 11 | "embedding_dimension": 1, 12 | "framework_type": "huggingface_transformers", 13 | "all_config": "nobody will read this" 14 | }, 15 | "url": "https://github.com/opensearch-project/ml-commons/blob/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_similarity/TinyBERT-CE-torch_script.zip?raw=true" 16 | } 17 | 
-------------------------------------------------------------------------------- /src/test/resources/processor/bulk_item_template.json: -------------------------------------------------------------------------------- 1 | { "index": { "_index": "{{index}}", "_id": "{{id}}" } }, 2 | {{doc}} 3 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/PipelineForCascadedChunker.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example cascaded pipeline with fixed token length algorithm after chunking algorithm", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk_intermediate" 8 | }, 9 | "algorithm": { 10 | "delimiter": { 11 | "delimiter": "." 12 | } 13 | } 14 | } 15 | }, 16 | { 17 | "text_chunking": { 18 | "field_map": { 19 | "body_chunk_intermediate": "body_chunk" 20 | }, 21 | "algorithm": { 22 | "fixed_token_length": { 23 | "token_limit": 10 24 | } 25 | } 26 | } 27 | } 28 | ] 29 | } 30 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/PipelineForDelimiterChunker.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example delimiter chunker pipeline", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "delimiter": { 11 | "delimiter": "." 
12 | } 13 | } 14 | } 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/PipelineForFixedTokenLengthChunkerWithLetterTokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example fixed token length chunker pipeline with letter tokenizer", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "fixed_token_length": { 11 | "token_limit": 10, 12 | "tokenizer": "letter" 13 | } 14 | } 15 | } 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/PipelineForFixedTokenLengthChunkerWithLowercaseTokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example fixed token length chunker pipeline with lowercase tokenizer", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "fixed_token_length": { 11 | "token_limit": 10, 12 | "tokenizer": "lowercase" 13 | } 14 | } 15 | } 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/PipelineForFixedTokenLengthChunkerWithStandardTokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example fixed token length chunker pipeline with standard tokenizer", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "fixed_token_length": { 11 | "token_limit": 10, 12 | "tokenizer": "standard" 13 | } 14 | } 15 | } 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- 
/src/test/resources/processor/chunker/TextChunkingIndexSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings":{ 3 | "index.analyze.max_token_count" : 100, 4 | "default_pipeline": "%s" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/TextChunkingTestDocument.json: -------------------------------------------------------------------------------- 1 | { 2 | "body": "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch." 3 | } 4 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/TextChunkingTestLongDocument.json: -------------------------------------------------------------------------------- 1 | { 2 | "body": "This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. 
The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch." 3 | } 4 | -------------------------------------------------------------------------------- /src/test/resources/processor/ingest_doc1.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "text": "%s", 4 | "description": "daily logging", 5 | "favor_list": [ 6 | "test", 7 | "hello", 8 | "mock" 9 | ], 10 | "favorites": { 11 | "game": "overwatch", 12 | "movie": null 13 | }, 14 | "nested_passages": [ 15 | { 16 | "text_not_for_embedding": "test" 17 | }, 18 | { 19 | "text": "hello" 20 | }, 21 | { 22 | "text": "world" 23 | } 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /src/test/resources/processor/ingest_doc2.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "this is a second doc", 3 | "text": "%s", 4 | "description": "the description is not very long", 5 | "favor_list": [ 6 | "favor" 7 | ], 8 | "favorites": { 9 | "game": "golden state", 10 | "movie": null 11 | }, 12 | "nested_passages": [ 13 | { 14 | "text_not_for_embedding": "test" 15 | }, 16 | { 17 | "text": "apple" 18 | }, 19 | { 20 | "text": "banana" 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/ingest_doc3.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "test", 6 | "hello", 7 | "mock" 8 | ], 9 | "favorites": { 10 | "game": "overwatch", 11 | "movie": null 12 | }, 13 | "nested_passages": 14 
| { 15 | "level_2": 16 | { 17 | "level_3_text": "hello", 18 | "level_3_container": { 19 | "level_4_text_field": "abc" 20 | } 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/ingest_doc4.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "key", 6 | "hey", 7 | "click" 8 | ], 9 | "favorites": { 10 | "game": "cossacks", 11 | "movie": "matrix" 12 | }, 13 | "nested_passages": 14 | { 15 | "level_2": 16 | { 17 | "level_3_text": "clown" 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/test/resources/processor/ingest_doc5.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "key", 6 | "hey", 7 | "click" 8 | ], 9 | "favorites": { 10 | "game": "cossacks", 11 | "movie": "matrix" 12 | }, 13 | "nested_passages":[ 14 | { 15 | "level_2": 16 | { 17 | "level_3_text": "clown" 18 | } 19 | }, 20 | { 21 | "level_2.level_3_text": "batman" 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingest_doc1.json: -------------------------------------------------------------------------------- 1 | { 2 | "geo_data": "dummy_geo_data_1", 3 | "products": [ 4 | { 5 | "product_description": "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. 
The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch." 6 | }, 7 | { 8 | "product_description": "dummy_product_description" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingest_doc2.json: -------------------------------------------------------------------------------- 1 | { 2 | "geo_data": "dummy_geo_data_1", 3 | "products": [ 4 | { 5 | "product_description": "dummy_product_description" 6 | } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingest_doc3.json: -------------------------------------------------------------------------------- 1 | { 2 | "geo_data": "dummy_geo_data_1" 3 | } 4 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingested_doc1.json: -------------------------------------------------------------------------------- 1 | { 2 | "_routing" : "routing", 3 | "geo_data_semantic_info" : { 4 | "embedding" : { 5 | "dummy token" : 1.0 6 | }, 7 | "model" : { 8 | "name" : "SPARSE_ENCODING", 9 | "id" : "dummy_model_id_2", 10 | "type" : "SPARSE_ENCODING" 11 | } 12 | }, 13 | "_version_type" : "internal", 14 | "_index" : "index", 15 | "geo_data" : "dummy_geo_data_1", 16 | "_id" : "1", 17 | "_version" : 1, 18 | "products" : [ { 19 | "product_description" : "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. 
The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.", 20 | "product_description_semantic_info" : { 21 | "chunks" : [ { 22 | "text" : "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example ", 23 | "embedding" : [ 2.0 ] 24 | }, { 25 | "text" : "24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.", 26 | "embedding" : [ 1.0 ] 27 | } ], 28 | "model" : { 29 | "name" : "TEXT_EMBEDDING", 30 | "id" : "dummy_model_id_1", 31 | "type" : "TEXT_EMBEDDING" 32 | } 33 | } 34 | }, { 35 | "product_description" : "dummy_product_description", 36 | "product_description_semantic_info" : { 37 | "chunks" : [ { 38 | "text" : "dummy_product_description", 39 | "embedding" : [ 3.0 ] 40 | } ], 41 | "model" : { 42 | "name" : "TEXT_EMBEDDING", 43 | "id" : "dummy_model_id_1", 44 | "type" : "TEXT_EMBEDDING" 45 | } 46 | } 47 | } ] 48 | } 49 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingested_doc2.json: -------------------------------------------------------------------------------- 1 | { 2 | "_routing" : "routing", 3 | "geo_data_semantic_info" : { 4 | "embedding" : { 5 | "dummy token" : 1.0 6 | }, 7 | "model" : { 8 | "name" : "SPARSE_ENCODING", 9 | "id" : "dummy_model_id_2", 10 | "type" : "SPARSE_ENCODING" 11 | } 12 | }, 13 | "_version_type" : "internal", 14 | "_index" : "index", 15 | "geo_data" : "dummy_geo_data_1", 16 | "_id" : "2", 17 | "_version" : 1, 18 | "products" : [ { 19 | "product_description" : "dummy_product_description", 20 | 
"product_description_semantic_info" : { 21 | "chunks" : [ { 22 | "text" : "dummy_product_description", 23 | "embedding" : [ 3.0 ] 24 | } ], 25 | "model" : { 26 | "name" : "TEXT_EMBEDDING", 27 | "id" : "dummy_model_id_1", 28 | "type" : "TEXT_EMBEDDING" 29 | } 30 | } 31 | } ] 32 | } 33 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingested_doc3.json: -------------------------------------------------------------------------------- 1 | { 2 | "_routing" : "routing", 3 | "geo_data_semantic_info" : { 4 | "embedding" : { 5 | "dummy token" : 1.0 6 | }, 7 | "model" : { 8 | "name" : "SPARSE_ENCODING", 9 | "id" : "dummy_model_id_2", 10 | "type" : "SPARSE_ENCODING" 11 | } 12 | }, 13 | "_version_type" : "internal", 14 | "_index" : "index", 15 | "geo_data" : "dummy_geo_data_1", 16 | "_id" : "2", 17 | "_version" : 1 18 | } 19 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/invalid_ingest_doc.json: -------------------------------------------------------------------------------- 1 | { 2 | "geo_data": "dummy_geo_data_1", 3 | "products": [ 4 | { 5 | "product_description": { 6 | "invalid": "not a string" 7 | } 8 | } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /src/test/resources/processor/update_doc1.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "text": "%s", 4 | "description": "daily logging", 5 | "favor_list": [ 6 | "test", 7 | "hello", 8 | "mock" 9 | ], 10 | "favorites": { 11 | "game": "overwatch", 12 | "movie": null 13 | }, 14 | "nested_passages": [ 15 | { 16 | "text_not_for_embedding": "test" 17 | }, 18 | { 19 | "text": "bye" 20 | }, 21 | { 22 | "text": "world" 23 | } 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- 
/src/test/resources/processor/update_doc2.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "this is a second doc", 3 | "text": "%s", 4 | "description": "the description is not very long", 5 | "favor_list": [ 6 | "favor" 7 | ], 8 | "favorites": { 9 | "game": "silver state", 10 | "movie": null 11 | }, 12 | "nested_passages": [ 13 | { 14 | "text_not_for_embedding": "test" 15 | }, 16 | { 17 | "text": "apple" 18 | }, 19 | { 20 | "text": "banana" 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/update_doc3.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "test", 6 | "hello", 7 | "mock" 8 | ], 9 | "favorites": { 10 | "game": "overwatch", 11 | "movie": null 12 | }, 13 | "nested_passages": 14 | { 15 | "level_2": 16 | { 17 | "level_3_text": "hello", 18 | "level_3_container": { 19 | "level_4_text_field": "def" 20 | } 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/update_doc4.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "key", 6 | "hey", 7 | "click" 8 | ], 9 | "favorites": { 10 | "game": "cossacks", 11 | "movie": "matrix" 12 | }, 13 | "nested_passages": 14 | { 15 | "level_2": 16 | { 17 | "level_3_text": "joker" 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/test/resources/processor/update_doc5.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "key", 6 | "hey", 7 | 
// [preserved dump content — tail of update_doc5.json and file separators]: "click" 8 | ], 9 | "favorites": { 10 | "game": "cossacks", 11 | "movie": "matrix" 12 | }, 13 | "nested_passages":[ 14 | { 15 | "level_2": 16 | { 17 | "level_3_text": "joker" 18 | } 19 | }, 20 | { 21 | "level_2.level_3_text": "superman" 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /src/testFixtures/java/org/opensearch/neuralsearch/util/AggregationsTestUtils.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
// package org.opensearch.neuralsearch.util;  (original package declaration, kept as comment so the block is standalone-compilable)
import java.util.List;
import java.util.Map;

/**
 * Util class for routines associated with aggregations testing.
 *
 * All helpers navigate a search response that has been parsed into nested
 * {@code Map}/{@code List} structures (e.g. from JSON). The generic type
 * parameters were restored here; the original dump had stripped them
 * (raw {@code Map}/{@code List} types), which loses compile-time safety.
 */
public class AggregationsTestUtils {

    /**
     * Extracts the individual hit objects from a parsed search response.
     *
     * @param searchResponseAsMap search response parsed into a map
     * @return the {@code hits.hits} list, one map per hit
     */
    @SuppressWarnings("unchecked") // parsed-JSON navigation; runtime shape is validated by the tests that use this
    public static List<Map<String, Object>> getNestedHits(Map<String, Object> searchResponseAsMap) {
        Map<String, Object> hitsMap = (Map<String, Object>) searchResponseAsMap.get("hits");
        return (List<Map<String, Object>>) hitsMap.get("hits");
    }

    /**
     * Extracts the total-hits object ({@code hits.total}) from a parsed search response.
     *
     * @param searchResponseAsMap search response parsed into a map
     * @return the {@code hits.total} map (contains e.g. "value" and "relation")
     */
    @SuppressWarnings("unchecked")
    public static Map<String, Object> getTotalHits(Map<String, Object> searchResponseAsMap) {
        Map<String, Object> hitsMap = (Map<String, Object>) searchResponseAsMap.get("hits");
        return (Map<String, Object>) hitsMap.get("total");
    }

    /**
     * Extracts the top-level {@code aggregations} section from a parsed search response.
     *
     * @param searchResponseAsMap search response parsed into a map
     * @return the aggregations map, or {@code null} when the response has none
     */
    @SuppressWarnings("unchecked")
    public static Map<String, Object> getAggregations(final Map<String, Object> searchResponseAsMap) {
        Map<String, Object> aggsMap = (Map<String, Object>) searchResponseAsMap.get("aggregations");
        return aggsMap;
    }

    /**
     * Returns the {@code value} field of a single-value aggregation.
     *
     * @param aggsMap the aggregations section of a parsed search response
     * @param aggName name of the aggregation to read
     * @param <T>     expected type of the value (caller-asserted)
     * @return the aggregation's {@code value}, cast to {@code T}
     */
    @SuppressWarnings("unchecked")
    public static <T> T getAggregationValue(final Map<String, Object> aggsMap, final String aggName) {
        Map<String, Object> aggValues = (Map<String, Object>) aggsMap.get(aggName);
        return (T) aggValues.get("value");
    }

    /**
     * Returns the {@code buckets} field of a bucketed aggregation.
     *
     * @param aggsMap the aggregations section of a parsed search response
     * @param aggName name of the aggregation to read
     * @param <T>     expected type of the buckets container (caller-asserted)
     * @return the aggregation's {@code buckets}, cast to {@code T}
     */
    @SuppressWarnings("unchecked")
    public static <T> T getAggregationBuckets(final Map<String, Object> aggsMap, final String aggName) {
        Map<String, Object> aggValues = (Map<String, Object>) aggsMap.get(aggName);
        return (T) aggValues.get("buckets");
    }

    /**
     * Returns the whole entry for the named aggregation, without drilling into it.
     *
     * @param aggsMap the aggregations section of a parsed search response
     * @param aggName name of the aggregation to read
     * @param <T>     expected type of the entry (caller-asserted)
     * @return the raw aggregation entry, cast to {@code T}
     */
    @SuppressWarnings("unchecked")
    public static <T> T getAggregationValues(final Map<String, Object> aggsMap, final String aggName) {
        return (T) aggsMap.get(aggName);
    }
}
// --------------------------------------------------------------------------------
// [preserved dump separator]: /src/testFixtures/java/org/opensearch/neuralsearch/util/BatchIngestionUtils.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
// package org.opensearch.neuralsearch.util;  (original package declaration, kept as comment so the block is standalone-compilable)
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * A helper class to build docs for bulk request which is used by batch ingestion tests.
 *
 * The generic type parameters were restored here; the original dump had stripped
 * them (raw {@code List}/{@code Map} types).
 */
public class BatchIngestionUtils {

    // Fixed pool of sample texts; doc i gets TEXTS[i mod size], so texts cycle for large batches.
    private static final List<String> TEXTS = Arrays.asList(
        "hello",
        "world",
        "an apple",
        "find me",
        "birdy",
        "flying piggy",
        "newspaper",
        "dynamic programming",
        "random text",
        "finally"
    );

    /**
     * Builds {@code count} documents with sequential string ids starting at {@code startId}.
     * Each document is a map with an {@code "id"} and a {@code "text"} key.
     *
     * @param startId id of the first document (may be negative; texts still cycle correctly)
     * @param count   number of documents to build; non-positive yields an empty list
     * @return list of doc maps ready to be used in a bulk request
     */
    public static List<Map<String, String>> prepareDataForBulkIngestion(int startId, int count) {
        List<Map<String, String>> docs = new ArrayList<>(Math.max(count, 0));
        for (int i = startId; i < startId + count; ++i) {
            Map<String, String> params = new HashMap<>();
            params.put("id", Integer.toString(i));
            // floorMod instead of % so a negative startId cannot produce a negative index
            params.put("text", TEXTS.get(Math.floorMod(i, TEXTS.size())));
            docs.add(params);
        }
        return docs;
    }
}
// [preserved dump content — separator plus header/imports of NeuralSearchClusterTestUtils.java, continued on the next physical line]: -------------------------------------------------------------------------------- /src/testFixtures/java/org/opensearch/neuralsearch/util/NeuralSearchClusterTestUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.util; 6 | 7 | import static org.mockito.Mockito.mock; 8 | import static org.mockito.Mockito.when; 9 | 10 | import org.opensearch.Version; 11 | import org.opensearch.cluster.ClusterState; 12 | import org.opensearch.cluster.metadata.IndexNameExpressionResolver; 13 | import org.opensearch.cluster.node.DiscoveryNodes; 14 | import org.opensearch.cluster.service.ClusterService; 15 |
import org.opensearch.common.settings.Settings; 16 | import org.opensearch.common.util.concurrent.ThreadContext; 17 | 18 | public class NeuralSearchClusterTestUtils { 19 | 20 | /** 21 | * Create new mock for ClusterService 22 | * @param version min version for cluster nodes 23 | * @return 24 | */ 25 | public static ClusterService mockClusterService(final Version version) { 26 | ClusterService clusterService = mock(ClusterService.class); 27 | ClusterState clusterState = mock(ClusterState.class); 28 | when(clusterService.state()).thenReturn(clusterState); 29 | DiscoveryNodes discoveryNodes = mock(DiscoveryNodes.class); 30 | when(clusterState.getNodes()).thenReturn(discoveryNodes); 31 | when(discoveryNodes.getMinNodeVersion()).thenReturn(version); 32 | return clusterService; 33 | } 34 | 35 | /** 36 | * Set up a simple NeuralSearchClusterUtil instance with a specified version. 37 | */ 38 | public static void setUpClusterService(Version version) { 39 | ClusterService clusterService = NeuralSearchClusterTestUtils.mockClusterService(version); 40 | IndexNameExpressionResolver indexNameExpressionResolver = new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)); 41 | NeuralSearchClusterUtil.instance().initialize(clusterService, indexNameExpressionResolver); 42 | } 43 | 44 | /** 45 | * Set up a simple NeuralSearchClusterUtil instance with current version. 46 | */ 47 | public static void setUpClusterService() { 48 | setUpClusterService(Version.CURRENT); 49 | } 50 | } 51 | --------------------------------------------------------------------------------