├── .github
├── CODEOWNERS
├── PULL_REQUEST_TEMPLATE.md
├── draft-release-notes-config.yml
└── workflows
│ ├── CI.yml
│ ├── add-untriaged.yml
│ ├── auto-release.yml
│ ├── backport.yml
│ ├── backwards_compatibility_tests_workflow.yml
│ ├── changelog_verifier.yml
│ ├── check-workflow-events.yml
│ ├── copy-linked-issue-labels.yml
│ ├── delete_backport_branch.yml
│ ├── draft-release-notes-workflow.yml
│ ├── links.yml
│ ├── maven-publish.yml
│ ├── test_aggregations.yml
│ └── test_security.yml
├── .gitignore
├── .idea
├── copyright
│ ├── SPDX_ALv2.xml
│ └── profiles_settings.xml
└── runConfigurations
│ ├── DebugNeuralSearch.xml
│ ├── Run_Neural_Search.xml
│ └── Run_With_Debug_Port.xml
├── .whitesource
├── ADMINS.md
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── DEVELOPER_GUIDE.md
├── LICENSE
├── MAINTAINERS.md
├── NOTICE
├── README.md
├── RELEASING.md
├── SECURITY.md
├── TRIAGING.md
├── build.gradle
├── codecov.yml
├── formatter
├── formatterConfig.xml
└── license-header.txt
├── gradle.properties
├── gradle
├── formatting.gradle
└── wrapper
│ ├── gradle-wrapper.jar
│ └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
├── lombok.config
├── qa
├── build.gradle
├── restart-upgrade
│ ├── build.gradle
│ └── src
│ │ └── test
│ │ ├── java
│ │ └── org
│ │ │ └── opensearch
│ │ │ └── neuralsearch
│ │ │ └── bwc
│ │ │ └── restart
│ │ │ ├── AbstractRestartUpgradeRestTestCase.java
│ │ │ ├── BatchIngestionIT.java
│ │ │ ├── HybridSearchIT.java
│ │ │ ├── HybridSearchWithRescoreIT.java
│ │ │ ├── KnnRadialSearchIT.java
│ │ │ ├── MultiModalSearchIT.java
│ │ │ ├── NeuralQueryEnricherProcessorIT.java
│ │ │ ├── NeuralSparseSearchIT.java
│ │ │ ├── NeuralSparseTwoPhaseProcessorIT.java
│ │ │ ├── RestNeuralStatsActionIT.java
│ │ │ ├── SemanticSearchIT.java
│ │ │ └── TextChunkingProcessorIT.java
│ │ └── resources
│ │ └── processor
│ │ ├── ChunkingIndexSettings.json
│ │ ├── CreateModelGroupRequestBody.json
│ │ ├── IndexMappingMultipleShard.json
│ │ ├── IndexMappingSingleShard.json
│ │ ├── NeuralSparseTwoPhaseProcessorConfiguration.json
│ │ ├── PipelineConfiguration.json
│ │ ├── PipelineForSparseEncodingProcessorConfiguration.json
│ │ ├── PipelineForTextChunkingProcessorConfiguration.json
│ │ ├── PipelineForTextImageProcessorConfiguration.json
│ │ ├── SearchRequestPipelineConfiguration.json
│ │ ├── SparseIndexMappings.json
│ │ ├── UploadModelRequestBody.json
│ │ └── UploadSparseEncodingModelRequestBody.json
└── rolling-upgrade
│ ├── build.gradle
│ └── src
│ └── test
│ ├── java
│ └── org
│ │ └── opensearch
│ │ └── neuralsearch
│ │ └── bwc
│ │ └── rolling
│ │ ├── AbstractRollingUpgradeTestCase.java
│ │ ├── BatchIngestionIT.java
│ │ ├── HybridSearchIT.java
│ │ ├── HybridSearchRelevancyIT.java
│ │ ├── HybridSearchWithRescoreIT.java
│ │ ├── KnnRadialSearchIT.java
│ │ ├── MultiModalSearchIT.java
│ │ ├── NeuralQueryEnricherProcessorIT.java
│ │ ├── NeuralSparseSearchIT.java
│ │ ├── NeuralSparseTwoPhaseProcessorIT.java
│ │ ├── RestNeuralStatsActionIT.java
│ │ ├── SemanticSearchIT.java
│ │ └── TextChunkingProcessorIT.java
│ └── resources
│ └── processor
│ ├── ChunkingIndexSettings.json
│ ├── CreateModelGroupRequestBody.json
│ ├── IndexMappings.json
│ ├── NeuralSparseTwoPhaseProcessorConfiguration.json
│ ├── PipelineConfiguration.json
│ ├── PipelineForSparseEncodingProcessorConfiguration.json
│ ├── PipelineForTextChunkingProcessorConfiguration.json
│ ├── PipelineForTextImageProcessorConfiguration.json
│ ├── SearchRequestPipelineConfiguration.json
│ ├── SparseIndexMappings.json
│ ├── UploadModelRequestBody.json
│ └── UploadSparseEncodingModelRequestBody.json
├── release-notes
├── opensearch-neural-search.release-notes-2.10.0.0.md
├── opensearch-neural-search.release-notes-2.11.0.0.md
├── opensearch-neural-search.release-notes-2.12.0.0.md
├── opensearch-neural-search.release-notes-2.13.0.0.md
├── opensearch-neural-search.release-notes-2.14.0.0.md
├── opensearch-neural-search.release-notes-2.15.0.0.md
├── opensearch-neural-search.release-notes-2.16.0.0.md
├── opensearch-neural-search.release-notes-2.17.0.0.md
├── opensearch-neural-search.release-notes-2.18.0.0.md
├── opensearch-neural-search.release-notes-2.19.0.0.md
├── opensearch-neural-search.release-notes-2.4.0.0.md
├── opensearch-neural-search.release-notes-2.4.1.0.md
├── opensearch-neural-search.release-notes-2.5.0.0.md
├── opensearch-neural-search.release-notes-2.6.0.0.md
├── opensearch-neural-search.release-notes-2.7.0.0.md
├── opensearch-neural-search.release-notes-2.8.0.0.md
├── opensearch-neural-search.release-notes-2.9.0.0.md
├── opensearch-neural-search.release-notes-3.0.0.0-alpha1.md
├── opensearch-neural-search.release-notes-3.0.0.0-beta1.md
└── opensearch-neural-search.release-notes-3.0.0.0.md
├── repositories.gradle
├── settings.gradle
└── src
├── main
├── java
│ └── org
│ │ └── opensearch
│ │ └── neuralsearch
│ │ ├── common
│ │ ├── MinClusterVersionUtil.java
│ │ └── VectorUtil.java
│ │ ├── constants
│ │ ├── MappingConstants.java
│ │ ├── SemanticFieldConstants.java
│ │ └── SemanticInfoFieldConstants.java
│ │ ├── executors
│ │ ├── HybridQueryExecutor.java
│ │ ├── HybridQueryExecutorCollector.java
│ │ ├── HybridQueryExecutorCollectorManager.java
│ │ ├── HybridQueryRewriteCollectorManager.java
│ │ └── HybridQueryScoreSupplierCollectorManager.java
│ │ ├── highlight
│ │ ├── SemanticHighlighter.java
│ │ ├── SemanticHighlighterEngine.java
│ │ └── extractor
│ │ │ ├── BooleanQueryTextExtractor.java
│ │ │ ├── HybridQueryTextExtractor.java
│ │ │ ├── NestedQueryTextExtractor.java
│ │ │ ├── NeuralQueryTextExtractor.java
│ │ │ ├── QueryTextExtractor.java
│ │ │ ├── QueryTextExtractorRegistry.java
│ │ │ └── TermQueryTextExtractor.java
│ │ ├── mapper
│ │ ├── SemanticFieldMapper.java
│ │ └── dto
│ │ │ └── SemanticParameters.java
│ │ ├── mappingtransformer
│ │ ├── SemanticInfoConfigBuilder.java
│ │ └── SemanticMappingTransformer.java
│ │ ├── ml
│ │ └── MLCommonsClientAccessor.java
│ │ ├── plugin
│ │ └── NeuralSearch.java
│ │ ├── processor
│ │ ├── AbstractScoreHybridizationProcessor.java
│ │ ├── CompoundTopDocs.java
│ │ ├── ExplanationResponseProcessor.java
│ │ ├── InferenceProcessor.java
│ │ ├── InferenceRequest.java
│ │ ├── MapInferenceRequest.java
│ │ ├── NeuralQueryEnricherProcessor.java
│ │ ├── NeuralSparseTwoPhaseProcessor.java
│ │ ├── NormalizationExecuteDTO.java
│ │ ├── NormalizationProcessor.java
│ │ ├── NormalizationProcessorWorkflow.java
│ │ ├── NormalizationProcessorWorkflowExecuteRequest.java
│ │ ├── NormalizeScoresDTO.java
│ │ ├── RRFProcessor.java
│ │ ├── SearchShard.java
│ │ ├── SimilarityInferenceRequest.java
│ │ ├── SparseEncodingProcessor.java
│ │ ├── TechniqueCompatibilityCheckDTO.java
│ │ ├── TextChunkingProcessor.java
│ │ ├── TextEmbeddingProcessor.java
│ │ ├── TextImageEmbeddingProcessor.java
│ │ ├── TextInferenceRequest.java
│ │ ├── chunker
│ │ │ ├── Chunker.java
│ │ │ ├── ChunkerFactory.java
│ │ │ ├── ChunkerParameterParser.java
│ │ │ ├── DelimiterChunker.java
│ │ │ └── FixedTokenLengthChunker.java
│ │ ├── combination
│ │ │ ├── ArithmeticMeanScoreCombinationTechnique.java
│ │ │ ├── CombineScoresDto.java
│ │ │ ├── GeometricMeanScoreCombinationTechnique.java
│ │ │ ├── HarmonicMeanScoreCombinationTechnique.java
│ │ │ ├── RRFScoreCombinationTechnique.java
│ │ │ ├── ScoreCombinationFactory.java
│ │ │ ├── ScoreCombinationTechnique.java
│ │ │ ├── ScoreCombinationUtil.java
│ │ │ └── ScoreCombiner.java
│ │ ├── dto
│ │ │ └── SemanticFieldInfo.java
│ │ ├── explain
│ │ │ ├── CombinedExplanationDetails.java
│ │ │ ├── DocIdAtSearchShard.java
│ │ │ ├── ExplainableTechnique.java
│ │ │ ├── ExplanationDetails.java
│ │ │ ├── ExplanationPayload.java
│ │ │ └── ExplanationUtils.java
│ │ ├── factory
│ │ │ ├── ExplanationResponseProcessorFactory.java
│ │ │ ├── NormalizationProcessorFactory.java
│ │ │ ├── RRFProcessorFactory.java
│ │ │ ├── RerankProcessorFactory.java
│ │ │ ├── SemanticFieldProcessorFactory.java
│ │ │ ├── SparseEncodingProcessorFactory.java
│ │ │ ├── TextChunkingProcessorFactory.java
│ │ │ ├── TextEmbeddingProcessorFactory.java
│ │ │ └── TextImageEmbeddingProcessorFactory.java
│ │ ├── highlight
│ │ │ └── SentenceHighlightingRequest.java
│ │ ├── normalization
│ │ │ ├── L2ScoreNormalizationTechnique.java
│ │ │ ├── MinMaxScoreNormalizationTechnique.java
│ │ │ ├── RRFNormalizationTechnique.java
│ │ │ ├── ScoreNormalizationFactory.java
│ │ │ ├── ScoreNormalizationTechnique.java
│ │ │ ├── ScoreNormalizationUtil.java
│ │ │ ├── ScoreNormalizer.java
│ │ │ └── ZScoreNormalizationTechnique.java
│ │ ├── optimization
│ │ │ ├── InferenceFilter.java
│ │ │ ├── TextEmbeddingInferenceFilter.java
│ │ │ └── TextImageEmbeddingInferenceFilter.java
│ │ ├── rerank
│ │ │ ├── ByFieldRerankProcessor.java
│ │ │ ├── MLOpenSearchRerankProcessor.java
│ │ │ ├── RerankProcessor.java
│ │ │ ├── RerankType.java
│ │ │ ├── RescoringRerankProcessor.java
│ │ │ └── context
│ │ │ │ ├── ContextSourceFetcher.java
│ │ │ │ ├── DocumentContextSourceFetcher.java
│ │ │ │ └── QueryContextSourceFetcher.java
│ │ ├── semantic
│ │ │ └── SemanticFieldProcessor.java
│ │ └── util
│ │ │ ├── ChunkUtils.java
│ │ │ └── ProcessorUtils.java
│ │ ├── query
│ │ ├── HybridBulkScorer.java
│ │ ├── HybridQuery.java
│ │ ├── HybridQueryBuilder.java
│ │ ├── HybridQueryContext.java
│ │ ├── HybridQueryDocIdStream.java
│ │ ├── HybridQueryScorer.java
│ │ ├── HybridQueryWeight.java
│ │ ├── HybridScoreBlockBoundaryPropagator.java
│ │ ├── HybridScorerSupplier.java
│ │ ├── HybridSubQueryScorer.java
│ │ ├── ModelInferenceQueryBuilder.java
│ │ ├── NeuralKNNQuery.java
│ │ ├── NeuralKNNQueryBuilder.java
│ │ ├── NeuralQueryBuilder.java
│ │ ├── NeuralSparseQueryBuilder.java
│ │ ├── NeuralSparseQueryTwoPhaseInfo.java
│ │ ├── dto
│ │ │ ├── NeuralQueryBuildStage.java
│ │ │ └── NeuralQueryTargetFieldConfig.java
│ │ ├── ext
│ │ │ └── RerankSearchExtBuilder.java
│ │ ├── parser
│ │ │ └── NeuralQueryParser.java
│ │ └── visitor
│ │ │ └── NeuralSearchQueryVisitor.java
│ │ ├── rest
│ │ └── RestNeuralStatsAction.java
│ │ ├── search
│ │ ├── HitsThresholdChecker.java
│ │ ├── HybridDisiWrapper.java
│ │ ├── collector
│ │ │ ├── HybridLeafCollector.java
│ │ │ ├── HybridSearchCollector.java
│ │ │ ├── HybridTopFieldDocSortCollector.java
│ │ │ ├── HybridTopScoreDocCollector.java
│ │ │ ├── PagingFieldCollector.java
│ │ │ └── SimpleFieldCollector.java
│ │ ├── lucene
│ │ │ └── MultiLeafFieldComparator.java
│ │ ├── query
│ │ │ ├── HybridAggregationProcessor.java
│ │ │ ├── HybridCollectorManager.java
│ │ │ ├── HybridQueryFieldDocComparator.java
│ │ │ ├── HybridQueryPhaseSearcher.java
│ │ │ ├── HybridQueryScoreDocsMerger.java
│ │ │ ├── TopDocsMerger.java
│ │ │ └── exception
│ │ │ │ └── HybridSearchRescoreQueryException.java
│ │ └── util
│ │ │ ├── HybridSearchResultFormatUtil.java
│ │ │ └── HybridSearchSortUtil.java
│ │ ├── settings
│ │ ├── NeuralSearchSettings.java
│ │ └── NeuralSearchSettingsAccessor.java
│ │ ├── stats
│ │ ├── NeuralStatsInput.java
│ │ ├── common
│ │ │ ├── StatName.java
│ │ │ ├── StatSnapshot.java
│ │ │ └── StatType.java
│ │ ├── events
│ │ │ ├── EventStat.java
│ │ │ ├── EventStatName.java
│ │ │ ├── EventStatType.java
│ │ │ ├── EventStatsManager.java
│ │ │ ├── TimestampedEventStat.java
│ │ │ └── TimestampedEventStatSnapshot.java
│ │ └── info
│ │ │ ├── CountableInfoStatSnapshot.java
│ │ │ ├── InfoStatName.java
│ │ │ ├── InfoStatType.java
│ │ │ ├── InfoStatsManager.java
│ │ │ └── SettableInfoStatSnapshot.java
│ │ ├── transport
│ │ ├── NeuralStatsAction.java
│ │ ├── NeuralStatsNodeRequest.java
│ │ ├── NeuralStatsNodeResponse.java
│ │ ├── NeuralStatsRequest.java
│ │ ├── NeuralStatsResponse.java
│ │ └── NeuralStatsTransportAction.java
│ │ └── util
│ │ ├── HybridQueryUtil.java
│ │ ├── NeuralQueryValidationUtil.java
│ │ ├── NeuralSearchClusterUtil.java
│ │ ├── PipelineServiceUtil.java
│ │ ├── ProcessorDocumentUtils.java
│ │ ├── RetryUtil.java
│ │ ├── SemanticMLModelUtils.java
│ │ ├── SemanticMappingUtils.java
│ │ ├── TokenWeightUtil.java
│ │ └── prune
│ │ ├── PruneType.java
│ │ └── PruneUtils.java
└── plugin-metadata
│ └── plugin-security.policy
├── test
├── java
│ └── org
│ │ └── opensearch
│ │ └── neuralsearch
│ │ ├── NeuralSearchIT.java
│ │ ├── NeuralSearchTests.java
│ │ ├── ValidateDependentPluginInstallationIT.java
│ │ ├── common
│ │ └── VectorUtilTests.java
│ │ ├── constants
│ │ └── TestCommonConstants.java
│ │ ├── executors
│ │ └── HybridQueryExecutorIT.java
│ │ ├── highlight
│ │ ├── QueryTextExtractorTests.java
│ │ ├── SemanticHighlighterEngineTests.java
│ │ ├── SemanticHighlighterIT.java
│ │ └── SemanticHighlighterTests.java
│ │ ├── mapper
│ │ └── SemanticFieldMapperTests.java
│ │ ├── mappingtransformer
│ │ ├── SemanticInfoConfigBuilderTests.java
│ │ └── SemanticMappingTransformerTests.java
│ │ ├── ml
│ │ └── MLCommonsClientAccessorTests.java
│ │ ├── plugin
│ │ └── NeuralSearchTests.java
│ │ ├── processor
│ │ ├── AbstractScoreHybridizationProcessorTests.java
│ │ ├── CompoundTopDocsTests.java
│ │ ├── ExplanationResponseProcessorTests.java
│ │ ├── InferenceProcessorTestCase.java
│ │ ├── InferenceProcessorTests.java
│ │ ├── NeuralQueryEnricherProcessorIT.java
│ │ ├── NeuralQueryEnricherProcessorTests.java
│ │ ├── NeuralSparseTwoPhaseProcessorIT.java
│ │ ├── NeuralSparseTwoPhaseProcessorTests.java
│ │ ├── NormalizationProcessorIT.java
│ │ ├── NormalizationProcessorTests.java
│ │ ├── NormalizationProcessorWorkflowTests.java
│ │ ├── RRFProcessorIT.java
│ │ ├── RRFProcessorTests.java
│ │ ├── ScoreCombinationIT.java
│ │ ├── ScoreCombinationTechniqueTests.java
│ │ ├── ScoreNormalizationIT.java
│ │ ├── ScoreNormalizationTechniqueTests.java
│ │ ├── SparseEncodingProcessIT.java
│ │ ├── SparseEncodingProcessorTests.java
│ │ ├── TextChunkingProcessorIT.java
│ │ ├── TextChunkingProcessorTests.java
│ │ ├── TextEmbeddingProcessorIT.java
│ │ ├── TextEmbeddingProcessorTests.java
│ │ ├── TextImageEmbeddingProcessorIT.java
│ │ ├── TextImageEmbeddingProcessorTests.java
│ │ ├── chunker
│ │ │ ├── ChunkerFactoryTests.java
│ │ │ ├── ChunkerParameterParserTests.java
│ │ │ ├── DelimiterChunkerTests.java
│ │ │ └── FixedTokenLengthChunkerTests.java
│ │ ├── combination
│ │ │ ├── ArithmeticMeanScoreCombinationTechniqueTests.java
│ │ │ ├── BaseScoreCombinationTechniqueTests.java
│ │ │ ├── GeometricMeanScoreCombinationTechniqueTests.java
│ │ │ ├── HarmonicMeanScoreCombinationTechniqueTests.java
│ │ │ ├── RRFScoreCombinationTechniqueTests.java
│ │ │ ├── ScoreCombinationFactoryTests.java
│ │ │ └── ScoreNormalizationUtilTests.java
│ │ ├── dto
│ │ │ └── SemanticFieldInfoTests.java
│ │ ├── explain
│ │ │ └── ExplanationUtilsTests.java
│ │ ├── factory
│ │ │ ├── ExplanationResponseProcessorFactoryTests.java
│ │ │ ├── NormalizationProcessorFactoryTests.java
│ │ │ ├── RRFProcessorFactoryTests.java
│ │ │ ├── RerankProcessorFactoryTests.java
│ │ │ ├── SemanticFieldProcessorFactoryTests.java
│ │ │ ├── SparseEncodingEmbeddingProcessorFactoryTests.java
│ │ │ ├── TextChunkingProcessorFactoryTests.java
│ │ │ └── TextImageEmbeddingProcessorFactoryTests.java
│ │ ├── normalization
│ │ │ ├── L2ScoreNormalizationTechniqueTests.java
│ │ │ ├── MinMaxScoreNormalizationTechniqueTests.java
│ │ │ ├── RRFNormalizationTechniqueTests.java
│ │ │ ├── ScoreNormalizationFactoryTests.java
│ │ │ ├── ScoreNormalizationUtilTests.java
│ │ │ └── ZScoreNormalizationTechniqueTests.java
│ │ ├── optimization
│ │ │ ├── TextEmbeddingInferenceFilterTests.java
│ │ │ └── TextImageEmbeddingInferenceFilterTests.java
│ │ ├── rerank
│ │ │ ├── ByFieldRerankProcessorIT.java
│ │ │ ├── ByFieldRerankProcessorTests.java
│ │ │ ├── MLOpenSearchRerankProcessorIT.java
│ │ │ └── MLOpenSearchRerankProcessorTests.java
│ │ ├── semantic
│ │ │ └── SemanticFieldProcessorTests.java
│ │ └── util
│ │ │ └── ChunkUtilsTests.java
│ │ ├── query
│ │ ├── HybridBulkScorerTests.java
│ │ ├── HybridQueryAggregationsIT.java
│ │ ├── HybridQueryBuilderTests.java
│ │ ├── HybridQueryDocIdStreamTests.java
│ │ ├── HybridQueryExplainIT.java
│ │ ├── HybridQueryFilterIT.java
│ │ ├── HybridQueryIT.java
│ │ ├── HybridQueryInnerHitsIT.java
│ │ ├── HybridQueryPostFilterIT.java
│ │ ├── HybridQueryScorerTests.java
│ │ ├── HybridQuerySortIT.java
│ │ ├── HybridQueryTests.java
│ │ ├── HybridQueryWeightTests.java
│ │ ├── HybridScoreBlockBoundaryPropagatorTests.java
│ │ ├── HybridScorerSupplierTests.java
│ │ ├── HybridSubQueryScorerTests.java
│ │ ├── NeuralKNNQueryBuilderTests.java
│ │ ├── NeuralKNNQueryTests.java
│ │ ├── NeuralQueryBuilderBuilderTests.java
│ │ ├── NeuralQueryBuilderRewriteTests.java
│ │ ├── NeuralQueryBuilderTests.java
│ │ ├── NeuralQueryIT.java
│ │ ├── NeuralSparseQueryBuilderTests.java
│ │ ├── NeuralSparseQueryIT.java
│ │ ├── NeuralSparseQueryTwoPhaseInfoTests.java
│ │ ├── OpenSearchQueryTestCase.java
│ │ ├── aggregation
│ │ │ ├── BaseAggregationsWithHybridQueryIT.java
│ │ │ ├── BucketAggregationsWithHybridQueryIT.java
│ │ │ ├── MetricAggregationsWithHybridQueryIT.java
│ │ │ └── PipelineAggregationsWithHybridQueryIT.java
│ │ ├── ext
│ │ │ └── RerankSearchExtBuilderTests.java
│ │ └── visitor
│ │ │ └── NeuralSearchQueryVisitorTests.java
│ │ ├── rest
│ │ ├── RestNeuralStatsActionIT.java
│ │ └── RestNeuralStatsActionTests.java
│ │ ├── search
│ │ ├── HitsThresholdCheckerTests.java
│ │ ├── HybridDisiWrapperTests.java
│ │ ├── collector
│ │ │ ├── HybridCollectorTestCase.java
│ │ │ ├── HybridTopFieldDocSortCollectorTests.java
│ │ │ └── HybridTopScoreDocCollectorTests.java
│ │ ├── query
│ │ │ ├── HybridAggregationProcessorTests.java
│ │ │ ├── HybridCollectorManagerTests.java
│ │ │ ├── HybridQueryPhaseSearcherTests.java
│ │ │ ├── HybridQueryScoreDocsMergerTests.java
│ │ │ └── TopDocsMergerTests.java
│ │ └── util
│ │ │ └── HybridSearchResultFormatUtilTests.java
│ │ ├── stats
│ │ ├── NeuralStatsInputTests.java
│ │ ├── events
│ │ │ ├── EventStatNameTests.java
│ │ │ ├── EventStatsManagerTests.java
│ │ │ ├── TimestampedEventStatSnapshotTests.java
│ │ │ └── TimestampedEventStatTests.java
│ │ └── info
│ │ │ ├── CountableInfoStatSnapshotTests.java
│ │ │ ├── InfoStatNameTests.java
│ │ │ ├── InfoStatsManagerTests.java
│ │ │ └── SettableInfoStatSnapshotTests.java
│ │ ├── transport
│ │ ├── NeuralStatsResponseTests.java
│ │ └── NeuralStatsTransportActionTests.java
│ │ └── util
│ │ ├── HybridQueryUtilTests.java
│ │ ├── NeuralSearchClusterUtilTests.java
│ │ ├── PipelineServiceUtilTests.java
│ │ ├── ProcessorDocumentUtilsTests.java
│ │ ├── ProcessorUtilsTests.java
│ │ ├── SemanticFieldMapperTestUtil.java
│ │ ├── SemanticMLModelUtilsTests.java
│ │ ├── SemanticMappingUtilsTests.java
│ │ ├── TokenWeightUtilTests.java
│ │ └── prune
│ │ ├── PruneTypeTests.java
│ │ └── PruneUtilsTests.java
└── resources
│ ├── highlight
│ └── UploadSentenceHighlightingModelRequestBody.json
│ ├── mapper
│ └── mappingWithNestedSemanticFields.json
│ ├── mappingtransformer
│ └── transformedMappingMultipleSemanticFields.json
│ ├── processor
│ ├── CreateModelGroupRequestBody.json
│ ├── IndexMappings.json
│ ├── NeuralSparseTwoPhaseAndNeuralEnrichProcessorConfiguration.json
│ ├── NeuralSparseTwoPhaseProcessorConfiguration.json
│ ├── PipelineConfiguration.json
│ ├── PipelineConfigurationWithBatchSize.json
│ ├── PipelineConfigurationWithBatchSizeWithSkipExisting.json
│ ├── PipelineConfigurationWithNestedFieldsMapping.json
│ ├── PipelineConfigurationWithNestedFieldsMappingWithSkipExisting.json
│ ├── PipelineConfigurationWithSkipExisting.json
│ ├── PipelineForTextImageEmbeddingProcessorConfiguration.json
│ ├── PipelineForTextImageEmbeddingWithSkipExistingProcessorConfiguration.json
│ ├── ReRankByFieldPipelineConfiguration.json
│ ├── RerankMLOpenSearchPipelineConfiguration.json
│ ├── SearchRequestPipelineConfiguration.json
│ ├── SparseEncodingIndexMappings.json
│ ├── SparseEncodingPipelineConfiguration.json
│ ├── SparseEncodingPipelineConfigurationWithPrune.json
│ ├── SparseEncodingPipelineConfigurationWithSkipExisting.json
│ ├── UploadModelRequestBody.json
│ ├── UploadSparseEncodingModelRequestBody.json
│ ├── UploadTextSimilarityModelRequestBody.json
│ ├── bulk_item_template.json
│ ├── chunker
│ │ ├── PipelineForCascadedChunker.json
│ │ ├── PipelineForDelimiterChunker.json
│ │ ├── PipelineForFixedTokenLengthChunkerWithLetterTokenizer.json
│ │ ├── PipelineForFixedTokenLengthChunkerWithLowercaseTokenizer.json
│ │ ├── PipelineForFixedTokenLengthChunkerWithStandardTokenizer.json
│ │ ├── TextChunkingIndexSettings.json
│ │ ├── TextChunkingTestDocument.json
│ │ └── TextChunkingTestLongDocument.json
│ ├── ingest_bulk.json
│ ├── ingest_doc1.json
│ ├── ingest_doc2.json
│ ├── ingest_doc3.json
│ ├── ingest_doc4.json
│ ├── ingest_doc5.json
│ ├── semantic
│ │ ├── ingest_doc1.json
│ │ ├── ingest_doc2.json
│ │ ├── ingest_doc3.json
│ │ ├── ingested_doc1.json
│ │ ├── ingested_doc2.json
│ │ ├── ingested_doc3.json
│ │ └── invalid_ingest_doc.json
│ ├── update_doc1.json
│ ├── update_doc2.json
│ ├── update_doc3.json
│ ├── update_doc4.json
│ └── update_doc5.json
│ └── util
│ └── ProcessorDocumentUtils.json
└── testFixtures
└── java
└── org
└── opensearch
└── neuralsearch
├── BaseNeuralSearchIT.java
├── OpenSearchSecureRestTestCase.java
└── util
├── AggregationsTestUtils.java
├── BatchIngestionUtils.java
├── NeuralSearchClusterTestUtils.java
└── TestUtils.java
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # This should match the owning team set up in https://github.com/orgs/opensearch-project/teams
2 | * @heemin32 @navneet1v @VijayanB @vamshin @jmazanec15 @naveentatikonda @junqiu-lei @martin-gaievski @sean-zheng-amazon @model-collapse @zane-neo @vibrantvarun @zhichao-aws @yuye-aws @minalsha
3 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ### Description
2 | [Describe what this change achieves]
3 |
4 | ### Related Issues
5 | Resolves #[Issue number to be closed when this PR is merged]
6 |
7 |
8 | ### Check List
9 | - [ ] New functionality includes testing.
10 | - [ ] New functionality has been documented.
11 | - [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md).
12 | - [ ] Commits are signed per the DCO using `--signoff`.
13 | - [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose).
14 |
15 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
16 | For more information on following the Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/neural-search/blob/main/CONTRIBUTING.md#developer-certificate-of-origin).
17 |
--------------------------------------------------------------------------------
/.github/draft-release-notes-config.yml:
--------------------------------------------------------------------------------
1 | # The overall template of the release notes
2 | template: |
3 | Compatible with OpenSearch (**set version here**).
4 | $CHANGES
5 |
6 | # Setting the formatting and sorting for the release notes body
7 | name-template: Version (set version here)
8 | change-template: '* $TITLE (#$NUMBER)'
9 | sort-by: merged_at
10 | sort-direction: ascending
11 | replacers:
12 | - search: '##'
13 | replace: '###'
14 |
15 | # Organizing the tagged PRs into categories
16 | categories:
17 | - title: 'Breaking Changes'
18 | labels:
19 | - 'Breaking Changes'
20 | - title: 'Features'
21 | labels:
22 | - 'Features'
23 | - title: 'Enhancements'
24 | labels:
25 | - 'Enhancements'
26 | - title: 'Bug Fixes'
27 | labels:
28 | - 'Bug Fixes'
29 | - title: 'Infrastructure'
30 | labels:
31 | - 'Infrastructure'
32 | - title: 'Documentation'
33 | labels:
34 | - 'Documentation'
35 | - title: 'Maintenance'
36 | labels:
37 | - 'Maintenance'
38 | - title: 'Refactoring'
39 | labels:
40 | - 'Refactoring'
41 |
--------------------------------------------------------------------------------
/.github/workflows/add-untriaged.yml:
--------------------------------------------------------------------------------
1 | name: Apply 'untriaged' label during issue lifecycle
2 |
3 | on:
4 | issues:
5 | types: [opened, reopened, transferred]
6 |
7 | jobs:
8 | apply-label:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/github-script@v6
12 | with:
13 | script: |
14 | github.rest.issues.addLabels({
15 | issue_number: context.issue.number,
16 | owner: context.repo.owner,
17 | repo: context.repo.repo,
18 | labels: ['untriaged']
19 | })
20 |
--------------------------------------------------------------------------------
/.github/workflows/auto-release.yml:
--------------------------------------------------------------------------------
1 | name: Releases
2 |
3 | on:
4 | push:
5 | tags:
6 | - '*'
7 |
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 | permissions:
12 | contents: write
13 | steps:
14 | - name: GitHub App token
15 | id: github_app_token
16 | uses: tibdex/github-app-token@v1.5.0
17 | with:
18 | app_id: ${{ secrets.APP_ID }}
19 | private_key: ${{ secrets.APP_PRIVATE_KEY }}
20 | installation_id: 22958780
21 | - name: Get tag
22 | id: tag
23 | uses: dawidd6/action-get-tag@v1
24 | - uses: actions/checkout@v2
25 | - uses: ncipollo/release-action@v1
26 | with:
27 | github_token: ${{ steps.github_app_token.outputs.token }}
28 | bodyFile: release-notes/opensearch-neural-search.release-notes-${{steps.tag.outputs.tag}}.md
29 |
--------------------------------------------------------------------------------
/.github/workflows/backport.yml:
--------------------------------------------------------------------------------
1 | name: Backport
2 | on:
3 | pull_request_target:
4 | types:
5 | - closed
6 | - labeled
7 |
8 | jobs:
9 | backport:
10 | runs-on: ubuntu-latest
11 | permissions:
12 | contents: write
13 | pull-requests: write
14 | name: Backport
15 | steps:
16 | - name: GitHub App token
17 | id: github_app_token
18 | uses: tibdex/github-app-token@v1.5.0
19 | with:
20 | app_id: ${{ secrets.APP_ID }}
21 | private_key: ${{ secrets.APP_PRIVATE_KEY }}
22 | installation_id: 22958780
23 |
24 | - name: Backport
25 | uses: VachaShah/backport@v1.1.4
26 | with:
27 | github_token: ${{ steps.github_app_token.outputs.token }}
28 | branch_name: backport/backport-${{ github.event.number }}
29 |
--------------------------------------------------------------------------------
/.github/workflows/backwards_compatibility_tests_workflow.yml:
--------------------------------------------------------------------------------
1 | name: Backwards Compatibility Tests NeuralSearch
2 | on:
3 | push:
4 | branches:
5 | - "*"
6 | - "feature/**"
7 | pull_request:
8 | branches:
9 | - "*"
10 | - "feature/**"
11 |
12 | jobs:
13 | Restart-Upgrade-BWCTests-NeuralSearch:
14 | strategy:
15 | matrix:
16 | java: [ 21, 23 ]
17 | os: [ubuntu-latest]
18 | bwc_version : [ "2.9.0","2.10.0","2.11.0","2.12.0","2.13.0","2.14.0","2.15.0","2.16.0","2.17.0","2.18.0","2.19.0","2.20.0-SNAPSHOT","3.0.0" ]
19 | opensearch_version : [ "3.1.0-SNAPSHOT" ]
20 |
21 | name: NeuralSearch Restart-Upgrade BWC Tests
22 | runs-on: ${{ matrix.os }}
23 | env:
24 | BWC_VERSION_RESTART_UPGRADE: ${{ matrix.bwc_version }}
25 |
26 | steps:
27 | - name: Checkout neural-search
28 | uses: actions/checkout@v1
29 |
30 | - name: Setup Java ${{ matrix.java }}
31 | uses: actions/setup-java@v1
32 | with:
33 | java-version: ${{ matrix.java }}
34 |
35 | - name: Run NeuralSearch Restart-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}}
36 | run: |
37 | echo "Running restart-upgrade backwards compatibility tests ..."
38 | ./gradlew :qa:restart-upgrade:testAgainstNewCluster -D'tests.bwc.version=${{ matrix.bwc_version }}'
39 |
40 | Rolling-Upgrade-BWCTests-NeuralSearch:
41 | strategy:
42 | matrix:
43 | java: [21, 23]
44 | os: [ubuntu-latest]
45 | bwc_version: [ "2.20.0-SNAPSHOT","3.0.0" ]
46 | opensearch_version: [ "3.1.0-SNAPSHOT" ]
47 |
48 | name: NeuralSearch Rolling-Upgrade BWC Tests
49 | runs-on: ${{ matrix.os }}
50 | env:
51 | BWC_VERSION_ROLLING_UPGRADE: ${{ matrix.bwc_version }}
52 |
53 | steps:
54 | - name: Checkout neural-search
55 | uses: actions/checkout@v1
56 |
57 | - name: Setup Java ${{ matrix.java }}
58 | uses: actions/setup-java@v1
59 | with:
60 | java-version: ${{ matrix.java }}
61 |
62 | - name: Run NeuralSearch Rolling-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}}
63 | run: |
64 | echo "Running rolling-upgrade backwards compatibility tests ..."
65 | ./gradlew :qa:rolling-upgrade:testRollingUpgrade -D'tests.bwc.version=${{ matrix.bwc_version }}'
66 |
--------------------------------------------------------------------------------
/.github/workflows/changelog_verifier.yml:
--------------------------------------------------------------------------------
1 | name: "Changelog Verifier"
2 | on:
3 | pull_request:
4 | types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled]
5 |
6 | jobs:
7 | # Enforces the update of a changelog file on every pull request
8 | verify-changelog:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - uses: actions/checkout@v3
12 | with:
13 | token: ${{ secrets.GITHUB_TOKEN }}
14 | ref: ${{ github.event.pull_request.head.sha }}
15 |
16 | - uses: dangoslen/changelog-enforcer@v3
17 | with:
18 | skipLabels: "autocut, skip-changelog"
19 |
--------------------------------------------------------------------------------
/.github/workflows/check-workflow-events.yml:
--------------------------------------------------------------------------------
1 | name: Check Workflow Events
2 | on:
3 | pull_request:
4 |
5 | jobs:
6 | check-workflow-events:
7 | runs-on: ubuntu-latest
8 | name: Check Workflow Events
9 | steps:
10 | - uses: actions/checkout@v4
11 | - name: Check Workflow Events
12 | working-directory: .github/workflows
13 | run: |
14 | set +e
15 | EVENT_COUNT=0
16 | for file_found in `ls | grep .ym`; do
17 | yq -r e '.on | keys | .[0]' $file_found | grep -q pull_request_target
18 | EVENT_FOUND=$?
19 |
20 | if [ "$EVENT_FOUND" = 0 ] && [ "$file_found" != "backport.yml" ] && [ "$file_found" != "copy-linked-issue-labels.yml" ]; then
21 | EVENT_COUNT=$(( EVENT_COUNT+1 ))
22 | echo "'$file_found' workflow file contains 'pull_request_target' event, please remove!"
23 | fi
24 | done
25 |
26 | if [ "$EVENT_COUNT" != 0 ]; then
27 | exit 1
28 | fi
29 |
--------------------------------------------------------------------------------
/.github/workflows/copy-linked-issue-labels.yml:
--------------------------------------------------------------------------------
1 | name: Copy labels from linked issues
2 | on:
3 | pull_request_target:
4 | types: [opened, edited, review_requested, synchronize, reopened, ready_for_review]
5 |
6 | jobs:
7 | copy-issue-labels:
8 | if: github.repository == 'opensearch-project/neural-search'
9 | runs-on: ubuntu-latest
10 | permissions:
11 | issues: read
12 | contents: read
13 | pull-requests: write
14 | steps:
15 | - name: copy-issue-labels
16 | uses: michalvankodev/copy-issue-labels@v1.3.0
17 | with:
18 | repo-token: ${{ secrets.GITHUB_TOKEN }}
19 | labels-to-exclude: |
20 | untriaged
21 | triaged
22 |
--------------------------------------------------------------------------------
/.github/workflows/delete_backport_branch.yml:
--------------------------------------------------------------------------------
1 | name: Delete merged branch of the backport PRs
2 | on:
3 | pull_request:
4 | types:
5 | - closed
6 |
7 | jobs:
8 | delete-branch:
9 | runs-on: ubuntu-latest
10 | permissions:
11 | contents: write
12 | if: github.repository == 'opensearch-project/neural-search' && startsWith(github.event.pull_request.head.ref,'backport/')
13 | steps:
14 | - name: Delete merged branch
15 | uses: actions/github-script@v7
16 | with:
17 | script: |
18 | github.rest.git.deleteRef({
19 | owner: context.repo.owner,
20 | repo: context.repo.repo,
21 | ref: `heads/${context.payload.pull_request.head.ref}`,
22 | })
23 |
--------------------------------------------------------------------------------
/.github/workflows/draft-release-notes-workflow.yml:
--------------------------------------------------------------------------------
1 | name: Release Drafter
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | update_release_draft:
10 | name: Update draft release notes
11 | runs-on: ubuntu-latest
12 | steps:
13 | - name: Update draft release notes
14 | uses: release-drafter/release-drafter@v5
15 | with:
16 | config-name: draft-release-notes-config.yml
17 | name: Version (set here)
18 | tag: (None)
19 | env:
20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
21 |
--------------------------------------------------------------------------------
/.github/workflows/links.yml:
--------------------------------------------------------------------------------
1 | name: Link Checker
2 | on:
3 | push:
4 | branches: [ main ]
5 | pull_request:
6 | branches: [ main ]
7 |
8 | jobs:
9 | linkchecker:
10 |
11 | runs-on: ubuntu-latest
12 |
13 | steps:
14 | - uses: actions/checkout@v2
15 | - name: lychee Link Checker
16 | id: lychee
17 | uses: lycheeverse/lychee-action@master
18 | with:
19 | args: --accept=200,403,429 **/*.html **/*.md **/*.txt **/*.json
20 | env:
21 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
22 | - name: Fail if there were link errors
23 | run: exit ${{ steps.lychee.outputs.exit_code }}
24 |
--------------------------------------------------------------------------------
/.github/workflows/maven-publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish snapshots to maven
2 |
3 | on:
4 | workflow_dispatch:
5 | push:
6 | branches:
7 | - 'main'
8 | - '[0-9]+.[0-9]+'
9 | - '[0-9]+.x'
10 |
11 | jobs:
12 | build-and-publish-snapshots:
13 | runs-on: ubuntu-latest
14 |
15 | permissions:
16 | id-token: write
17 | contents: write
18 |
19 | steps:
20 | - uses: actions/setup-java@v3
21 | with:
22 | distribution: temurin # Temurin is a distribution of adoptium
23 | java-version: 21
24 | - uses: actions/checkout@v3
25 | - uses: aws-actions/configure-aws-credentials@v1
26 | with:
27 | role-to-assume: ${{ secrets.PUBLISH_SNAPSHOTS_ROLE }}
28 | aws-region: us-east-1
29 | - name: publish snapshots to maven
30 | run: |
31 | export SONATYPE_USERNAME=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-username --query SecretString --output text)
32 | export SONATYPE_PASSWORD=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-password --query SecretString --output text)
33 | echo "::add-mask::$SONATYPE_USERNAME"
34 | echo "::add-mask::$SONATYPE_PASSWORD"
35 | ./gradlew publishPluginZipPublicationToSnapshotsRepository
36 |
--------------------------------------------------------------------------------
/.github/workflows/test_aggregations.yml:
--------------------------------------------------------------------------------
1 | name: Run Additional Tests for Neural Search
2 | on:
3 | schedule:
4 | - cron: '0 0 * * *' # every night
5 | push:
6 | branches:
7 | - "*"
8 | - "feature/**"
9 | pull_request:
10 | branches:
11 | - "*"
12 | - "feature/**"
13 | jobs:
14 | Get-CI-Image-Tag:
15 | uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main
16 | with:
17 | product: opensearch
18 |
19 | Check-neural-search-linux:
20 | needs: Get-CI-Image-Tag
21 | strategy:
22 | matrix:
23 | java: [21, 23]
24 | os: [ubuntu-latest]
25 |
26 | name: Integ Tests Linux
27 | runs-on: ${{ matrix.os }}
28 | container:
29 | # using the same image which is used by opensearch-build team to build the OpenSearch Distribution
30 | # this image tag is subject to change as more dependencies and updates will arrive over time
31 | image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }}
32 | # need to switch to root so that github actions can install runner binary on container without permission issues.
33 | options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }}
34 |
35 |
36 | steps:
37 | - name: Run start commands
38 | run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }}
39 |
40 | - name: Checkout neural-search
41 | uses: actions/checkout@v4
42 |
43 | - name: Setup Java ${{ matrix.java }}
44 | uses: actions/setup-java@v4
45 | with:
46 | distribution: 'temurin'
47 | java-version: ${{ matrix.java }}
48 |
49 | - name: Run tests
50 | run: |
51 | chown -R 1000:1000 `pwd`
52 | su `id -un 1000` -c "./gradlew ':integTest' -Dtest_aggs=true --tests \"org.opensearch.neuralsearch.query.aggregation.*IT\""
53 |
54 | Check-neural-search-windows:
55 | strategy:
56 | matrix:
57 | java: [23]
58 | os: [windows-latest]
59 |
60 | name: Integ Tests Windows
61 | runs-on: ${{ matrix.os }}
62 |
63 | steps:
64 | - name: Checkout neural-search
65 | uses: actions/checkout@v4
66 |
67 | - name: Setup Java ${{ matrix.java }}
68 | uses: actions/setup-java@v4
69 | with:
70 | distribution: 'temurin'
71 | java-version: ${{ matrix.java }}
72 |
73 | - name: Run tests
74 | run: |
75 | ./gradlew ':integTest' -Dtest_aggs=true --tests "org.opensearch.neuralsearch.query.aggregation.*IT"
76 |
--------------------------------------------------------------------------------
/.github/workflows/test_security.yml:
--------------------------------------------------------------------------------
1 | name: Test neural-search on Secure Cluster
2 | on:
3 | schedule:
4 | - cron: '0 0 * * *' # every night
5 | push:
6 | branches:
7 | - "*"
8 | - "feature/**"
9 | pull_request:
10 | branches:
11 | - "*"
12 | - "feature/**"
13 |
14 | jobs:
15 | Get-CI-Image-Tag:
16 | uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main
17 | with:
18 | product: opensearch
19 |
20 | integ-test-with-security-linux:
21 | strategy:
22 | matrix:
23 | java: [21, 23]
24 |
25 | name: Run Integration Tests on Linux
26 | runs-on: ubuntu-latest
27 | needs: Get-CI-Image-Tag
28 | container:
29 | # using the same image which is used by opensearch-build team to build the OpenSearch Distribution
30 | # this image tag is subject to change as more dependencies and updates will arrive over time
31 | image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }}
32 | # need to switch to root so that github actions can install runner binary on container without permission issues.
33 | options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }}
34 |
35 | steps:
36 | - name: Run start commands
37 | run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }}
38 |
39 | - name: Checkout neural-search
40 | uses: actions/checkout@v4
41 | with:
42 | submodules: true
43 |
44 | - name: Setup Java ${{ matrix.java }}
45 | uses: actions/setup-java@v4
46 | with:
47 | distribution: 'temurin'
48 | java-version: ${{ matrix.java }}
49 |
50 | - name: Run tests
51 | # switching the user, as OpenSearch cluster can only be started as root/Administrator on linux-deb/linux-rpm/windows-zip.
52 | run: |
53 | chown -R 1000:1000 `pwd`
54 | su `id -un 1000` -c "whoami && java -version && ./gradlew integTest -Dsecurity.enabled=true"
55 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # intellij files
2 | .idea/
3 | *.iml
4 | *.ipr
5 | *.iws
6 | *.log
7 | build-idea/
8 | out/
9 |
10 | # eclipse files
11 | .classpath
12 | .project
13 | .settings
14 |
15 | # gradle stuff
16 | .gradle/
17 | build/
18 | bin/
19 |
20 | # vscode stuff
21 | .vscode/
22 |
23 | # osx stuff
24 | .DS_Store
25 |
26 | # git stuff
27 | .gitattributes
28 |
--------------------------------------------------------------------------------
/.idea/copyright/SPDX_ALv2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/DebugNeuralSearch.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/Run_Neural_Search.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
11 |
16 |
17 |
18 | true
19 | true
20 | false
21 |
22 |
23 |
--------------------------------------------------------------------------------
/.idea/runConfigurations/Run_With_Debug_Port.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | true
20 | true
21 | false
22 |
23 |
24 |
--------------------------------------------------------------------------------
/.whitesource:
--------------------------------------------------------------------------------
1 | {
2 | "scanSettings": {
3 | "configMode": "AUTO",
4 | "configExternalURL": "",
5 | "projectToken": "",
6 | "baseBranches": []
7 | },
8 | "checkRunSettings": {
9 | "vulnerableCheckRunConclusionLevel": "failure",
10 | "displayMode": "diff",
11 | "useMendCheckNames": true
12 | },
13 | "issueSettings": {
14 | "minSeverityLevel": "LOW",
15 | "issueType": "DEPENDENCY"
16 | },
17 | "remediateSettings": {
18 | "workflowRules": {
19 | "enabled": true
20 | }
21 | }
22 | }
--------------------------------------------------------------------------------
/ADMINS.md:
--------------------------------------------------------------------------------
1 | ## Overview
2 |
3 | This document explains who the admins are (see below), what they do in this repo, and how they should be doing it. If you're interested in becoming a maintainer, see [MAINTAINERS](MAINTAINERS.md). If you're interested in contributing, see [CONTRIBUTING](CONTRIBUTING.md).
4 |
5 | ## Current Admins
6 |
7 | | Admin | GitHub ID | Affiliation |
8 | | --------------- | --------------------------------------- | ----------- |
9 | | Charlotte | [CEHENKLE](https://github.com/CEHENKLE) | Amazon |
10 |
11 | ## Admin Responsibilities
12 |
13 | As an admin you own stewardship of the repository and its settings. Admins have [admin-level permissions on a repository](https://docs.github.com/en/organizations/managing-access-to-your-organizations-repositories/repository-permission-levels-for-an-organization). Use those privileges to serve the community and protect the repository as follows.
14 |
15 | ### Prioritize Security
16 |
17 | Security is your number one priority. Manage security keys and safeguard access to the repository.
18 |
19 | Note that this repository is monitored and supported 24/7 by Amazon Security, see [Reporting a Vulnerability](SECURITY.md) for details.
20 |
21 | ### Enforce Code of Conduct
22 |
23 | Act on [CODE_OF_CONDUCT](CODE_OF_CONDUCT.md) violations by revoking access, and blocking malicious actors.
24 |
25 | ### Adopt Organizational Best Practices
26 |
27 | Adopt organizational best practices, work in the open, and collaborate with other admins by opening issues before making process changes. Prefer consistency, and avoid diverging from practices in the opensearch-project organization.
28 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | [](https://github.com/opensearch-project/neural-search/actions/workflows/CI.yml)
4 | [](https://codecov.io/gh/opensearch-project/neural-search)
5 | [](https://opensearch.org/docs/latest/search-plugins/neural-search/)
6 | [](https://forum.opensearch.org)
7 | 
8 |
9 | ## OpenSearch Neural Search
10 | **OpenSearch Neural Search** is an OpenSearch plugin that adds dense neural retrieval into the OpenSearch ecosystem.
11 | The plugin provides the capability for indexing documents and doing neural search on the indexed documents.
12 |
13 | ## Project Resources
14 |
15 | * [Project Website](https://opensearch.org/)
16 | * [Downloads](https://opensearch.org/downloads.html)
17 | * [Documentation](https://opensearch.org/docs/)
18 | * Need help? Try the [Forums](https://forum.opensearch.org)
19 | * [Project Principles](https://opensearch.org/#principles)
20 | * [Contributing to OpenSearch](CONTRIBUTING.md)
21 | * [Maintainer Responsibilities](MAINTAINERS.md)
22 | * [Release Management](RELEASING.md)
23 | * [Admin Responsibilities](ADMINS.md)
24 | * [Security](SECURITY.md)
25 | * [Code of Conduct](#code-of-conduct)
26 | * [License](#license)
27 | * [Copyright](#copyright)
28 |
29 | ## Code of Conduct
30 |
31 | This project has adopted the [Amazon Open Source Code of Conduct](CODE_OF_CONDUCT.md). For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq), or contact [opensource-codeofconduct@amazon.com](mailto:opensource-codeofconduct@amazon.com) with any additional questions or comments.
32 |
33 | ## License
34 |
35 | This project is licensed under the [Apache v2.0 License](LICENSE).
36 |
37 | ## Copyright
38 |
39 | Copyright OpenSearch Contributors. See [NOTICE](NOTICE) for details.
40 |
--------------------------------------------------------------------------------
/RELEASING.md:
--------------------------------------------------------------------------------
1 | - [Overview](#overview)
2 | - [Branching](#branching)
3 | - [Release Branching](#release-branching)
4 | - [Feature Branches](#feature-branches)
5 | - [Release Labels](#release-labels)
6 | - [Releasing](#releasing)
7 |
8 | ## Overview
9 |
10 | This document explains the release strategy for artifacts in this organization.
11 |
12 | ## Branching
13 |
14 | ### Release Branching
15 |
16 | Given the current major release of 1.0, projects in this organization maintain the following active branches.
17 |
18 | * **main**: The next _major_ release. This is the branch where all merges take place and code moves fast.
19 | * **1.x**: The next _minor_ release. Once a change is merged into `main`, decide whether to backport it to `1.x`.
20 | * **1.0**: The _current_ release. In between minor releases, only hotfixes (e.g. security) are backported to `1.0`.
21 |
22 | Label PRs with the next major version label (e.g. `2.0.0`) and merge changes into `main`. Label PRs that you believe need to be backported as `1.x` and `1.0`. Backport PRs by checking out the versioned branch, cherry-picking the changes, and opening a PR against each target backport branch.
23 |
24 | ### Feature Branches
25 |
26 | Do not create branches in the upstream repo; use your fork, with the exception of long-lasting feature branches that require active collaboration from multiple developers. Name feature branches `feature/<FEATURE>`. Once the work is merged to `main`, please make sure to delete the feature branch.
27 |
28 | ## Release Labels
29 |
30 | Repositories create consistent release labels, such as `v1.0.0`, `v1.1.0` and `v2.0.0`, as well as `patch` and `backport`. Use release labels to target an issue or a PR for a given release. See [MAINTAINERS](MAINTAINERS.md#triage-open-issues) for more information on triaging issues.
31 |
32 | ## Releasing
33 |
34 | The release process is standard across repositories in this org and is run by a release manager volunteering from amongst [MAINTAINERS](MAINTAINERS.md).
35 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | ## Reporting a Vulnerability
2 |
3 | If you discover a potential security issue in this project we ask that you notify OpenSearch Security directly via email to security@opensearch.org. Please do **not** create a public GitHub issue.
4 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | ---
2 | coverage:
3 | precision: 2
4 | round: down
5 | range: '70...90'
6 | status:
7 | project:
8 | default:
9 | target: auto
10 | threshold: 5%
11 |
--------------------------------------------------------------------------------
/formatter/license-header.txt:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
--------------------------------------------------------------------------------
/gradle.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright OpenSearch Contributors
3 | # SPDX-License-Identifier: Apache-2.0
4 | #
5 |
6 | # The BWC version here should always be the latest opensearch version set in
7 | # https://github.com/opensearch-project/OpenSearch/blob/main/libs/core/src/main/java/org/opensearch/Version.java .
8 | # Wire compatibility in OpenSearch means that a 3.x version is compatible with the 2.(latest-major) version.
9 | # Therefore, to run the rolling-upgrade BWC tests on a local machine, the BWC version here should be set to 2.(latest-major).
10 | systemProp.bwc.version=3.1.0-SNAPSHOT
11 | systemProp.bwc.bundle.version=3.0.0
12 |
13 | # For fixing Spotless check with Java 17
14 | org.gradle.jvmargs=--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \
15 | --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \
16 | --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \
17 | --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \
18 | --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED
19 |
--------------------------------------------------------------------------------
/gradle/formatting.gradle:
--------------------------------------------------------------------------------
1 | allprojects {
2 | project.apply plugin: "com.diffplug.spotless"
3 | spotless {
4 | java {
5 | // Normally this isn't necessary, but we have Java sources in
6 | // non-standard places
7 | target '**/*.java'
8 |
9 | removeUnusedImports()
10 | eclipse().withP2Mirrors(Map.of("https://download.eclipse.org/", "https://mirror.umd.edu/eclipse/")).configFile rootProject.file('formatter/formatterConfig.xml')
11 | trimTrailingWhitespace()
12 | endWithNewline();
13 |
14 | custom 'Refuse wildcard imports', {
15 | // Wildcard imports can't be resolved; fail the build
16 | if (it =~ /\s+import .*\*;/) {
17 | throw new AssertionError("Do not use wildcard imports. 'spotlessApply' cannot resolve this issue.")
18 | }
19 | }
20 | }
21 | format 'misc', {
22 | target '*.md', '**/*.gradle', '**/*.json', '**/*.yaml', '**/*.yml', '**/*.svg', '**/*.properties'
23 |
24 | trimTrailingWhitespace()
25 | endWithNewline()
26 | }
27 | format("license", {
28 | licenseHeaderFile("${rootProject.file("formatter/license-header.txt")}", "package ");
29 | target("src/*/java/**/*.java","qa/*/java/**/*.java")
30 | })
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/opensearch-project/neural-search/a6669e4cc5f69b56e6eb00105b49e71599692a48/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright OpenSearch Contributors
3 | # SPDX-License-Identifier: Apache-2.0
4 | #
5 |
6 | distributionBase=GRADLE_USER_HOME
7 | distributionPath=wrapper/dists
8 | distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6
9 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip
10 | networkTimeout=10000
11 | validateDistributionUrl=true
12 | zipStoreBase=GRADLE_USER_HOME
13 | zipStorePath=wrapper/dists
14 |
--------------------------------------------------------------------------------
/lombok.config:
--------------------------------------------------------------------------------
1 | config.stopBubbling = true
2 | lombok.addLombokGeneratedAnnotation = true
3 | lombok.nonNull.exceptionType = JDK
4 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/ChunkingIndexSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "settings":{
3 | "default_pipeline": "%s",
4 | "number_of_shards": 3,
5 | "number_of_replicas": 1
6 | },
7 | "mappings": {
8 | "properties": {
9 | "body": {
10 | "type": "text"
11 | },
12 | "body_chunk": {
13 | "type": "text"
14 | }
15 | }
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/CreateModelGroupRequestBody.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "%s",
3 | "description": "This is a public model group"
4 | }
5 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/IndexMappingMultipleShard.json:
--------------------------------------------------------------------------------
1 | {
2 | "settings": {
3 | "index": {
4 | "knn": true,
5 | "refresh_interval": "30s",
6 | "default_pipeline": "%s"
7 | },
8 | "number_of_shards": 3,
9 | "number_of_replicas": 1
10 | },
11 | "mappings": {
12 | "properties": {
13 | "passage_embedding": {
14 | "type": "knn_vector",
15 | "dimension": 768,
16 | "method": {
17 | "name": "hnsw",
18 | "space_type": "l2",
19 | "engine": "lucene",
20 | "parameters": {
21 | "ef_construction": 128,
22 | "m": 24
23 | }
24 | }
25 | },
26 | "passage_text": {
27 | "type": "text"
28 | },
29 | "passage_image": {
30 | "type": "text"
31 | }
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/IndexMappingSingleShard.json:
--------------------------------------------------------------------------------
1 | {
2 | "settings": {
3 | "index": {
4 | "knn": true,
5 | "refresh_interval": "30s",
6 | "default_pipeline": "%s"
7 | },
8 | "number_of_shards": 1,
9 | "number_of_replicas": 0
10 | },
11 | "mappings": {
12 | "properties": {
13 | "passage_embedding": {
14 | "type": "knn_vector",
15 | "dimension": 768,
16 | "method": {
17 | "name": "hnsw",
18 | "space_type": "l2",
19 | "engine": "lucene",
20 | "parameters": {
21 | "ef_construction": 128,
22 | "m": 24
23 | }
24 | }
25 | },
26 | "passage_text": {
27 | "type": "text"
28 | },
29 | "passage_image": {
30 | "type": "text"
31 | }
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "request_processors": [
3 | {
4 | "neural_sparse_two_phase_processor": {
5 | "tag": "neural-sparse",
6 | "description": "This processor is making two-phase rescorer.",
7 | "enabled": true,
8 | "two_phase_parameter": {
9 | "prune_ratio": %f,
10 | "expansion_rate": %f,
11 | "max_window_size": %d
12 | }
13 | }
14 | }
15 | ]
16 | }
17 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/PipelineConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "description": "text embedding pipeline for hybrid",
3 | "processors": [
4 | {
5 | "text_embedding": {
6 | "model_id": "%s",
7 | "field_map": {
8 | "passage_text": "passage_embedding"
9 | }
10 | }
11 | }
12 | ]
13 | }
14 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "description": "A sparse encoding ingest pipeline",
3 | "processors": [
4 | {
5 | "sparse_encoding": {
6 | "model_id": "%s",
7 | "field_map": {
8 | "passage_text": "passage_embedding"
9 | }
10 | }
11 | }
12 | ]
13 | }
14 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "description": "An example fixed token length chunker pipeline with standard tokenizer",
3 | "processors" : [
4 | {
5 | "text_chunking": {
6 | "field_map": {
7 | "body": "body_chunk"
8 | },
9 | "algorithm": {
10 | "fixed_token_length": {
11 | "token_limit": 10,
12 | "tokenizer": "standard"
13 | }
14 | }
15 | }
16 | }
17 | ]
18 | }
19 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/PipelineForTextImageProcessorConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "description": "text image embedding pipeline",
3 | "processors": [
4 | {
5 | "text_image_embedding": {
6 | "model_id": "%s",
7 | "embedding": "passage_embedding",
8 | "field_map": {
9 | "text": "passage_text",
10 | "image": "passage_image"
11 | }
12 | }
13 | }
14 | ]
15 | }
16 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/SearchRequestPipelineConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "request_processors": [
3 | {
4 | "neural_query_enricher": {
5 | "tag": "tag1",
6 | "description": "This processor is going to restrict to publicly visible documents",
7 | "default_model_id": "%s"
8 | }
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/SparseIndexMappings.json:
--------------------------------------------------------------------------------
1 | {
2 | "settings": {
3 | "default_pipeline": "%s",
4 | "number_of_shards": 3,
5 | "number_of_replicas": 1
6 | },
7 | "mappings": {
8 | "properties": {
9 | "passage_embedding": {
10 | "type": "rank_features"
11 | },
12 | "passage_text": {
13 | "type": "text"
14 | }
15 | }
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/UploadModelRequestBody.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "traced_small_model",
3 | "version": "1.0.0",
4 | "model_format": "TORCH_SCRIPT",
5 | "function_name": "TEXT_EMBEDDING",
6 | "model_task_type": "text_embedding",
7 | "model_content_hash_value": "e13b74006290a9d0f58c1376f9629d4ebc05a0f9385f40db837452b167ae9021",
8 | "model_group_id": "%s",
9 | "model_config": {
10 | "model_type": "bert",
11 | "embedding_dimension": 768,
12 | "framework_type": "sentence_transformers",
13 | "all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}"
14 | },
15 | "url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true"
16 | }
17 |
--------------------------------------------------------------------------------
/qa/restart-upgrade/src/test/resources/processor/UploadSparseEncodingModelRequestBody.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "tokenize-idf-0915",
3 | "version": "1.0.0",
4 | "function_name": "SPARSE_TOKENIZE",
5 | "description": "test model",
6 | "model_format": "TORCH_SCRIPT",
7 | "model_group_id": "%s",
8 | "model_content_hash_value": "b345e9e943b62c405a8dd227ef2c46c84c5ff0a0b71b584be9132b37bce91a9a",
9 | "url": "https://github.com/opensearch-project/ml-commons/raw/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/sparse_encoding/sparse_demo.zip"
10 | }
11 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/ChunkingIndexSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "settings":{
3 | "default_pipeline": "%s",
4 | "number_of_shards": 3,
5 | "number_of_replicas": 1
6 | },
7 | "mappings": {
8 | "properties": {
9 | "body": {
10 | "type": "text"
11 | },
12 | "body_chunk": {
13 | "type": "text"
14 | }
15 | }
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/CreateModelGroupRequestBody.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "%s",
3 | "description": "This is a public model group"
4 | }
5 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/IndexMappings.json:
--------------------------------------------------------------------------------
1 | {
2 | "settings": {
3 | "index": {
4 | "knn": true,
5 | "refresh_interval": "30s",
6 | "default_pipeline": "%s"
7 | },
8 | "number_of_shards": 3,
9 | "number_of_replicas": 1
10 | },
11 | "mappings": {
12 | "properties": {
13 | "passage_embedding": {
14 | "type": "knn_vector",
15 | "dimension": 768,
16 | "method": {
17 | "name": "hnsw",
18 | "space_type": "l2",
19 | "engine": "lucene",
20 | "parameters": {
21 | "ef_construction": 128,
22 | "m": 24
23 | }
24 | }
25 | },
26 | "passage_text": {
27 | "type": "text"
28 | },
29 | "passage_image": {
30 | "type": "text"
31 | }
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "request_processors": [
3 | {
4 | "neural_sparse_two_phase_processor": {
5 | "tag": "neural-sparse",
6 | "description": "This processor is making two-phase rescorer.",
7 | "enabled": true,
8 | "two_phase_parameter": {
9 | "prune_ratio": %f,
10 | "expansion_rate": %f,
11 | "max_window_size": %d
12 | }
13 | }
14 | }
15 | ]
16 | }
17 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/PipelineConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "description": "text embedding pipeline for hybrid",
3 | "processors": [
4 | {
5 | "text_embedding": {
6 | "model_id": "%s",
7 | "field_map": {
8 | "passage_text": "passage_embedding"
9 | }
10 | }
11 | }
12 | ]
13 | }
14 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "description": "A sparse encoding ingest pipeline",
3 | "processors": [
4 | {
5 | "sparse_encoding": {
6 | "model_id": "%s",
7 | "batch_size": "%d",
8 | "field_map": {
9 | "passage_text": "passage_embedding"
10 | }
11 | }
12 | }
13 | ]
14 | }
15 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "description": "An example fixed token length chunker pipeline with standard tokenizer",
3 | "processors" : [
4 | {
5 | "text_chunking": {
6 | "field_map": {
7 | "body": "body_chunk"
8 | },
9 | "algorithm": {
10 | "fixed_token_length": {
11 | "token_limit": 10,
12 | "tokenizer": "standard"
13 | }
14 | }
15 | }
16 | }
17 | ]
18 | }
19 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/PipelineForTextImageProcessorConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "description": "text image embedding pipeline",
3 | "processors": [
4 | {
5 | "text_image_embedding": {
6 | "model_id": "%s",
7 | "embedding": "passage_embedding",
8 | "field_map": {
9 | "text": "passage_text",
10 | "image": "passage_image"
11 | }
12 | }
13 | }
14 | ]
15 | }
16 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/SearchRequestPipelineConfiguration.json:
--------------------------------------------------------------------------------
1 | {
2 | "request_processors": [
3 | {
4 | "neural_query_enricher": {
5 | "tag": "tag1",
6 | "description": "This processor is going to restrict to publicly visible documents",
7 | "default_model_id": "%s"
8 | }
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/SparseIndexMappings.json:
--------------------------------------------------------------------------------
1 | {
2 | "settings": {
3 | "default_pipeline": "%s",
4 | "number_of_shards": 3,
5 | "number_of_replicas": 1
6 | },
7 | "mappings": {
8 | "properties": {
9 | "passage_embedding": {
10 | "type": "rank_features"
11 | },
12 | "passage_text": {
13 | "type": "text"
14 | }
15 | }
16 | }
17 | }
18 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/UploadModelRequestBody.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "traced_small_model",
3 | "version": "1.0.0",
4 | "model_format": "TORCH_SCRIPT",
5 | "function_name": "TEXT_EMBEDDING",
6 | "model_task_type": "text_embedding",
7 | "model_content_hash_value": "e13b74006290a9d0f58c1376f9629d4ebc05a0f9385f40db837452b167ae9021",
8 | "model_group_id": "%s",
9 | "model_config": {
10 | "model_type": "bert",
11 | "embedding_dimension": 768,
12 | "framework_type": "sentence_transformers",
13 | "all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}"
14 | },
15 | "url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true"
16 | }
17 |
--------------------------------------------------------------------------------
/qa/rolling-upgrade/src/test/resources/processor/UploadSparseEncodingModelRequestBody.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "tokenize-idf-0915",
3 | "version": "1.0.0",
4 | "function_name": "SPARSE_TOKENIZE",
5 | "description": "test model",
6 | "model_format": "TORCH_SCRIPT",
7 | "model_group_id": "%s",
8 | "model_content_hash_value": "b345e9e943b62c405a8dd227ef2c46c84c5ff0a0b71b584be9132b37bce91a9a",
9 | "url": "https://github.com/opensearch-project/ml-commons/raw/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/sparse_encoding/sparse_demo.zip"
10 | }
11 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.10.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.10.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.10.0
4 |
5 | ### Features
6 | * Improved Hybrid Search relevancy by Score Normalization and Combination ([#241](https://github.com/opensearch-project/neural-search/pull/241/))
7 |
8 | ### Enhancements
9 | * Changed format for hybrid query results to a single list of scores with delimiter ([#259](https://github.com/opensearch-project/neural-search/pull/259))
10 | * Added validations for score combination weights in Hybrid Search ([#265](https://github.com/opensearch-project/neural-search/pull/265))
11 | * Made hybrid search active by default ([#274](https://github.com/opensearch-project/neural-search/pull/274))
12 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.11.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.11.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.11.0
4 |
5 | ### Features
6 | * Support sparse semantic retrieval by introducing `sparse_encoding` ingest processor and query builder ([#333](https://github.com/opensearch-project/neural-search/pull/333))
7 | * Enabled support for applying default modelId in neural search query ([#337](https://github.com/opensearch-project/neural-search/pull/337))
8 | ### Bug Fixes
9 | * Fixed exception in Hybrid Query for one shard and multiple node ([#396](https://github.com/opensearch-project/neural-search/pull/396))
10 | ### Maintenance
11 | * Consumed latest changes from core, use QueryPhaseSearcherWrapper as parent class for Hybrid QPS ([#356](https://github.com/opensearch-project/neural-search/pull/356))
12 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.12.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.12.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.12.0
4 |
5 | ### Features
6 | - Add rerank processor interface and ml-commons reranker ([#494](https://github.com/opensearch-project/neural-search/pull/494))
7 | ### Bug Fixes
8 | - Fixing multiple issues reported in #497 ([#524](https://github.com/opensearch-project/neural-search/pull/524))
9 | - Fix Flaky test reported in #433 ([#533](https://github.com/opensearch-project/neural-search/pull/533))
10 | - Enable support for default model id on HybridQueryBuilder ([#541](https://github.com/opensearch-project/neural-search/pull/541))
11 | - Fix Flaky test reported in #384 ([#559](https://github.com/opensearch-project/neural-search/pull/559))
12 | - Add validations for reranker requests per #555 ([#562](https://github.com/opensearch-project/neural-search/pull/562))
13 | ### Infrastructure
14 | - BWC tests for Neural Search ([#515](https://github.com/opensearch-project/neural-search/pull/515))
15 | - Github action to run integ tests in secure opensearch cluster ([#535](https://github.com/opensearch-project/neural-search/pull/535))
16 | - BWC tests for Multimodal search, Hybrid Search and Neural Sparse Search ([#533](https://github.com/opensearch-project/neural-search/pull/533))
17 | - Distribution bundle bwc tests ([#579](https://github.com/opensearch-project/neural-search/pull/579))
18 | ### Maintenance
19 | - Update spotless and eclipse dependencies ([#589](https://github.com/opensearch-project/neural-search/pull/589))
20 | ### Refactoring
21 | - Added spotless check in the build ([#515](https://github.com/opensearch-project/neural-search/pull/515))
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.13.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.13.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.13.0
4 |
5 | ### Features
6 | - Implement document chunking processor with fixed token length and delimiter algorithm ([#607](https://github.com/opensearch-project/neural-search/pull/607/))
7 | - Enabled support for applying default modelId in neural sparse query ([#614](https://github.com/opensearch-project/neural-search/pull/614))
8 | ### Enhancements
9 | - Adding aggregations in hybrid query ([#630](https://github.com/opensearch-project/neural-search/pull/630))
10 | - Support for post filter in hybrid query ([#633](https://github.com/opensearch-project/neural-search/pull/633))
11 | ### Bug Fixes
12 | - Fix runtime exceptions in hybrid query for case when sub-query scorer return TwoPhase iterator that is incompatible with DISI iterator ([#624](https://github.com/opensearch-project/neural-search/pull/624))
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.14.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.14.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.14.0
4 |
5 | ### Features
6 | * Support k-NN radial search parameters in neural search([#697](https://github.com/opensearch-project/neural-search/pull/697))
7 | ### Enhancements
8 | * BWC tests for text chunking processor ([#661](https://github.com/opensearch-project/neural-search/pull/661))
9 | * Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663))
10 | * Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670))
11 | * Allowing query by raw tokens in neural_sparse query ([#693](https://github.com/opensearch-project/neural-search/pull/693))
12 | * Removed stream.findFirst implementation to use more native iteration implement to improve hybrid query latencies by 35% ([#706](https://github.com/opensearch-project/neural-search/pull/706))
13 | * Removed map of subquery to subquery index in favor of storing index as part of disi wrapper to improve hybrid query latencies by 20% ([#711](https://github.com/opensearch-project/neural-search/pull/711))
14 | * Avoid change max_chunk_limit exceed exception in text chunking processor ([#717](https://github.com/opensearch-project/neural-search/pull/717))
15 | ### Bug Fixes
16 | * Fix async actions are left in neural_sparse query ([#438](https://github.com/opensearch-project/neural-search/pull/438))
17 | * Fix typo for sparse encoding processor factory([#578](https://github.com/opensearch-project/neural-search/pull/578))
18 | * Add non-null check for queryBuilder in NeuralQueryEnricherProcessor ([#615](https://github.com/opensearch-project/neural-search/pull/615))
19 | * Add max_token_score field placeholder in NeuralSparseQueryBuilder to fix the rolling-upgrade from 2.x nodes bwc tests. ([#696](https://github.com/opensearch-project/neural-search/pull/696))
20 | * Fix multi node "no such index" error in text chunking processor. ([#713](https://github.com/opensearch-project/neural-search/pull/713))
21 | ### Infrastructure
22 | * Adding integration tests for scenario of hybrid query with aggregations ([#632](https://github.com/opensearch-project/neural-search/pull/632))
23 | ### Maintenance
24 | * Update bwc tests for neural_query_enricher neural_sparse search ([#652](https://github.com/opensearch-project/neural-search/pull/652))
25 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.15.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.15.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.15.0
4 |
5 | ### Features
6 | * Speed up NeuralSparseQuery by two-phase using a custom search pipeline.([#646](https://github.com/opensearch-project/neural-search/issues/646))
7 | * Support batchExecute in TextEmbeddingProcessor and SparseEncodingProcessor ([#743](https://github.com/opensearch-project/neural-search/issues/743))
8 | ### Enhancements
9 | * Pass empty doc collector instead of top docs collector to improve hybrid query latencies by 20% ([#731](https://github.com/opensearch-project/neural-search/pull/731))
10 | * Optimize parameter parsing in text chunking processor ([#733](https://github.com/opensearch-project/neural-search/pull/733))
11 | * Use lazy initialization for priority queue of hits and scores to improve latencies by 20% ([#746](https://github.com/opensearch-project/neural-search/pull/746))
12 | * Optimize max score calculation in the Query Phase of the Hybrid Search ([#765](https://github.com/opensearch-project/neural-search/pull/765))
13 | * Implement parallel execution of sub-queries for hybrid search ([#749](https://github.com/opensearch-project/neural-search/pull/749))
14 | ### Bug Fixes
15 | * Total hit count fix in Hybrid Query ([#756](https://github.com/opensearch-project/neural-search/pull/756))
16 | * Fix map type validation issue in multiple pipeline processors ([#661](https://github.com/opensearch-project/neural-search/pull/661))
17 | ### Infrastructure
18 | * Disable memory circuit breaker for integ tests ([#770](https://github.com/opensearch-project/neural-search/pull/770))
19 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.16.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.16.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.16.0
4 |
5 | ### Features
6 | - Enable sorting and search_after features in Hybrid Search [#827](https://github.com/opensearch-project/neural-search/pull/827)
7 | ### Enhancements
8 | - InferenceProcessor inherits from AbstractBatchingProcessor to support sub batching in processor [#820](https://github.com/opensearch-project/neural-search/pull/820)
9 | - Adds dynamic knn query parameters efsearch and nprobes [#814](https://github.com/opensearch-project/neural-search/pull/814/)
10 | - Enable '.' for nested field in text embedding processor ([#811](https://github.com/opensearch-project/neural-search/pull/811))
11 | - Enhance syntax for nested mapping in destination fields([#841](https://github.com/opensearch-project/neural-search/pull/841))
12 | ### Bug Fixes
13 | - Fix function names and comments in the gradle file for BWC tests ([#795](https://github.com/opensearch-project/neural-search/pull/795/files))
14 | - Fix for missing HybridQuery results when concurrent segment search is enabled ([#800](https://github.com/opensearch-project/neural-search/pull/800))
15 | ### Infrastructure
16 | - Add BWC for batch ingestion ([#769](https://github.com/opensearch-project/neural-search/pull/769))
17 | - Add backward test cases for neural sparse two phase processor ([#777](https://github.com/opensearch-project/neural-search/pull/777))
18 | - Fix CI for JDK upgrade towards 21 ([#835](https://github.com/opensearch-project/neural-search/pull/835))
19 | - Maven publishing workflow by upgrade jdk to 21 ([#837](https://github.com/opensearch-project/neural-search/pull/837))
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.17.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.17.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.17.0
4 |
5 | ### Enhancements
6 | - Adds rescore parameter support ([#885](https://github.com/opensearch-project/neural-search/pull/885))
7 | ### Bug Fixes
8 | - Removing code to cut search results of hybrid search in the priority queue ([#867](https://github.com/opensearch-project/neural-search/pull/867))
9 | - Fixed merge logic in hybrid query for multiple shards case ([#877](https://github.com/opensearch-project/neural-search/pull/877))
10 | ### Infrastructure
11 | - Update batch related tests to use batch_size in processor & refactor BWC version check ([#852](https://github.com/opensearch-project/neural-search/pull/852))
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.18.0.0.md:
--------------------------------------------------------------------------------
1 |
2 | ## Version 2.18.0.0 Release Notes
3 |
4 | Compatible with OpenSearch 2.18.0
5 |
6 | ### Features
7 | - Introduces ByFieldRerankProcessor for second level reranking on documents ([#932](https://github.com/opensearch-project/neural-search/pull/932))
8 | ### Bug Fixes
9 | - Fixed incorrect document order for nested aggregations in hybrid query ([#956](https://github.com/opensearch-project/neural-search/pull/956))
10 | ### Enhancements
11 | - Implement `ignore_missing` field in text chunking processors ([#907](https://github.com/opensearch-project/neural-search/pull/907))
12 | - Added rescorer in hybrid query ([#917](https://github.com/opensearch-project/neural-search/pull/917))
13 |
14 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.19.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.19.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.19.0
4 |
5 | ### Features
6 | * Pagination in Hybrid query ([#1048](https://github.com/opensearch-project/neural-search/pull/1048))
7 | * Implement Reciprocal Rank Fusion score normalization/combination technique in hybrid query ([#874](https://github.com/opensearch-project/neural-search/pull/874))
8 | ### Bug Fixes
9 | * Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998))
10 | * Fix bug where ingested document has list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040))
11 | * Fixed document source and score field mismatch in sorted hybrid queries ([#1043](https://github.com/opensearch-project/neural-search/pull/1043))
12 | * Update NeuralQueryBuilder doEquals() and doHashCode() to cater the missing parameters information ([#1045](https://github.com/opensearch-project/neural-search/pull/1045)).
13 | * Fix bug where embedding is missing when ingested document has "." in field name, and mismatches fieldMap config ([#1062](https://github.com/opensearch-project/neural-search/pull/1062))
14 | ### Enhancements
15 | * Explainability in hybrid query ([#970](https://github.com/opensearch-project/neural-search/pull/970))
16 | * Support new knn query parameter expand_nested ([#1013](https://github.com/opensearch-project/neural-search/pull/1013))
17 | * Implement pruning for neural sparse ingestion pipeline and two phase search processor ([#988](https://github.com/opensearch-project/neural-search/pull/988))
18 | * Support empty string for fields in text embedding processor ([#1041](https://github.com/opensearch-project/neural-search/pull/1041))
19 | * Optimize ML inference connection retry logic ([#1054](https://github.com/opensearch-project/neural-search/pull/1054))
20 | * Support for builder constructor in Neural Query Builder ([#1047](https://github.com/opensearch-project/neural-search/pull/1047))
21 | * Validate Disjunction query to avoid having nested hybrid query ([#1127](https://github.com/opensearch-project/neural-search/pull/1127))
22 | ### Maintenance
23 | * Add reindex integration tests for ingest processors ([#1075](https://github.com/opensearch-project/neural-search/pull/1075))
24 | * Fix github CI by adding eclipse dependency in formatting.gradle ([#1079](https://github.com/opensearch-project/neural-search/pull/1079))
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.4.1.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.4.1.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.4.1
4 |
5 | ### Bug Fixes
6 |
7 | * Change the behavior when embedding fields are not present ([#72](https://github.com/opensearch-project/neural-search/pull/72))
8 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.5.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.5.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.5.0
4 |
5 | ### Enhancements
6 |
7 | * Add filter option for query type ([#88](https://github.com/opensearch-project/neural-search/pull/88))
8 | * Add retry mechanism for neural search inference ([#91](https://github.com/opensearch-project/neural-search/pull/91))
9 | * Enable core branching strategy and make Neural Plugin as extensible plugin. ([#87](https://github.com/opensearch-project/neural-search/pull/87))
10 |
11 | ### Documentation
12 |
13 | * Update MAINTAINERS.md format ([#95](https://github.com/opensearch-project/neural-search/pull/95))
14 | * Use short-form MAINTAINERS.md ([#84](https://github.com/opensearch-project/neural-search/pull/84))
15 |
16 | ### Refactoring
17 |
18 | * Remove unused MLPredict Transport action from src ([#94](https://github.com/opensearch-project/neural-search/pull/94))
19 |
20 | ### Maintenance
21 |
22 | * Increment version to 2.5.0-SNAPSHOT ([#76](https://github.com/opensearch-project/neural-search/pull/76))
23 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.6.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.6.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.6.0
4 |
5 | ### Maintenance
6 |
7 | * Increment version to 2.6.0-SNAPSHOT ([#117](https://github.com/opensearch-project/neural-search/pull/117))
8 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.7.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.7.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.7.0
4 |
5 | ### Infrastructure
6 |
7 | * Add GHA to publish to maven repository ([#130](https://github.com/opensearch-project/neural-search/pull/130))
8 | * Add reflection dependency ([#136](https://github.com/opensearch-project/neural-search/pull/136))
9 | * Add CHANGELOG ([#135](https://github.com/opensearch-project/neural-search/pull/135))
10 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.8.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.8.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.8.0
4 |
5 | ### Infrastructure
6 |
7 | * Bump gradle version to 8.1.1 ([#169](https://github.com/opensearch-project/neural-search/pull/169))
8 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-2.9.0.0.md:
--------------------------------------------------------------------------------
1 | ## Version 2.9.0.0 Release Notes
2 |
3 | Compatible with OpenSearch 2.9.0
4 |
5 | ### Maintenance
6 | Increment version to 2.9.0-SNAPSHOT ([#191](https://github.com/opensearch-project/neural-search/pull/191))
7 |
8 | ### Bug Fixes
9 | Fix update document with knn_vector size not matching issue ([#208](https://github.com/opensearch-project/neural-search/pull/208))
10 |
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-3.0.0.0-alpha1.md:
--------------------------------------------------------------------------------
1 | ## Version 3.0.0.0-alpha1 Release Notes
2 |
3 | Compatible with OpenSearch 3.0.0-alpha1
4 |
5 | ### Enhancements
6 | - Set neural-search plugin 3.0.0 baseline JDK version to JDK-21 ([#838](https://github.com/opensearch-project/neural-search/pull/838))
7 | - Support different embedding types in model's response ([#1007](https://github.com/opensearch-project/neural-search/pull/1007))
8 | ### Bug Fixes
9 | - Fix a bug to unflatten the doc with list of map with multiple entries correctly ([#1204](https://github.com/opensearch-project/neural-search/pull/1204)).
10 | ### Infrastructure
11 | - [3.0] Update neural-search for OpenSearch 3.0 compatibility ([#1141](https://github.com/opensearch-project/neural-search/pull/1141))
12 | ### Refactoring
13 | - Encapsulate KNNQueryBuilder creation within NeuralKNNQueryBuilder ([#1183](https://github.com/opensearch-project/neural-search/pull/1183))
14 | ### Documentation
15 | - Adding code guidelines ([#502](https://github.com/opensearch-project/neural-search/pull/502))
--------------------------------------------------------------------------------
/release-notes/opensearch-neural-search.release-notes-3.0.0.0-beta1.md:
--------------------------------------------------------------------------------
1 | ## Version 3.0.0.0-beta1 Release Notes
2 |
3 | Compatible with OpenSearch 3.0.0-beta1
4 |
5 | ### Features
6 | - Lower bound for min-max normalization technique in hybrid query ([#1195](https://github.com/opensearch-project/neural-search/pull/1195))
7 | - Support filter function for HybridQueryBuilder and NeuralQueryBuilder ([#1206](https://github.com/opensearch-project/neural-search/pull/1206))
8 | - Add Z Score normalization technique ([#1224](https://github.com/opensearch-project/neural-search/pull/1224))
9 | - Support semantic sentence highlighter ([#1193](https://github.com/opensearch-project/neural-search/pull/1193))
10 | - Optimize embedding generation in Text Embedding Processor ([#1191](https://github.com/opensearch-project/neural-search/pull/1191))
11 | - Optimize embedding generation in Sparse Encoding Processor ([#1246](https://github.com/opensearch-project/neural-search/pull/1246))
12 | - Optimize embedding generation in Text/Image Embedding Processor ([#1249](https://github.com/opensearch-project/neural-search/pull/1249))
13 | - Inner hits support with hybrid query ([#1253](https://github.com/opensearch-project/neural-search/pull/1253))
14 | - Support custom tags in semantic highlighter ([#1254](https://github.com/opensearch-project/neural-search/pull/1254))
15 | - Add stats API ([#1256](https://github.com/opensearch-project/neural-search/pull/1256))
16 |
17 | ### Bug Fixes
18 | - Remove validations for unmapped fields (text and image) in TextImageEmbeddingProcessor ([#1230](https://github.com/opensearch-project/neural-search/pull/1230))
19 |
20 | ### Infrastructure
21 | - [3.0] Update neural-search for OpenSearch 3.0 beta compatibility ([#1245](https://github.com/opensearch-project/neural-search/pull/1245))
22 |
--------------------------------------------------------------------------------
/repositories.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 |
6 | repositories {
7 | mavenLocal()
8 | maven { url "https://aws.oss.sonatype.org/content/repositories/snapshots" }
9 | mavenCentral()
10 | maven { url "https://plugins.gradle.org/m2/" }
11 | }
12 |
--------------------------------------------------------------------------------
/settings.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * This file was generated by the Gradle 'init' task.
3 | *
4 | * The settings file is used to specify which projects to include in your build.
5 | *
6 | * Detailed information about configuring a multi-project build in Gradle can be found
7 | * in the user manual at https://docs.gradle.org/7.5.1/userguide/multi_project_builds.html
8 | * This project uses @Incubating APIs which are subject to change.
9 | */
10 |
11 | rootProject.name = 'neural-search'
12 |
13 | include ":qa"
14 | include ":qa:rolling-upgrade"
15 | include ":qa:restart-upgrade"
16 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/common/VectorUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.common;
6 |
7 | import java.util.List;
8 |
9 | import lombok.AccessLevel;
10 | import lombok.NoArgsConstructor;
11 |
12 | /**
13 | * Utility class for working with vectors
14 | */
15 | @NoArgsConstructor(access = AccessLevel.PRIVATE)
public class VectorUtil {

    /**
     * Converts a vector represented as a list to an array.
     *
     * @param vectorAsList {@link List} of {@link Float}'s representing the vector; must not be
     *                     {@code null} and must not contain {@code null} elements
     * @return array of floats produced from the input list, in the same order and of the same length
     */
    public static float[] vectorAsListToArray(List<Float> vectorAsList) {
        float[] vector = new float[vectorAsList.size()];
        // Single pass over the list: avoids repeated get(i) lookups, which are linear on non-random-access lists
        int index = 0;
        for (Float component : vectorAsList) {
            vector[index++] = component.floatValue();
        }
        return vector;
    }
}
32 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/constants/MappingConstants.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.constants;
6 |
/**
 * Constants related to the index mapping.
 */
public class MappingConstants {
    /**
     * Name for the field type. In index mapping we use this key to define the field type.
     */
    public static final String TYPE = "type";
    /**
     * Name for doc. Actions like create index and legacy create/update index template will have the
     * mapping properties under a _doc key.
     */
    public static final String DOC = "_doc";
    /**
     * Name for properties. An object field will define subfields as properties.
     */
    public static final String PROPERTIES = "properties";

    /**
     * Separator in a field path.
     */
    public static final String PATH_SEPARATOR = ".";

    /**
     * Not instantiable: this class only holds constants.
     */
    private MappingConstants() {}
}
30 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/constants/SemanticFieldConstants.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.constants;
6 |
/**
 * Constants for semantic field
 */
public class SemanticFieldConstants {
    /**
     * Name of the model id parameter. We use this key to define the id of the ML model that we will use for the
     * semantic field.
     */
    public static final String MODEL_ID = "model_id";

    /**
     * Name of the search model id parameter. We use this key to define the id of the ML model that we will use to
     * inference the query text during the search. If this parameter is not defined we will use the model_id instead.
     */
    public static final String SEARCH_MODEL_ID = "search_model_id";

    /**
     * Name of the raw field type parameter. We use this key to define the field type for the raw data. It will control
     * how to store and query the raw data.
     */
    public static final String RAW_FIELD_TYPE = "raw_field_type";

    /**
     * Name of the semantic info field name parameter. We use this key to define a custom field name for the
     * semantic info.
     */
    public static final String SEMANTIC_INFO_FIELD_NAME = "semantic_info_field_name";

    /**
     * Default suffix for semantic info field name. It will be used to construct the field name of the semantic info.
     */
    public static final String DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX = "_semantic_info";

    /**
     * Name of the field to control if we should do chunking for the semantic field. By default, the chunking is
     * disabled to not downgrade the search performance.
     */
    public static final String CHUNKING = "chunking";

    /**
     * Name of the field for the search analyzer parameter. When this field is set, the user does not have to
     * specify the search analyzer at query time.
     */
    public static final String SEMANTIC_FIELD_SEARCH_ANALYZER = "semantic_field_search_analyzer";

    /**
     * Not instantiable: this class only holds constants.
     */
    private SemanticFieldConstants() {}
}
51 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/executors/HybridQueryExecutorCollector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.executors;
6 |
7 | import lombok.AccessLevel;
8 | import lombok.Getter;
9 | import lombok.RequiredArgsConstructor;
10 | import lombok.Synchronized;
11 |
12 | import java.util.Optional;
13 | import java.util.function.Function;
14 |
15 | /**
16 |  * {@link HybridQueryExecutorCollector} is a generic Collector used by Hybrid Search Query during
17 |  * Query phase to parallelize sub query's action to improve latency
18 |  *
19 |  * @param <I> type of the input passed to every collected action
20 |  * @param <R> type of the result produced by a collected action
21 |  */
22 | @RequiredArgsConstructor(staticName = "newCollector", access = AccessLevel.PACKAGE)
23 | public final class HybridQueryExecutorCollector<I, R> {
24 |
25 |     // will be used as input for all instances of collector generated by newCollector method,
26 |     // if it is required for collect operation
27 |     private final I param;
28 |
29 |     // getResult should only be called after collector's collect method is invoked;
30 |     // until then it holds Optional.empty().
31 |     @Getter(onMethod_ = { @Synchronized })
32 |     private Optional<R> result = Optional.empty();
33 |
34 |     /**
35 |      * Applies the given action to this collector's input and stores the outcome, replacing any
36 |      * previously stored result. A {@code null} outcome is stored as {@link Optional#empty()}.
37 |      *
38 |      * @param action function that will be executed and result will be stored at result.
39 |      */
40 |     @Synchronized
41 |     public void collect(Function<I, R> action) {
42 |         result = Optional.ofNullable(action.apply(param));
43 |     }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/executors/HybridQueryExecutorCollectorManager.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.executors;
6 |
7 | /**
8 |  * {@link HybridQueryExecutorCollectorManager} is responsible for creating new {@link HybridQueryExecutorCollector} instances
9 |  *
10 |  * @param <C> concrete collector type produced by this manager
11 |  */
12 | public interface HybridQueryExecutorCollectorManager<C extends HybridQueryExecutorCollector> {
13 |     /**
14 |      * Return a new collector instance of type {@code C}, a {@link HybridQueryExecutorCollector}.
15 |      * This will be used during Hybrid Search when sub queries want to execute a part of their
16 |      * operation that is independent of the others and can be parallelized to improve
17 |      * the performance.
18 |      * @return HybridQueryExecutorCollector
19 |      */
20 |     C newCollector();
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/BooleanQueryTextExtractor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.highlight.extractor;
6 |
7 | import org.apache.lucene.search.BooleanClause;
8 | import org.apache.lucene.search.BooleanQuery;
9 | import org.apache.lucene.search.Query;
10 |
11 | import lombok.extern.log4j.Log4j2;
12 |
13 | /**
14 |  * Extractor for boolean queries
15 |  */
16 | @Log4j2
17 | public class BooleanQueryTextExtractor implements QueryTextExtractor {
18 |
19 |     private final QueryTextExtractorRegistry registry;
20 |
21 |     public BooleanQueryTextExtractor(QueryTextExtractorRegistry registry) {
22 |         this.registry = registry;
23 |     }
24 |
25 |     /**
26 |      * Joins, with single spaces, the text extracted from every clause that is not prohibited.
27 |      * Clauses that yield no text (null or empty) are skipped, and a clause whose extraction throws
28 |      * {@link IllegalArgumentException} is logged and skipped instead of failing the whole query.
29 |      *
30 |      * @param query The boolean query to extract text from
31 |      * @param fieldName The name of the field being highlighted
32 |      * @return The combined clause text, or an empty string when no clause contributes text
33 |      * @throws IllegalArgumentException if the query is not a {@link BooleanQuery}
34 |      */
35 |     @Override
36 |     public String extractQueryText(Query query, String fieldName) {
37 |         BooleanQuery booleanQuery = toQueryType(query, BooleanQuery.class);
38 |
39 |         StringBuilder sb = new StringBuilder();
40 |
41 |         for (BooleanClause clause : booleanQuery.clauses()) {
42 |             // Skip MUST_NOT clauses as they represent negative terms
43 |             if (clause.isProhibited()) {
44 |                 continue;
45 |             }
46 |
47 |             try {
48 |                 String clauseText = registry.extractQueryText(clause.query(), fieldName);
49 |                 // An extractor may return null when a query carries no text; treat it like empty text
50 |                 if (clauseText == null || clauseText.isEmpty()) {
51 |                     continue;
52 |                 }
53 |                 if (sb.isEmpty() == false) {
54 |                     sb.append(" ");
55 |                 }
56 |                 sb.append(clauseText);
57 |             } catch (IllegalArgumentException e) {
58 |                 log.warn("Failed to extract text from clause {}: {}", clause, e.getMessage(), e);
59 |             }
60 |         }
61 |
62 |         return sb.toString();
63 |     }
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/HybridQueryTextExtractor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.highlight.extractor;
6 |
7 | import org.apache.lucene.search.Query;
8 | import org.opensearch.neuralsearch.query.HybridQuery;
9 |
10 | import java.util.HashSet;
11 | import java.util.Set;
12 |
13 | /**
14 |  * Extractor for hybrid queries that combines text from all sub-queries
15 |  */
16 | public class HybridQueryTextExtractor implements QueryTextExtractor {
17 |
18 |     private final QueryTextExtractorRegistry registry;
19 |
20 |     public HybridQueryTextExtractor(QueryTextExtractorRegistry registry) {
21 |         this.registry = registry;
22 |     }
23 |
24 |     /**
25 |      * Extracts the text of every sub-query and joins the distinct, non-empty texts with single
26 |      * spaces. Texts appear in the order in which their sub-queries are iterated, so the result
27 |      * does not depend on hash ordering.
28 |      *
29 |      * @param query The hybrid query to extract text from
30 |      * @param fieldName The name of the field being highlighted
31 |      * @return The combined, trimmed text of all sub-queries; empty when none yields text
32 |      * @throws IllegalArgumentException if the query is not a {@link HybridQuery}
33 |      */
34 |     @Override
35 |     public String extractQueryText(Query query, String fieldName) {
36 |         HybridQuery hybridQuery = toQueryType(query, HybridQuery.class);
37 |
38 |         // Tracks texts already appended so duplicates are emitted only once
39 |         Set<String> seenTexts = new HashSet<>();
40 |         StringBuilder combinedText = new StringBuilder();
41 |
42 |         // Extract text from each sub-query, keeping first-seen order
43 |         for (Query subQuery : hybridQuery.getSubQueries()) {
44 |             String extractedText = registry.extractQueryText(subQuery, fieldName);
45 |             if (extractedText == null || extractedText.isEmpty()) {
46 |                 continue;
47 |             }
48 |             if (seenTexts.add(extractedText)) {
49 |                 if (combinedText.length() > 0) {
50 |                     combinedText.append(" ");
51 |                 }
52 |                 combinedText.append(extractedText);
53 |             }
54 |         }
55 |
56 |         return combinedText.toString().trim();
57 |     }
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/NestedQueryTextExtractor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.highlight.extractor;
6 |
7 | import org.apache.lucene.search.Query;
8 | import org.opensearch.index.search.OpenSearchToParentBlockJoinQuery;
9 |
10 | /**
11 |  * Extractor for {@link OpenSearchToParentBlockJoinQuery}: the text is taken from the wrapped child query.
12 |  */
13 | public class NestedQueryTextExtractor implements QueryTextExtractor {
14 |     private final QueryTextExtractorRegistry registry;
15 |
16 |     public NestedQueryTextExtractor(QueryTextExtractorRegistry registry) {
17 |         this.registry = registry;
18 |     }
19 |
20 |     /**
21 |      * Unwraps the child query and hands it to the registry for extraction.
22 |      *
23 |      * @param query The block join query to extract text from
24 |      * @param fieldName The name of the field being highlighted
25 |      * @return The text the registry extracts from the child query
26 |      */
27 |     @Override
28 |     public String extractQueryText(Query query, String fieldName) {
29 |         Query childQuery = toQueryType(query, OpenSearchToParentBlockJoinQuery.class).getChildQuery();
30 |         return registry.extractQueryText(childQuery, fieldName);
31 |     }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/NeuralQueryTextExtractor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.highlight.extractor;
6 |
7 | import org.apache.lucene.search.Query;
8 | import org.opensearch.neuralsearch.query.NeuralKNNQuery;
9 |
10 | /**
11 |  * Extractor for neural queries
12 |  */
13 | public class NeuralQueryTextExtractor implements QueryTextExtractor {
14 |
15 |     /**
16 |      * Returns the original query text carried by the {@link NeuralKNNQuery}.
17 |      *
18 |      * @param query The neural query to extract text from
19 |      * @param fieldName The name of the field being highlighted; not used by this extractor
20 |      * @return The value of {@code getOriginalQueryText()} on the query
21 |      */
22 |     @Override
23 |     public String extractQueryText(Query query, String fieldName) {
24 |         return toQueryType(query, NeuralKNNQuery.class).getOriginalQueryText();
25 |     }
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/QueryTextExtractor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.highlight.extractor;
6 |
7 | import org.apache.lucene.search.Query;
8 |
9 | import java.util.Locale;
10 |
11 | /**
12 | * Interface for extracting query text from different query types
13 | */
14 | public interface QueryTextExtractor {
15 | /**
16 | * Converts a query to the expected type, throwing an exception if the type doesn't match
17 | *
18 | * @param query The query to convert
19 | * @param expectedType The expected query type
20 | * @return The query cast to the expected type
21 | * @throws IllegalArgumentException if the query is not of the expected type
22 | */
23 | default T toQueryType(Query query, Class expectedType) {
24 | if (!expectedType.isInstance(query)) {
25 | throw new IllegalArgumentException(
26 | String.format(Locale.ROOT, "Expected %s but got %s", expectedType.getSimpleName(), query.getClass().getSimpleName())
27 | );
28 | }
29 | return expectedType.cast(query);
30 | }
31 |
32 | /**
33 | * Extracts text from a query for highlighting
34 | *
35 | * @param query The query to extract text from
36 | * @param fieldName The name of the field being highlighted
37 | * @return The extracted query text
38 | */
39 | String extractQueryText(Query query, String fieldName);
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/TermQueryTextExtractor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.highlight.extractor;
6 |
7 | import org.apache.lucene.index.Term;
8 | import org.apache.lucene.search.Query;
9 | import org.apache.lucene.search.TermQuery;
10 |
11 | /**
12 | * Extractor for term queries
13 | */
14 | public class TermQueryTextExtractor implements QueryTextExtractor {
15 |
16 | @Override
17 | public String extractQueryText(Query query, String fieldName) {
18 | TermQuery termQuery = toQueryType(query, TermQuery.class);
19 |
20 | Term term = termQuery.getTerm();
21 | // Only include terms from the field we're highlighting
22 | if (fieldName.equals(term.field())) {
23 | return term.text();
24 | }
25 | return "";
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/mapper/dto/SemanticParameters.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.mapper.dto;
6 |
7 | import lombok.Builder;
8 | import lombok.Getter;
9 |
10 | /**
11 | * A DTO to hold all the semantic parameters. Every field is final; Lombok generates the builder and the getters.
12 | */
13 | @Getter
14 | @Builder
15 | public class SemanticParameters {
16 | private final String modelId;
17 | private final String searchModelId;
18 | private final String rawFieldType;
19 | private final String semanticInfoFieldName;
20 | private final Boolean chunkingEnabled;
21 | private final String semanticFieldSearchAnalyzer;
22 | }
23 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/InferenceRequest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.processor;
6 |
7 | import java.util.List;
8 |
9 | import lombok.Builder;
10 | import lombok.Getter;
11 | import lombok.NoArgsConstructor;
12 | import lombok.NonNull;
13 | import lombok.Setter;
14 | import lombok.experimental.SuperBuilder;
15 |
16 | /**
17 |  * Base abstract class for inference requests.
18 |  * This class contains common fields and behaviors shared across different types of inference requests.
19 |  */
20 | @SuperBuilder
21 | @NoArgsConstructor
22 | @Getter
23 | @Setter
24 | public abstract class InferenceRequest {
25 |     /**
26 |      * Unique identifier for the model to be used for inference.
27 |      * Marked {@code @NonNull}: this field is required and must not be set to null.
28 |      */
29 |     @NonNull
30 |     private String modelId;
31 |     /**
32 |      * List of targetResponseFilters to be applied.
33 |      * Defaults to a single filter, {@code "sentence_embedding"}, when not set through the builder.
34 |      */
35 |     @Builder.Default
36 |     private List<String> targetResponseFilters = List.of("sentence_embedding");
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/MapInferenceRequest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.processor;
6 |
7 | import java.util.Map;
8 | import lombok.Getter;
9 | import lombok.NoArgsConstructor;
10 | import lombok.Setter;
11 | import lombok.experimental.SuperBuilder;
12 |
13 | /**
14 | * Implementation of InferenceRequest for inputObjects based inference requests.
15 | * Use this class when the input data consists of key-value pairs.
16 | * NOTE(review): inputObjects is declared as a raw Map here; confirm the intended key and value types.
17 | * @see InferenceRequest
18 | */
19 | @SuperBuilder
20 | @NoArgsConstructor
21 | @Getter
22 | @Setter
23 | public class MapInferenceRequest extends InferenceRequest {
24 | private Map inputObjects;
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/NormalizationExecuteDTO.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.processor;
6 |
7 | import lombok.AllArgsConstructor;
8 | import lombok.Builder;
9 | import lombok.Getter;
10 | import lombok.NonNull;
11 | import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
12 | import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;
13 | import org.opensearch.search.fetch.FetchSearchResult;
14 | import org.opensearch.search.query.QuerySearchResult;
15 |
16 | import java.util.List;
17 | import java.util.Optional;
18 |
19 | /**
20 |  * DTO object to hold data in NormalizationProcessorWorkflow: the query search results, the optional
21 |  * fetch search result, and the normalization and combination techniques. All fields are required.
22 |  */
23 | @AllArgsConstructor
24 | @Builder
25 | @Getter
26 | public class NormalizationExecuteDTO {
27 |     @NonNull
28 |     private List<QuerySearchResult> querySearchResults;
29 |     @NonNull
30 |     private Optional<FetchSearchResult> fetchSearchResultOptional;
31 |     @NonNull
32 |     private ScoreNormalizationTechnique normalizationTechnique;
33 |     @NonNull
34 |     private ScoreCombinationTechnique combinationTechnique;
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflowExecuteRequest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.processor;
6 |
7 | import lombok.AllArgsConstructor;
8 | import lombok.Builder;
9 | import lombok.Getter;
10 | import org.opensearch.action.search.SearchPhaseContext;
11 | import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
12 | import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;
13 | import org.opensearch.search.fetch.FetchSearchResult;
14 | import org.opensearch.search.pipeline.PipelineProcessingContext;
15 | import org.opensearch.search.query.QuerySearchResult;
16 |
17 | import java.util.List;
18 | import java.util.Optional;
19 |
20 | /**
21 |  * DTO class to hold request parameters for normalization and combination
22 |  */
23 | @Builder
24 | @AllArgsConstructor
25 | @Getter
26 | public class NormalizationProcessorWorkflowExecuteRequest {
27 |     final List<QuerySearchResult> querySearchResults;
28 |     final Optional<FetchSearchResult> fetchSearchResultOptional;
29 |     final ScoreNormalizationTechnique normalizationTechnique;
30 |     final ScoreCombinationTechnique combinationTechnique;
31 |     boolean explain;
32 |     final PipelineProcessingContext pipelineProcessingContext;
33 |     final SearchPhaseContext searchPhaseContext;
34 | }
35 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/NormalizeScoresDTO.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.processor;
6 |
7 | import lombok.AllArgsConstructor;
8 | import lombok.Builder;
9 | import lombok.Getter;
10 | import lombok.NonNull;
11 | import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;
12 |
13 | import java.util.List;
14 |
15 | /**
16 | * DTO object to hold data required for score normalization. NOTE(review): queryTopDocs is a raw List here; confirm its element type.
17 | */
18 | @AllArgsConstructor
19 | @Builder
20 | @Getter
21 | public class NormalizeScoresDTO {
22 | @NonNull
23 | private List queryTopDocs;
24 | @NonNull
25 | private ScoreNormalizationTechnique normalizationTechnique;
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/SearchShard.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.processor;
6 |
7 | import lombok.AllArgsConstructor;
8 | import lombok.Value;
9 | import org.opensearch.search.SearchShardTarget;
10 |
11 | /**
12 |  * DTO class to store index, shardId and nodeId for a search shard.
13 |  */
14 | @Value
15 | @AllArgsConstructor
16 | public class SearchShard {
17 |     String index;
18 |     int shardId;
19 |     String nodeId;
20 |
21 |     /**
22 |      * Builds a SearchShard from the index, shard id and node id of a SearchShardTarget.
23 |      *
24 |      * @param searchShardTarget target whose index, shard id and node id are copied
25 |      * @return SearchShard
26 |      */
27 |     public static SearchShard createSearchShard(final SearchShardTarget searchShardTarget) {
28 |         final String targetIndex = searchShardTarget.getIndex();
29 |         final int targetShardId = searchShardTarget.getShardId().id();
30 |         final String targetNodeId = searchShardTarget.getNodeId();
31 |         return new SearchShard(targetIndex, targetShardId, targetNodeId);
32 |     }
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/SimilarityInferenceRequest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.processor;
6 |
7 | import lombok.NoArgsConstructor;
8 | import lombok.Getter;
9 | import lombok.Setter;
10 | import lombok.experimental.SuperBuilder;
11 |
12 | /**
13 | * Implementation of InferenceRequest for similarity based text inference requests.
14 | * Extends {@link TextInferenceRequest} with a {@code queryText} field.
15 | * @see TextInferenceRequest
16 | */
17 | @SuperBuilder
18 | @NoArgsConstructor
19 | @Getter
20 | @Setter
21 | public class SimilarityInferenceRequest extends TextInferenceRequest {
22 | private String queryText;
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/TechniqueCompatibilityCheckDTO.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.processor;
6 |
7 | import lombok.AllArgsConstructor;
8 | import lombok.Builder;
9 | import lombok.Getter;
10 | import lombok.NonNull;
11 | import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
12 | import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;
13 |
14 | /**
15 | * DTO object to hold data required for validation: a score combination technique and a score normalization technique, both required.
16 | */
17 | @AllArgsConstructor
18 | @Builder
19 | @Getter
20 | public class TechniqueCompatibilityCheckDTO {
21 | @NonNull
22 | private ScoreCombinationTechnique scoreCombinationTechnique;
23 | @NonNull
24 | private ScoreNormalizationTechnique scoreNormalizationTechnique;
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/TextInferenceRequest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.processor;
6 |
7 | import java.util.List;
8 | import lombok.Getter;
9 | import lombok.NoArgsConstructor;
10 | import lombok.Setter;
11 | import lombok.experimental.SuperBuilder;
12 |
13 | /**
14 |  * Implementation of InferenceRequest for inputTexts based inference requests.
15 |  * Use this class when the input data consists of a list of strings.
16 |  *
17 |  * @see InferenceRequest
18 |  */
19 | @SuperBuilder
20 | @NoArgsConstructor
21 | @Getter
22 | @Setter
23 | public class TextInferenceRequest extends InferenceRequest {
24 |     private List<String> inputTexts; // on which inference needs to happen
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright OpenSearch Contributors
3 | * SPDX-License-Identifier: Apache-2.0
4 | */
5 | package org.opensearch.neuralsearch.processor.chunker;
6 |
7 | import com.google.common.collect.ImmutableMap;
8 |
9 | import java.util.Map;
10 | import java.util.Objects;
11 | import java.util.Set;
12 | import java.util.function.Function;
13 |
14 | /**
15 | * A factory to create different chunking algorithm objects.
16 | */
17 | public final class ChunkerFactory {
18 |
19 | private ChunkerFactory() {} // no instance of this factory class
20 |
21 | private static final Map, Chunker>> CHUNKERS_CONSTRUCTORS = ImmutableMap.of(
22 | FixedTokenLengthChunker.ALGORITHM_NAME,
23 | FixedTokenLengthChunker::new,
24 | DelimiterChunker.ALGORITHM_NAME,
25 | DelimiterChunker::new
26 | );
27 |
28 | /** Set of supported chunker algorithm types */
29 | public static Set CHUNKER_ALGORITHMS = CHUNKERS_CONSTRUCTORS.keySet();
30 |
31 | /**
32 | * Creates a new Chunker instance based on the specified type and parameters.
33 | *
34 | * @param type the type of chunker to create
35 | * @param parameters configuration parameters for the chunker
36 | * @return a new Chunker instance configured with the given parameters
37 | */
38 | public static Chunker create(final String type, final Map parameters) {
39 | Function