├── .github ├── CODEOWNERS ├── PULL_REQUEST_TEMPLATE.md ├── draft-release-notes-config.yml └── workflows │ ├── CI.yml │ ├── add-untriaged.yml │ ├── auto-release.yml │ ├── backport.yml │ ├── backwards_compatibility_tests_workflow.yml │ ├── changelog_verifier.yml │ ├── check-workflow-events.yml │ ├── copy-linked-issue-labels.yml │ ├── delete_backport_branch.yml │ ├── draft-release-notes-workflow.yml │ ├── links.yml │ ├── maven-publish.yml │ ├── test_aggregations.yml │ └── test_security.yml ├── .gitignore ├── .idea ├── copyright │ ├── SPDX_ALv2.xml │ └── profiles_settings.xml └── runConfigurations │ ├── DebugNeuralSearch.xml │ ├── Run_Neural_Search.xml │ └── Run_With_Debug_Port.xml ├── .whitesource ├── ADMINS.md ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DEVELOPER_GUIDE.md ├── LICENSE ├── MAINTAINERS.md ├── NOTICE ├── README.md ├── RELEASING.md ├── SECURITY.md ├── TRIAGING.md ├── build.gradle ├── codecov.yml ├── formatter ├── formatterConfig.xml └── license-header.txt ├── gradle.properties ├── gradle ├── formatting.gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── lombok.config ├── qa ├── build.gradle ├── restart-upgrade │ ├── build.gradle │ └── src │ │ └── test │ │ ├── java │ │ └── org │ │ │ └── opensearch │ │ │ └── neuralsearch │ │ │ └── bwc │ │ │ └── restart │ │ │ ├── AbstractRestartUpgradeRestTestCase.java │ │ │ ├── BatchIngestionIT.java │ │ │ ├── HybridSearchIT.java │ │ │ ├── HybridSearchWithRescoreIT.java │ │ │ ├── KnnRadialSearchIT.java │ │ │ ├── MultiModalSearchIT.java │ │ │ ├── NeuralQueryEnricherProcessorIT.java │ │ │ ├── NeuralSparseSearchIT.java │ │ │ ├── NeuralSparseTwoPhaseProcessorIT.java │ │ │ ├── RestNeuralStatsActionIT.java │ │ │ ├── SemanticSearchIT.java │ │ │ └── TextChunkingProcessorIT.java │ │ └── resources │ │ └── processor │ │ ├── ChunkingIndexSettings.json │ │ ├── CreateModelGroupRequestBody.json │ │ ├── IndexMappingMultipleShard.json │ │ ├── 
IndexMappingSingleShard.json │ │ ├── NeuralSparseTwoPhaseProcessorConfiguration.json │ │ ├── PipelineConfiguration.json │ │ ├── PipelineForSparseEncodingProcessorConfiguration.json │ │ ├── PipelineForTextChunkingProcessorConfiguration.json │ │ ├── PipelineForTextImageProcessorConfiguration.json │ │ ├── SearchRequestPipelineConfiguration.json │ │ ├── SparseIndexMappings.json │ │ ├── UploadModelRequestBody.json │ │ └── UploadSparseEncodingModelRequestBody.json └── rolling-upgrade │ ├── build.gradle │ └── src │ └── test │ ├── java │ └── org │ │ └── opensearch │ │ └── neuralsearch │ │ └── bwc │ │ └── rolling │ │ ├── AbstractRollingUpgradeTestCase.java │ │ ├── BatchIngestionIT.java │ │ ├── HybridSearchIT.java │ │ ├── HybridSearchRelevancyIT.java │ │ ├── HybridSearchWithRescoreIT.java │ │ ├── KnnRadialSearchIT.java │ │ ├── MultiModalSearchIT.java │ │ ├── NeuralQueryEnricherProcessorIT.java │ │ ├── NeuralSparseSearchIT.java │ │ ├── NeuralSparseTwoPhaseProcessorIT.java │ │ ├── RestNeuralStatsActionIT.java │ │ ├── SemanticSearchIT.java │ │ └── TextChunkingProcessorIT.java │ └── resources │ └── processor │ ├── ChunkingIndexSettings.json │ ├── CreateModelGroupRequestBody.json │ ├── IndexMappings.json │ ├── NeuralSparseTwoPhaseProcessorConfiguration.json │ ├── PipelineConfiguration.json │ ├── PipelineForSparseEncodingProcessorConfiguration.json │ ├── PipelineForTextChunkingProcessorConfiguration.json │ ├── PipelineForTextImageProcessorConfiguration.json │ ├── SearchRequestPipelineConfiguration.json │ ├── SparseIndexMappings.json │ ├── UploadModelRequestBody.json │ └── UploadSparseEncodingModelRequestBody.json ├── release-notes ├── opensearch-neural-search.release-notes-2.10.0.0.md ├── opensearch-neural-search.release-notes-2.11.0.0.md ├── opensearch-neural-search.release-notes-2.12.0.0.md ├── opensearch-neural-search.release-notes-2.13.0.0.md ├── opensearch-neural-search.release-notes-2.14.0.0.md ├── opensearch-neural-search.release-notes-2.15.0.0.md ├── 
opensearch-neural-search.release-notes-2.16.0.0.md ├── opensearch-neural-search.release-notes-2.17.0.0.md ├── opensearch-neural-search.release-notes-2.18.0.0.md ├── opensearch-neural-search.release-notes-2.19.0.0.md ├── opensearch-neural-search.release-notes-2.4.0.0.md ├── opensearch-neural-search.release-notes-2.4.1.0.md ├── opensearch-neural-search.release-notes-2.5.0.0.md ├── opensearch-neural-search.release-notes-2.6.0.0.md ├── opensearch-neural-search.release-notes-2.7.0.0.md ├── opensearch-neural-search.release-notes-2.8.0.0.md ├── opensearch-neural-search.release-notes-2.9.0.0.md ├── opensearch-neural-search.release-notes-3.0.0.0-alpha1.md ├── opensearch-neural-search.release-notes-3.0.0.0-beta1.md └── opensearch-neural-search.release-notes-3.0.0.0.md ├── repositories.gradle ├── settings.gradle └── src ├── main ├── java │ └── org │ │ └── opensearch │ │ └── neuralsearch │ │ ├── common │ │ ├── MinClusterVersionUtil.java │ │ └── VectorUtil.java │ │ ├── constants │ │ ├── MappingConstants.java │ │ ├── SemanticFieldConstants.java │ │ └── SemanticInfoFieldConstants.java │ │ ├── executors │ │ ├── HybridQueryExecutor.java │ │ ├── HybridQueryExecutorCollector.java │ │ ├── HybridQueryExecutorCollectorManager.java │ │ ├── HybridQueryRewriteCollectorManager.java │ │ └── HybridQueryScoreSupplierCollectorManager.java │ │ ├── highlight │ │ ├── SemanticHighlighter.java │ │ ├── SemanticHighlighterEngine.java │ │ └── extractor │ │ │ ├── BooleanQueryTextExtractor.java │ │ │ ├── HybridQueryTextExtractor.java │ │ │ ├── NestedQueryTextExtractor.java │ │ │ ├── NeuralQueryTextExtractor.java │ │ │ ├── QueryTextExtractor.java │ │ │ ├── QueryTextExtractorRegistry.java │ │ │ └── TermQueryTextExtractor.java │ │ ├── mapper │ │ ├── SemanticFieldMapper.java │ │ └── dto │ │ │ └── SemanticParameters.java │ │ ├── mappingtransformer │ │ ├── SemanticInfoConfigBuilder.java │ │ └── SemanticMappingTransformer.java │ │ ├── ml │ │ └── MLCommonsClientAccessor.java │ │ ├── plugin │ │ └── 
NeuralSearch.java │ │ ├── processor │ │ ├── AbstractScoreHybridizationProcessor.java │ │ ├── CompoundTopDocs.java │ │ ├── ExplanationResponseProcessor.java │ │ ├── InferenceProcessor.java │ │ ├── InferenceRequest.java │ │ ├── MapInferenceRequest.java │ │ ├── NeuralQueryEnricherProcessor.java │ │ ├── NeuralSparseTwoPhaseProcessor.java │ │ ├── NormalizationExecuteDTO.java │ │ ├── NormalizationProcessor.java │ │ ├── NormalizationProcessorWorkflow.java │ │ ├── NormalizationProcessorWorkflowExecuteRequest.java │ │ ├── NormalizeScoresDTO.java │ │ ├── RRFProcessor.java │ │ ├── SearchShard.java │ │ ├── SimilarityInferenceRequest.java │ │ ├── SparseEncodingProcessor.java │ │ ├── TechniqueCompatibilityCheckDTO.java │ │ ├── TextChunkingProcessor.java │ │ ├── TextEmbeddingProcessor.java │ │ ├── TextImageEmbeddingProcessor.java │ │ ├── TextInferenceRequest.java │ │ ├── chunker │ │ │ ├── Chunker.java │ │ │ ├── ChunkerFactory.java │ │ │ ├── ChunkerParameterParser.java │ │ │ ├── DelimiterChunker.java │ │ │ └── FixedTokenLengthChunker.java │ │ ├── combination │ │ │ ├── ArithmeticMeanScoreCombinationTechnique.java │ │ │ ├── CombineScoresDto.java │ │ │ ├── GeometricMeanScoreCombinationTechnique.java │ │ │ ├── HarmonicMeanScoreCombinationTechnique.java │ │ │ ├── RRFScoreCombinationTechnique.java │ │ │ ├── ScoreCombinationFactory.java │ │ │ ├── ScoreCombinationTechnique.java │ │ │ ├── ScoreCombinationUtil.java │ │ │ └── ScoreCombiner.java │ │ ├── dto │ │ │ └── SemanticFieldInfo.java │ │ ├── explain │ │ │ ├── CombinedExplanationDetails.java │ │ │ ├── DocIdAtSearchShard.java │ │ │ ├── ExplainableTechnique.java │ │ │ ├── ExplanationDetails.java │ │ │ ├── ExplanationPayload.java │ │ │ └── ExplanationUtils.java │ │ ├── factory │ │ │ ├── ExplanationResponseProcessorFactory.java │ │ │ ├── NormalizationProcessorFactory.java │ │ │ ├── RRFProcessorFactory.java │ │ │ ├── RerankProcessorFactory.java │ │ │ ├── SemanticFieldProcessorFactory.java │ │ │ ├── SparseEncodingProcessorFactory.java │ │ │ 
├── TextChunkingProcessorFactory.java │ │ │ ├── TextEmbeddingProcessorFactory.java │ │ │ └── TextImageEmbeddingProcessorFactory.java │ │ ├── highlight │ │ │ └── SentenceHighlightingRequest.java │ │ ├── normalization │ │ │ ├── L2ScoreNormalizationTechnique.java │ │ │ ├── MinMaxScoreNormalizationTechnique.java │ │ │ ├── RRFNormalizationTechnique.java │ │ │ ├── ScoreNormalizationFactory.java │ │ │ ├── ScoreNormalizationTechnique.java │ │ │ ├── ScoreNormalizationUtil.java │ │ │ ├── ScoreNormalizer.java │ │ │ └── ZScoreNormalizationTechnique.java │ │ ├── optimization │ │ │ ├── InferenceFilter.java │ │ │ ├── TextEmbeddingInferenceFilter.java │ │ │ └── TextImageEmbeddingInferenceFilter.java │ │ ├── rerank │ │ │ ├── ByFieldRerankProcessor.java │ │ │ ├── MLOpenSearchRerankProcessor.java │ │ │ ├── RerankProcessor.java │ │ │ ├── RerankType.java │ │ │ ├── RescoringRerankProcessor.java │ │ │ └── context │ │ │ │ ├── ContextSourceFetcher.java │ │ │ │ ├── DocumentContextSourceFetcher.java │ │ │ │ └── QueryContextSourceFetcher.java │ │ ├── semantic │ │ │ └── SemanticFieldProcessor.java │ │ └── util │ │ │ ├── ChunkUtils.java │ │ │ └── ProcessorUtils.java │ │ ├── query │ │ ├── HybridBulkScorer.java │ │ ├── HybridQuery.java │ │ ├── HybridQueryBuilder.java │ │ ├── HybridQueryContext.java │ │ ├── HybridQueryDocIdStream.java │ │ ├── HybridQueryScorer.java │ │ ├── HybridQueryWeight.java │ │ ├── HybridScoreBlockBoundaryPropagator.java │ │ ├── HybridScorerSupplier.java │ │ ├── HybridSubQueryScorer.java │ │ ├── ModelInferenceQueryBuilder.java │ │ ├── NeuralKNNQuery.java │ │ ├── NeuralKNNQueryBuilder.java │ │ ├── NeuralQueryBuilder.java │ │ ├── NeuralSparseQueryBuilder.java │ │ ├── NeuralSparseQueryTwoPhaseInfo.java │ │ ├── dto │ │ │ ├── NeuralQueryBuildStage.java │ │ │ └── NeuralQueryTargetFieldConfig.java │ │ ├── ext │ │ │ └── RerankSearchExtBuilder.java │ │ ├── parser │ │ │ └── NeuralQueryParser.java │ │ └── visitor │ │ │ └── NeuralSearchQueryVisitor.java │ │ ├── rest │ │ └── 
RestNeuralStatsAction.java │ │ ├── search │ │ ├── HitsThresholdChecker.java │ │ ├── HybridDisiWrapper.java │ │ ├── collector │ │ │ ├── HybridLeafCollector.java │ │ │ ├── HybridSearchCollector.java │ │ │ ├── HybridTopFieldDocSortCollector.java │ │ │ ├── HybridTopScoreDocCollector.java │ │ │ ├── PagingFieldCollector.java │ │ │ └── SimpleFieldCollector.java │ │ ├── lucene │ │ │ └── MultiLeafFieldComparator.java │ │ ├── query │ │ │ ├── HybridAggregationProcessor.java │ │ │ ├── HybridCollectorManager.java │ │ │ ├── HybridQueryFieldDocComparator.java │ │ │ ├── HybridQueryPhaseSearcher.java │ │ │ ├── HybridQueryScoreDocsMerger.java │ │ │ ├── TopDocsMerger.java │ │ │ └── exception │ │ │ │ └── HybridSearchRescoreQueryException.java │ │ └── util │ │ │ ├── HybridSearchResultFormatUtil.java │ │ │ └── HybridSearchSortUtil.java │ │ ├── settings │ │ ├── NeuralSearchSettings.java │ │ └── NeuralSearchSettingsAccessor.java │ │ ├── stats │ │ ├── NeuralStatsInput.java │ │ ├── common │ │ │ ├── StatName.java │ │ │ ├── StatSnapshot.java │ │ │ └── StatType.java │ │ ├── events │ │ │ ├── EventStat.java │ │ │ ├── EventStatName.java │ │ │ ├── EventStatType.java │ │ │ ├── EventStatsManager.java │ │ │ ├── TimestampedEventStat.java │ │ │ └── TimestampedEventStatSnapshot.java │ │ └── info │ │ │ ├── CountableInfoStatSnapshot.java │ │ │ ├── InfoStatName.java │ │ │ ├── InfoStatType.java │ │ │ ├── InfoStatsManager.java │ │ │ └── SettableInfoStatSnapshot.java │ │ ├── transport │ │ ├── NeuralStatsAction.java │ │ ├── NeuralStatsNodeRequest.java │ │ ├── NeuralStatsNodeResponse.java │ │ ├── NeuralStatsRequest.java │ │ ├── NeuralStatsResponse.java │ │ └── NeuralStatsTransportAction.java │ │ └── util │ │ ├── HybridQueryUtil.java │ │ ├── NeuralQueryValidationUtil.java │ │ ├── NeuralSearchClusterUtil.java │ │ ├── PipelineServiceUtil.java │ │ ├── ProcessorDocumentUtils.java │ │ ├── RetryUtil.java │ │ ├── SemanticMLModelUtils.java │ │ ├── SemanticMappingUtils.java │ │ ├── TokenWeightUtil.java │ │ └── prune │ │ 
├── PruneType.java │ │ └── PruneUtils.java └── plugin-metadata │ └── plugin-security.policy ├── test ├── java │ └── org │ │ └── opensearch │ │ └── neuralsearch │ │ ├── NeuralSearchIT.java │ │ ├── NeuralSearchTests.java │ │ ├── ValidateDependentPluginInstallationIT.java │ │ ├── common │ │ └── VectorUtilTests.java │ │ ├── constants │ │ └── TestCommonConstants.java │ │ ├── executors │ │ └── HybridQueryExecutorIT.java │ │ ├── highlight │ │ ├── QueryTextExtractorTests.java │ │ ├── SemanticHighlighterEngineTests.java │ │ ├── SemanticHighlighterIT.java │ │ └── SemanticHighlighterTests.java │ │ ├── mapper │ │ └── SemanticFieldMapperTests.java │ │ ├── mappingtransformer │ │ ├── SemanticInfoConfigBuilderTests.java │ │ └── SemanticMappingTransformerTests.java │ │ ├── ml │ │ └── MLCommonsClientAccessorTests.java │ │ ├── plugin │ │ └── NeuralSearchTests.java │ │ ├── processor │ │ ├── AbstractScoreHybridizationProcessorTests.java │ │ ├── CompoundTopDocsTests.java │ │ ├── ExplanationResponseProcessorTests.java │ │ ├── InferenceProcessorTestCase.java │ │ ├── InferenceProcessorTests.java │ │ ├── NeuralQueryEnricherProcessorIT.java │ │ ├── NeuralQueryEnricherProcessorTests.java │ │ ├── NeuralSparseTwoPhaseProcessorIT.java │ │ ├── NeuralSparseTwoPhaseProcessorTests.java │ │ ├── NormalizationProcessorIT.java │ │ ├── NormalizationProcessorTests.java │ │ ├── NormalizationProcessorWorkflowTests.java │ │ ├── RRFProcessorIT.java │ │ ├── RRFProcessorTests.java │ │ ├── ScoreCombinationIT.java │ │ ├── ScoreCombinationTechniqueTests.java │ │ ├── ScoreNormalizationIT.java │ │ ├── ScoreNormalizationTechniqueTests.java │ │ ├── SparseEncodingProcessIT.java │ │ ├── SparseEncodingProcessorTests.java │ │ ├── TextChunkingProcessorIT.java │ │ ├── TextChunkingProcessorTests.java │ │ ├── TextEmbeddingProcessorIT.java │ │ ├── TextEmbeddingProcessorTests.java │ │ ├── TextImageEmbeddingProcessorIT.java │ │ ├── TextImageEmbeddingProcessorTests.java │ │ ├── chunker │ │ │ ├── ChunkerFactoryTests.java │ │ │ ├── 
ChunkerParameterParserTests.java │ │ │ ├── DelimiterChunkerTests.java │ │ │ └── FixedTokenLengthChunkerTests.java │ │ ├── combination │ │ │ ├── ArithmeticMeanScoreCombinationTechniqueTests.java │ │ │ ├── BaseScoreCombinationTechniqueTests.java │ │ │ ├── GeometricMeanScoreCombinationTechniqueTests.java │ │ │ ├── HarmonicMeanScoreCombinationTechniqueTests.java │ │ │ ├── RRFScoreCombinationTechniqueTests.java │ │ │ ├── ScoreCombinationFactoryTests.java │ │ │ └── ScoreNormalizationUtilTests.java │ │ ├── dto │ │ │ └── SemanticFieldInfoTests.java │ │ ├── explain │ │ │ └── ExplanationUtilsTests.java │ │ ├── factory │ │ │ ├── ExplanationResponseProcessorFactoryTests.java │ │ │ ├── NormalizationProcessorFactoryTests.java │ │ │ ├── RRFProcessorFactoryTests.java │ │ │ ├── RerankProcessorFactoryTests.java │ │ │ ├── SemanticFieldProcessorFactoryTests.java │ │ │ ├── SparseEncodingEmbeddingProcessorFactoryTests.java │ │ │ ├── TextChunkingProcessorFactoryTests.java │ │ │ └── TextImageEmbeddingProcessorFactoryTests.java │ │ ├── normalization │ │ │ ├── L2ScoreNormalizationTechniqueTests.java │ │ │ ├── MinMaxScoreNormalizationTechniqueTests.java │ │ │ ├── RRFNormalizationTechniqueTests.java │ │ │ ├── ScoreNormalizationFactoryTests.java │ │ │ ├── ScoreNormalizationUtilTests.java │ │ │ └── ZScoreNormalizationTechniqueTests.java │ │ ├── optimization │ │ │ ├── TextEmbeddingInferenceFilterTests.java │ │ │ └── TextImageEmbeddingInferenceFilterTests.java │ │ ├── rerank │ │ │ ├── ByFieldRerankProcessorIT.java │ │ │ ├── ByFieldRerankProcessorTests.java │ │ │ ├── MLOpenSearchRerankProcessorIT.java │ │ │ └── MLOpenSearchRerankProcessorTests.java │ │ ├── semantic │ │ │ └── SemanticFieldProcessorTests.java │ │ └── util │ │ │ └── ChunkUtilsTests.java │ │ ├── query │ │ ├── HybridBulkScorerTests.java │ │ ├── HybridQueryAggregationsIT.java │ │ ├── HybridQueryBuilderTests.java │ │ ├── HybridQueryDocIdStreamTests.java │ │ ├── HybridQueryExplainIT.java │ │ ├── HybridQueryFilterIT.java │ │ ├── 
HybridQueryIT.java │ │ ├── HybridQueryInnerHitsIT.java │ │ ├── HybridQueryPostFilterIT.java │ │ ├── HybridQueryScorerTests.java │ │ ├── HybridQuerySortIT.java │ │ ├── HybridQueryTests.java │ │ ├── HybridQueryWeightTests.java │ │ ├── HybridScoreBlockBoundaryPropagatorTests.java │ │ ├── HybridScorerSupplierTests.java │ │ ├── HybridSubQueryScorerTests.java │ │ ├── NeuralKNNQueryBuilderTests.java │ │ ├── NeuralKNNQueryTests.java │ │ ├── NeuralQueryBuilderBuilderTests.java │ │ ├── NeuralQueryBuilderRewriteTests.java │ │ ├── NeuralQueryBuilderTests.java │ │ ├── NeuralQueryIT.java │ │ ├── NeuralSparseQueryBuilderTests.java │ │ ├── NeuralSparseQueryIT.java │ │ ├── NeuralSparseQueryTwoPhaseInfoTests.java │ │ ├── OpenSearchQueryTestCase.java │ │ ├── aggregation │ │ │ ├── BaseAggregationsWithHybridQueryIT.java │ │ │ ├── BucketAggregationsWithHybridQueryIT.java │ │ │ ├── MetricAggregationsWithHybridQueryIT.java │ │ │ └── PipelineAggregationsWithHybridQueryIT.java │ │ ├── ext │ │ │ └── RerankSearchExtBuilderTests.java │ │ └── visitor │ │ │ └── NeuralSearchQueryVisitorTests.java │ │ ├── rest │ │ ├── RestNeuralStatsActionIT.java │ │ └── RestNeuralStatsActionTests.java │ │ ├── search │ │ ├── HitsThresholdCheckerTests.java │ │ ├── HybridDisiWrapperTests.java │ │ ├── collector │ │ │ ├── HybridCollectorTestCase.java │ │ │ ├── HybridTopFieldDocSortCollectorTests.java │ │ │ └── HybridTopScoreDocCollectorTests.java │ │ ├── query │ │ │ ├── HybridAggregationProcessorTests.java │ │ │ ├── HybridCollectorManagerTests.java │ │ │ ├── HybridQueryPhaseSearcherTests.java │ │ │ ├── HybridQueryScoreDocsMergerTests.java │ │ │ └── TopDocsMergerTests.java │ │ └── util │ │ │ └── HybridSearchResultFormatUtilTests.java │ │ ├── stats │ │ ├── NeuralStatsInputTests.java │ │ ├── events │ │ │ ├── EventStatNameTests.java │ │ │ ├── EventStatsManagerTests.java │ │ │ ├── TimestampedEventStatSnapshotTests.java │ │ │ └── TimestampedEventStatTests.java │ │ └── info │ │ │ ├── CountableInfoStatSnapshotTests.java │ │ │ 
├── InfoStatNameTests.java │ │ │ ├── InfoStatsManagerTests.java │ │ │ └── SettableInfoStatSnapshotTests.java │ │ ├── transport │ │ ├── NeuralStatsResponseTests.java │ │ └── NeuralStatsTransportActionTests.java │ │ └── util │ │ ├── HybridQueryUtilTests.java │ │ ├── NeuralSearchClusterUtilTests.java │ │ ├── PipelineServiceUtilTests.java │ │ ├── ProcessorDocumentUtilsTests.java │ │ ├── ProcessorUtilsTests.java │ │ ├── SemanticFieldMapperTestUtil.java │ │ ├── SemanticMLModelUtilsTests.java │ │ ├── SemanticMappingUtilsTests.java │ │ ├── TokenWeightUtilTests.java │ │ └── prune │ │ ├── PruneTypeTests.java │ │ └── PruneUtilsTests.java └── resources │ ├── highlight │ └── UploadSentenceHighlightingModelRequestBody.json │ ├── mapper │ └── mappingWithNestedSemanticFields.json │ ├── mappingtransformer │ └── transformedMappingMultipleSemanticFields.json │ ├── processor │ ├── CreateModelGroupRequestBody.json │ ├── IndexMappings.json │ ├── NeuralSparseTwoPhaseAndNeuralEnrichProcessorConfiguration.json │ ├── NeuralSparseTwoPhaseProcessorConfiguration.json │ ├── PipelineConfiguration.json │ ├── PipelineConfigurationWithBatchSize.json │ ├── PipelineConfigurationWithBatchSizeWithSkipExisting.json │ ├── PipelineConfigurationWithNestedFieldsMapping.json │ ├── PipelineConfigurationWithNestedFieldsMappingWithSkipExisting.json │ ├── PipelineConfigurationWithSkipExisting.json │ ├── PipelineForTextImageEmbeddingProcessorConfiguration.json │ ├── PipelineForTextImageEmbeddingWithSkipExistingProcessorConfiguration.json │ ├── ReRankByFieldPipelineConfiguration.json │ ├── RerankMLOpenSearchPipelineConfiguration.json │ ├── SearchRequestPipelineConfiguration.json │ ├── SparseEncodingIndexMappings.json │ ├── SparseEncodingPipelineConfiguration.json │ ├── SparseEncodingPipelineConfigurationWithPrune.json │ ├── SparseEncodingPipelineConfigurationWithSkipExisting.json │ ├── UploadModelRequestBody.json │ ├── UploadSparseEncodingModelRequestBody.json │ ├── UploadTextSimilarityModelRequestBody.json │ ├── 
bulk_item_template.json │ ├── chunker │ │ ├── PipelineForCascadedChunker.json │ │ ├── PipelineForDelimiterChunker.json │ │ ├── PipelineForFixedTokenLengthChunkerWithLetterTokenizer.json │ │ ├── PipelineForFixedTokenLengthChunkerWithLowercaseTokenizer.json │ │ ├── PipelineForFixedTokenLengthChunkerWithStandardTokenizer.json │ │ ├── TextChunkingIndexSettings.json │ │ ├── TextChunkingTestDocument.json │ │ └── TextChunkingTestLongDocument.json │ ├── ingest_bulk.json │ ├── ingest_doc1.json │ ├── ingest_doc2.json │ ├── ingest_doc3.json │ ├── ingest_doc4.json │ ├── ingest_doc5.json │ ├── semantic │ │ ├── ingest_doc1.json │ │ ├── ingest_doc2.json │ │ ├── ingest_doc3.json │ │ ├── ingested_doc1.json │ │ ├── ingested_doc2.json │ │ ├── ingested_doc3.json │ │ └── invalid_ingest_doc.json │ ├── update_doc1.json │ ├── update_doc2.json │ ├── update_doc3.json │ ├── update_doc4.json │ └── update_doc5.json │ └── util │ └── ProcessorDocumentUtils.json └── testFixtures └── java └── org └── opensearch └── neuralsearch ├── BaseNeuralSearchIT.java ├── OpenSearchSecureRestTestCase.java └── util ├── AggregationsTestUtils.java ├── BatchIngestionUtils.java ├── NeuralSearchClusterTestUtils.java └── TestUtils.java /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This should match the owning team set up in https://github.com/orgs/opensearch-project/teams 2 | * @heemin32 @navneet1v @VijayanB @vamshin @jmazanec15 @naveentatikonda @junqiu-lei @martin-gaievski @sean-zheng-amazon @model-collapse @zane-neo @vibrantvarun @zhichao-aws @yuye-aws @minalsha 3 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | [Describe what this change achieves] 3 | 4 | ### Related Issues 5 | Resolves #[Issue number to be closed when this PR is merged] 6 | 7 | 8 | ### Check List 9 | - [ ] New 
functionality includes testing. 10 | - [ ] New functionality has been documented. 11 | - [ ] API changes companion pull request [created](https://github.com/opensearch-project/opensearch-api-specification/blob/main/DEVELOPER_GUIDE.md). 12 | - [ ] Commits are signed per the DCO using `--signoff`. 13 | - [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose). 14 | 15 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 16 | For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/neural-search/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 17 | -------------------------------------------------------------------------------- /.github/draft-release-notes-config.yml: -------------------------------------------------------------------------------- 1 | # The overall template of the release notes 2 | template: | 3 | Compatible with OpenSearch (**set version here**). 
4 | $CHANGES 5 | 6 | # Setting the formatting and sorting for the release notes body 7 | name-template: Version (set version here) 8 | change-template: '* $TITLE (#$NUMBER)' 9 | sort-by: merged_at 10 | sort-direction: ascending 11 | replacers: 12 | - search: '##' 13 | replace: '###' 14 | 15 | # Organizing the tagged PRs into categories 16 | categories: 17 | - title: 'Breaking Changes' 18 | labels: 19 | - 'Breaking Changes' 20 | - title: 'Features' 21 | labels: 22 | - 'Features' 23 | - title: 'Enhancements' 24 | labels: 25 | - 'Enhancements' 26 | - title: 'Bug Fixes' 27 | labels: 28 | - 'Bug Fixes' 29 | - title: 'Infrastructure' 30 | labels: 31 | - 'Infrastructure' 32 | - title: 'Documentation' 33 | labels: 34 | - 'Documentation' 35 | - title: 'Maintenance' 36 | labels: 37 | - 'Maintenance' 38 | - title: 'Refactoring' 39 | labels: 40 | - 'Refactoring' 41 | -------------------------------------------------------------------------------- /.github/workflows/add-untriaged.yml: -------------------------------------------------------------------------------- 1 | name: Apply 'untriaged' label during issue lifecycle 2 | 3 | on: 4 | issues: 5 | types: [opened, reopened, transferred] 6 | 7 | jobs: 8 | apply-label: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/github-script@v6 12 | with: 13 | script: | 14 | github.rest.issues.addLabels({ 15 | issue_number: context.issue.number, 16 | owner: context.repo.owner, 17 | repo: context.repo.repo, 18 | labels: ['untriaged'] 19 | }) 20 | -------------------------------------------------------------------------------- /.github/workflows/auto-release.yml: -------------------------------------------------------------------------------- 1 | name: Releases 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | steps: 14 | - name: GitHub App token 15 | id: github_app_token 16 | uses: tibdex/github-app-token@v1.5.0 17 | with: 18 | app_id: ${{ 
secrets.APP_ID }} 19 | private_key: ${{ secrets.APP_PRIVATE_KEY }} 20 | installation_id: 22958780 21 | - name: Get tag 22 | id: tag 23 | uses: dawidd6/action-get-tag@v1 24 | - uses: actions/checkout@v2 25 | - uses: ncipollo/release-action@v1 26 | with: 27 | github_token: ${{ steps.github_app_token.outputs.token }} 28 | bodyFile: release-notes/opensearch-neural-search.release-notes-${{steps.tag.outputs.tag}}.md 29 | -------------------------------------------------------------------------------- /.github/workflows/backport.yml: -------------------------------------------------------------------------------- 1 | name: Backport 2 | on: 3 | pull_request_target: 4 | types: 5 | - closed 6 | - labeled 7 | 8 | jobs: 9 | backport: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | name: Backport 15 | steps: 16 | - name: GitHub App token 17 | id: github_app_token 18 | uses: tibdex/github-app-token@v1.5.0 19 | with: 20 | app_id: ${{ secrets.APP_ID }} 21 | private_key: ${{ secrets.APP_PRIVATE_KEY }} 22 | installation_id: 22958780 23 | 24 | - name: Backport 25 | uses: VachaShah/backport@v1.1.4 26 | with: 27 | github_token: ${{ steps.github_app_token.outputs.token }} 28 | branch_name: backport/backport-${{ github.event.number }} 29 | -------------------------------------------------------------------------------- /.github/workflows/backwards_compatibility_tests_workflow.yml: -------------------------------------------------------------------------------- 1 | name: Backwards Compatibility Tests NeuralSearch 2 | on: 3 | push: 4 | branches: 5 | - "*" 6 | - "feature/**" 7 | pull_request: 8 | branches: 9 | - "*" 10 | - "feature/**" 11 | 12 | jobs: 13 | Restart-Upgrade-BWCTests-NeuralSearch: 14 | strategy: 15 | matrix: 16 | java: [ 21, 23 ] 17 | os: [ubuntu-latest] 18 | bwc_version : [ "2.9.0","2.10.0","2.11.0","2.12.0","2.13.0","2.14.0","2.15.0","2.16.0","2.17.0","2.18.0","2.19.0","2.20.0-SNAPSHOT","3.0.0" ] 19 | opensearch_version : [ 
"3.1.0-SNAPSHOT" ] 20 | 21 | name: NeuralSearch Restart-Upgrade BWC Tests 22 | runs-on: ${{ matrix.os }} 23 | env: 24 | BWC_VERSION_RESTART_UPGRADE: ${{ matrix.bwc_version }} 25 | 26 | steps: 27 | - name: Checkout neural-search 28 | uses: actions/checkout@v1 29 | 30 | - name: Setup Java ${{ matrix.java }} 31 | uses: actions/setup-java@v1 32 | with: 33 | java-version: ${{ matrix.java }} 34 | 35 | - name: Run NeuralSearch Restart-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}} 36 | run: | 37 | echo "Running restart-upgrade backwards compatibility tests ..." 38 | ./gradlew :qa:restart-upgrade:testAgainstNewCluster -D'tests.bwc.version=${{ matrix.bwc_version }}' 39 | 40 | Rolling-Upgrade-BWCTests-NeuralSearch: 41 | strategy: 42 | matrix: 43 | java: [21, 23] 44 | os: [ubuntu-latest] 45 | bwc_version: [ "2.20.0-SNAPSHOT","3.0.0" ] 46 | opensearch_version: [ "3.1.0-SNAPSHOT" ] 47 | 48 | name: NeuralSearch Rolling-Upgrade BWC Tests 49 | runs-on: ${{ matrix.os }} 50 | env: 51 | BWC_VERSION_ROLLING_UPGRADE: ${{ matrix.bwc_version }} 52 | 53 | steps: 54 | - name: Checkout neural-search 55 | uses: actions/checkout@v1 56 | 57 | - name: Setup Java ${{ matrix.java }} 58 | uses: actions/setup-java@v1 59 | with: 60 | java-version: ${{ matrix.java }} 61 | 62 | - name: Run NeuralSearch Rolling-Upgrade BWC Tests from BWCVersion-${{ matrix.bwc_version }} to OpenSearch Version-${{ matrix.opensearch_version }} on ${{matrix.os}} 63 | run: | 64 | echo "Running rolling-upgrade backwards compatibility tests ..." 
65 | ./gradlew :qa:rolling-upgrade:testRollingUpgrade -D'tests.bwc.version=${{ matrix.bwc_version }}' 66 | -------------------------------------------------------------------------------- /.github/workflows/changelog_verifier.yml: -------------------------------------------------------------------------------- 1 | name: "Changelog Verifier" 2 | on: 3 | pull_request: 4 | types: [opened, synchronize, reopened, ready_for_review, labeled, unlabeled] 5 | 6 | jobs: 7 | # Enforces the update of a changelog file on every pull request 8 | verify-changelog: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | with: 13 | token: ${{ secrets.GITHUB_TOKEN }} 14 | ref: ${{ github.event.pull_request.head.sha }} 15 | 16 | - uses: dangoslen/changelog-enforcer@v3 17 | with: 18 | skipLabels: "autocut, skip-changelog" 19 | -------------------------------------------------------------------------------- /.github/workflows/check-workflow-events.yml: -------------------------------------------------------------------------------- 1 | name: Check Workflow Events 2 | on: 3 | pull_request: 4 | 5 | jobs: 6 | check-workflow-events: 7 | runs-on: ubuntu-latest 8 | name: Check Workflow Events 9 | steps: 10 | - uses: actions/checkout@v4 11 | - name: Check Workflow Events 12 | working-directory: .github/workflows 13 | run: | 14 | set +e 15 | EVENT_COUNT=0 16 | for file_found in `ls | grep .ym`; do 17 | yq -r e '.on | keys | .[0]' $file_found | grep -q pull_request_target 18 | EVENT_FOUND=$? 19 | 20 | if [ "$EVENT_FOUND" = 0 ] && [ "$file_found" != "backport.yml" ] && [ "$file_found" != "copy-linked-issue-labels.yml" ]; then 21 | EVENT_COUNT=$(( EVENT_COUNT+1 )) 22 | echo "'$file_found' workflow file contains 'pull_request_target' event, please remove!" 
23 | fi 24 | done 25 | 26 | if [ "$EVENT_COUNT" != 0 ]; then 27 | exit 1 28 | fi 29 | -------------------------------------------------------------------------------- /.github/workflows/copy-linked-issue-labels.yml: -------------------------------------------------------------------------------- 1 | name: Copy labels from linked issues 2 | on: 3 | pull_request_target: 4 | types: [opened, edited, review_requested, synchronize, reopened, ready_for_review] 5 | 6 | jobs: 7 | copy-issue-labels: 8 | if: github.repository == 'opensearch-project/neural-search' 9 | runs-on: ubuntu-latest 10 | permissions: 11 | issues: read 12 | contents: read 13 | pull-requests: write 14 | steps: 15 | - name: copy-issue-labels 16 | uses: michalvankodev/copy-issue-labels@v1.3.0 17 | with: 18 | repo-token: ${{ secrets.GITHUB_TOKEN }} 19 | labels-to-exclude: | 20 | untriaged 21 | triaged 22 | -------------------------------------------------------------------------------- /.github/workflows/delete_backport_branch.yml: -------------------------------------------------------------------------------- 1 | name: Delete merged branch of the backport PRs 2 | on: 3 | pull_request: 4 | types: 5 | - closed 6 | 7 | jobs: 8 | delete-branch: 9 | runs-on: ubuntu-latest 10 | permissions: 11 | contents: write 12 | if: github.repository == 'opensearch-project/neural-search' && startsWith(github.event.pull_request.head.ref,'backport/') 13 | steps: 14 | - name: Delete merged branch 15 | uses: actions/github-script@v7 16 | with: 17 | script: | 18 | github.rest.git.deleteRef({ 19 | owner: context.repo.owner, 20 | repo: context.repo.repo, 21 | ref: `heads/${context.payload.pull_request.head.ref}`, 22 | }) 23 | -------------------------------------------------------------------------------- /.github/workflows/draft-release-notes-workflow.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | 
update_release_draft: 10 | name: Update draft release notes 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Update draft release notes 14 | uses: release-drafter/release-drafter@v5 15 | with: 16 | config-name: draft-release-notes-config.yml 17 | name: Version (set here) 18 | tag: (None) 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | -------------------------------------------------------------------------------- /.github/workflows/links.yml: -------------------------------------------------------------------------------- 1 | name: Link Checker 2 | on: 3 | push: 4 | branches: [ main ] 5 | pull_request: 6 | branches: [ main ] 7 | 8 | jobs: 9 | linkchecker: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: lychee Link Checker 16 | id: lychee 17 | uses: lycheeverse/lychee-action@master 18 | with: 19 | args: --accept=200,403,429 **/*.html **/*.md **/*.txt **/*.json 20 | env: 21 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 22 | - name: Fail if there were link errors 23 | run: exit ${{ steps.lychee.outputs.exit_code }} 24 | -------------------------------------------------------------------------------- /.github/workflows/maven-publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish snapshots to maven 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - 'main' 8 | - '[0-9]+.[0-9]+' 9 | - '[0-9]+.x' 10 | 11 | jobs: 12 | build-and-publish-snapshots: 13 | runs-on: ubuntu-latest 14 | 15 | permissions: 16 | id-token: write 17 | contents: write 18 | 19 | steps: 20 | - uses: actions/setup-java@v3 21 | with: 22 | distribution: temurin # Temurin is a distribution of adoptium 23 | java-version: 21 24 | - uses: actions/checkout@v3 25 | - uses: aws-actions/configure-aws-credentials@v1 26 | with: 27 | role-to-assume: ${{ secrets.PUBLISH_SNAPSHOTS_ROLE }} 28 | aws-region: us-east-1 29 | - name: publish snapshots to maven 30 | run: | 31 | export 
SONATYPE_USERNAME=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-username --query SecretString --output text) 32 | export SONATYPE_PASSWORD=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-password --query SecretString --output text) 33 | echo "::add-mask::$SONATYPE_USERNAME" 34 | echo "::add-mask::$SONATYPE_PASSWORD" 35 | ./gradlew publishPluginZipPublicationToSnapshotsRepository 36 | -------------------------------------------------------------------------------- /.github/workflows/test_aggregations.yml: -------------------------------------------------------------------------------- 1 | name: Run Additional Tests for Neural Search 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' # every night 5 | push: 6 | branches: 7 | - "*" 8 | - "feature/**" 9 | pull_request: 10 | branches: 11 | - "*" 12 | - "feature/**" 13 | jobs: 14 | Get-CI-Image-Tag: 15 | uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main 16 | with: 17 | product: opensearch 18 | 19 | Check-neural-search-linux: 20 | needs: Get-CI-Image-Tag 21 | strategy: 22 | matrix: 23 | java: [21, 23] 24 | os: [ubuntu-latest] 25 | 26 | name: Integ Tests Linux 27 | runs-on: ${{ matrix.os }} 28 | container: 29 | # using the same image which is used by opensearch-build team to build the OpenSearch Distribution 30 | # this image tag is subject to change as more dependencies and updates will arrive over time 31 | image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} 32 | # need to switch to root so that github actions can install runner binary on container without permission issues. 
33 | options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} 34 | 35 | 36 | steps: 37 | - name: Run start commands 38 | run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} 39 | 40 | - name: Checkout neural-search 41 | uses: actions/checkout@v4 42 | 43 | - name: Setup Java ${{ matrix.java }} 44 | uses: actions/setup-java@v4 45 | with: 46 | distribution: 'temurin' 47 | java-version: ${{ matrix.java }} 48 | 49 | - name: Run tests 50 | run: | 51 | chown -R 1000:1000 `pwd` 52 | su `id -un 1000` -c "./gradlew ':integTest' -Dtest_aggs=true --tests \"org.opensearch.neuralsearch.query.aggregation.*IT\"" 53 | 54 | Check-neural-search-windows: 55 | strategy: 56 | matrix: 57 | java: [23] 58 | os: [windows-latest] 59 | 60 | name: Integ Tests Windows 61 | runs-on: ${{ matrix.os }} 62 | 63 | steps: 64 | - name: Checkout neural-search 65 | uses: actions/checkout@v4 66 | 67 | - name: Setup Java ${{ matrix.java }} 68 | uses: actions/setup-java@v4 69 | with: 70 | distribution: 'temurin' 71 | java-version: ${{ matrix.java }} 72 | 73 | - name: Run tests 74 | run: | 75 | ./gradlew ':integTest' -Dtest_aggs=true --tests "org.opensearch.neuralsearch.query.aggregation.*IT" 76 | -------------------------------------------------------------------------------- /.github/workflows/test_security.yml: -------------------------------------------------------------------------------- 1 | name: Test neural-search on Secure Cluster 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' # every night 5 | push: 6 | branches: 7 | - "*" 8 | - "feature/**" 9 | pull_request: 10 | branches: 11 | - "*" 12 | - "feature/**" 13 | 14 | jobs: 15 | Get-CI-Image-Tag: 16 | uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main 17 | with: 18 | product: opensearch 19 | 20 | integ-test-with-security-linux: 21 | strategy: 22 | matrix: 23 | java: [21, 23] 24 | 25 | name: Run Integration Tests on Linux 26 | runs-on: ubuntu-latest 27 | needs: Get-CI-Image-Tag 28 | container: 
29 | # using the same image which is used by opensearch-build team to build the OpenSearch Distribution 30 | # this image tag is subject to change as more dependencies and updates will arrive over time 31 | image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }} 32 | # need to switch to root so that github actions can install runner binary on container without permission issues. 33 | options: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-options }} 34 | 35 | steps: 36 | - name: Run start commands 37 | run: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-start-command }} 38 | 39 | - name: Checkout neural-search 40 | uses: actions/checkout@v4 41 | with: 42 | submodules: true 43 | 44 | - name: Setup Java ${{ matrix.java }} 45 | uses: actions/setup-java@v4 46 | with: 47 | distribution: 'temurin' 48 | java-version: ${{ matrix.java }} 49 | 50 | - name: Run tests 51 | # switching the user, as OpenSearch cluster can only be started as root/Administrator on linux-deb/linux-rpm/windows-zip. 
52 | run: | 53 | chown -R 1000:1000 `pwd` 54 | su `id -un 1000` -c "whoami && java -version && ./gradlew integTest -Dsecurity.enabled=true" 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # intellij files 2 | .idea/ 3 | *.iml 4 | *.ipr 5 | *.iws 6 | *.log 7 | build-idea/ 8 | out/ 9 | 10 | # eclipse files 11 | .classpath 12 | .project 13 | .settings 14 | 15 | # gradle stuff 16 | .gradle/ 17 | build/ 18 | bin/ 19 | 20 | # vscode stuff 21 | .vscode/ 22 | 23 | # osx stuff 24 | .DS_Store 25 | 26 | # git stuff 27 | .gitattributes 28 | -------------------------------------------------------------------------------- /.idea/copyright/SPDX_ALv2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/runConfigurations/DebugNeuralSearch.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 16 | -------------------------------------------------------------------------------- /.idea/runConfigurations/Run_Neural_Search.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 16 | 18 | true 19 | true 20 | false 21 | 22 | 23 | -------------------------------------------------------------------------------- /.idea/runConfigurations/Run_With_Debug_Port.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 17 | 19 | true 20 | true 21 | false 22 | 23 | 24 | -------------------------------------------------------------------------------- /.whitesource: 
-------------------------------------------------------------------------------- 1 | { 2 | "scanSettings": { 3 | "configMode": "AUTO", 4 | "configExternalURL": "", 5 | "projectToken": "", 6 | "baseBranches": [] 7 | }, 8 | "checkRunSettings": { 9 | "vulnerableCheckRunConclusionLevel": "failure", 10 | "displayMode": "diff", 11 | "useMendCheckNames": true 12 | }, 13 | "issueSettings": { 14 | "minSeverityLevel": "LOW", 15 | "issueType": "DEPENDENCY" 16 | }, 17 | "remediateSettings": { 18 | "workflowRules": { 19 | "enabled": true 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /ADMINS.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | This document explains who the admins are (see below), what they do in this repo, and how they should be doing it. If you're interested in becoming a maintainer, see [MAINTAINERS](MAINTAINERS.md). If you're interested in contributing, see [CONTRIBUTING](CONTRIBUTING.md). 4 | 5 | ## Current Admins 6 | 7 | | Admin | GitHub ID | Affiliation | 8 | | --------------- | --------------------------------------- | ----------- | 9 | | Charlotte | [CEHENKLE](https://github.com/CEHENKLE) | Amazon | 10 | 11 | ## Admin Responsibilities 12 | 13 | As an admin you own stewardship of the repository and its settings. Admins have [admin-level permissions on a repository](https://docs.github.com/en/organizations/managing-access-to-your-organizations-repositories/repository-permission-levels-for-an-organization). Use those privileges to serve the community and protect the repository as follows. 14 | 15 | ### Prioritize Security 16 | 17 | Security is your number one priority. Manage security keys and safeguard access to the repository. 18 | 19 | Note that this repository is monitored and supported 24/7 by Amazon Security, see [Reporting a Vulnerability](SECURITY.md) for details. 
20 | 21 | ### Enforce Code of Conduct 22 | 23 | Act on [CODE_OF_CONDUCT](CODE_OF_CONDUCT.md) violations by revoking access, and blocking malicious actors. 24 | 25 | ### Adopt Organizational Best Practices 26 | 27 | Adopt organizational best practices, work in the open, and collaborate with other admins by opening issues before making process changes. Prefer consistency, and avoid diverging from practices in the opensearch-project organization. 28 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | [![Build and Test Neural Search](https://github.com/opensearch-project/neural-search/actions/workflows/CI.yml/badge.svg)](https://github.com/opensearch-project/neural-search/actions/workflows/CI.yml) 4 | [![codecov](https://codecov.io/gh/opensearch-project/neural-search/branch/main/graph/badge.svg?token=PYQO2GW39S)](https://codecov.io/gh/opensearch-project/neural-search) 5 | [![Documentation](https://img.shields.io/badge/doc-reference-blue)](https://opensearch.org/docs/latest/search-plugins/neural-search/) 6 | [![Chat](https://img.shields.io/badge/chat-on%20forums-blue)](https://forum.opensearch.org) 7 | ![PRs welcome!](https://img.shields.io/badge/PRs-welcome!-success) 8 | 9 | ## OpenSearch Neural Search 10 | **OpenSearch Neural Search** is an OpenSearch plugin that adds dense neural retrieval into the OpenSearch ecosystem. 11 | The plugin provides the capability for indexing documents and doing neural search on the indexed documents. 12 | 13 | ## Project Resources 14 | 15 | * [Project Website](https://opensearch.org/) 16 | * [Downloads](https://opensearch.org/downloads.html). 17 | * [Documentation](https://opensearch.org/docs/) 18 | * Need help? Try [Forums](https://discuss.opendistrocommunity.dev/) 19 | * [Project Principles](https://opensearch.org/#principles) 20 | * [Contributing to OpenSearch](CONTRIBUTING.md) 21 | * [Maintainer Responsibilities](MAINTAINERS.md) 22 | * [Release Management](RELEASING.md) 23 | * [Admin Responsibilities](ADMINS.md) 24 | * [Security](SECURITY.md) 25 | * [Code of Conduct](#code-of-conduct) 26 | * [License](#license) 27 | * [Copyright](#copyright) 28 | 29 | ## Code of Conduct 30 | 31 | This project has adopted the [Amazon Open Source Code of Conduct](CODE_OF_CONDUCT.md). 
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq), or contact [opensource-codeofconduct@amazon.com](mailto:opensource-codeofconduct@amazon.com) with any additional questions or comments. 32 | 33 | ## License 34 | 35 | This project is licensed under the [Apache v2.0 License](LICENSE). 36 | 37 | ## Copyright 38 | 39 | Copyright OpenSearch Contributors. See [NOTICE](NOTICE) for details. 40 | -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- 1 | - [Overview](#overview) 2 | - [Branching](#branching) 3 | - [Release Branching](#release-branching) 4 | - [Feature Branches](#feature-branches) 5 | - [Release Labels](#release-labels) 6 | - [Releasing](#releasing) 7 | 8 | ## Overview 9 | 10 | This document explains the release strategy for artifacts in this organization. 11 | 12 | ## Branching 13 | 14 | ### Release Branching 15 | 16 | Given the current major release of 1.0, projects in this organization maintain the following active branches. 17 | 18 | * **main**: The next _major_ release. This is the branch where all merges take place and code moves fast. 19 | * **1.x**: The next _minor_ release. Once a change is merged into `main`, decide whether to backport it to `1.x`. 20 | * **1.0**: The _current_ release. In between minor releases, only hotfixes (e.g. security) are backported to `1.0`. 21 | 22 | Label PRs with the next major version label (e.g. `2.0.0`) and merge changes into `main`. Label PRs that you believe need to be backported as `1.x` and `1.0`. Backport PRs by checking out the versioned branch, cherry-picking changes, and opening a PR against each target backport branch. 23 | 24 | ### Feature Branches 25 | 26 | Do not create branches in the upstream repo; use your fork, with the exception of long-lasting feature branches that require active collaboration from multiple developers. 
Name feature branches `feature/<feature-name>`. Once the work is merged to `main`, please make sure to delete the feature branch. 27 | 28 | ## Release Labels 29 | 30 | Repositories create consistent release labels, such as `v1.0.0`, `v1.1.0` and `v2.0.0`, as well as `patch` and `backport`. Use release labels to target an issue or a PR for a given release. See [MAINTAINERS](MAINTAINERS.md#triage-open-issues) for more information on triaging issues. 31 | 32 | ## Releasing 33 | 34 | The release process is standard across repositories in this org and is run by a release manager volunteering from amongst [MAINTAINERS](MAINTAINERS.md). 35 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | ## Reporting a Vulnerability 2 | 3 | If you discover a potential security issue in this project, we ask that you notify OpenSearch Security directly via email to security@opensearch.org. Please do **not** create a public GitHub issue. 
4 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | --- 2 | coverage: 3 | precision: 2 4 | round: down 5 | range: '70...90' 6 | status: 7 | project: 8 | default: 9 | target: auto 10 | threshold: 5% 11 | -------------------------------------------------------------------------------- /formatter/license-header.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright OpenSearch Contributors 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | # The BWC version here should always be the latest opensearch version set in 7 | # https://github.com/opensearch-project/OpenSearch/blob/main/libs/core/src/main/java/org/opensearch/Version.java . 8 | # Wire compatibility of OpenSearch works such that a 3.x version is compatible with the 2.(latest-major) version. 9 | # Therefore, to run the rolling-upgrade BWC test on a local machine, the BWC version here should be set to 2.(latest-major). 
10 | systemProp.bwc.version=3.1.0-SNAPSHOT 11 | systemProp.bwc.bundle.version=3.0.0 12 | 13 | # For fixing Spotless check with Java 17 14 | org.gradle.jvmargs=--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED \ 15 | --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED \ 16 | --add-exports jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED \ 17 | --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \ 18 | --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED 19 | -------------------------------------------------------------------------------- /gradle/formatting.gradle: -------------------------------------------------------------------------------- 1 | allprojects { 2 | project.apply plugin: "com.diffplug.spotless" 3 | spotless { 4 | java { 5 | // Normally this isn't necessary, but we have Java sources in 6 | // non-standard places 7 | target '**/*.java' 8 | 9 | removeUnusedImports() 10 | eclipse().withP2Mirrors(Map.of("https://download.eclipse.org/", "https://mirror.umd.edu/eclipse/")).configFile rootProject.file('formatter/formatterConfig.xml') 11 | trimTrailingWhitespace() 12 | endWithNewline(); 13 | 14 | custom 'Refuse wildcard imports', { 15 | // Wildcard imports can't be resolved; fail the build 16 | if (it =~ /\s+import .*\*;/) { 17 | throw new AssertionError("Do not use wildcard imports. 
'spotlessApply' cannot resolve this issue.") 18 | } 19 | } 20 | } 21 | format 'misc', { 22 | target '*.md', '**/*.gradle', '**/*.json', '**/*.yaml', '**/*.yml', '**/*.svg', '**/*.properties' 23 | 24 | trimTrailingWhitespace() 25 | endWithNewline() 26 | } 27 | format("license", { 28 | licenseHeaderFile("${rootProject.file("formatter/license-header.txt")}", "package "); 29 | target("src/*/java/**/*.java","qa/*/java/**/*.java") 30 | }) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opensearch-project/neural-search/a6669e4cc5f69b56e6eb00105b49e71599692a48/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright OpenSearch Contributors 3 | # SPDX-License-Identifier: Apache-2.0 4 | # 5 | 6 | distributionBase=GRADLE_USER_HOME 7 | distributionPath=wrapper/dists 8 | distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6 9 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip 10 | networkTimeout=10000 11 | validateDistributionUrl=true 12 | zipStoreBase=GRADLE_USER_HOME 13 | zipStorePath=wrapper/dists 14 | -------------------------------------------------------------------------------- /lombok.config: -------------------------------------------------------------------------------- 1 | config.stopBubbling = true 2 | lombok.addLombokGeneratedAnnotation = true 3 | lombok.nonNull.exceptionType = JDK 4 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/ChunkingIndexSettings.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "settings":{ 3 | "default_pipeline": "%s", 4 | "number_of_shards": 3, 5 | "number_of_replicas": 1 6 | }, 7 | "mappings": { 8 | "properties": { 9 | "body": { 10 | "type": "text" 11 | }, 12 | "body_chunk": { 13 | "type": "text" 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/CreateModelGroupRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "description": "This is a public model group" 4 | } 5 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/IndexMappingMultipleShard.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "index": { 4 | "knn": true, 5 | "refresh_interval": "30s", 6 | "default_pipeline": "%s" 7 | }, 8 | "number_of_shards": 3, 9 | "number_of_replicas": 1 10 | }, 11 | "mappings": { 12 | "properties": { 13 | "passage_embedding": { 14 | "type": "knn_vector", 15 | "dimension": 768, 16 | "method": { 17 | "name": "hnsw", 18 | "space_type": "l2", 19 | "engine": "lucene", 20 | "parameters": { 21 | "ef_construction": 128, 22 | "m": 24 23 | } 24 | } 25 | }, 26 | "passage_text": { 27 | "type": "text" 28 | }, 29 | "passage_image": { 30 | "type": "text" 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/IndexMappingSingleShard.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "index": { 4 | "knn": true, 5 | "refresh_interval": "30s", 6 | "default_pipeline": "%s" 7 | }, 8 | "number_of_shards": 1, 9 | "number_of_replicas": 0 10 | }, 11 | "mappings": { 12 | "properties": { 13 | 
"passage_embedding": { 14 | "type": "knn_vector", 15 | "dimension": 768, 16 | "method": { 17 | "name": "hnsw", 18 | "space_type": "l2", 19 | "engine": "lucene", 20 | "parameters": { 21 | "ef_construction": 128, 22 | "m": 24 23 | } 24 | } 25 | }, 26 | "passage_text": { 27 | "type": "text" 28 | }, 29 | "passage_image": { 30 | "type": "text" 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_sparse_two_phase_processor": { 5 | "tag": "neural-sparse", 6 | "description": "This processor is making two-phase rescorer.", 7 | "enabled": true, 8 | "two_phase_parameter": { 9 | "prune_ratio": %f, 10 | "expansion_rate": %f, 11 | "max_window_size": %d 12 | } 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/PipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "field_map": { 8 | "passage_text": "passage_embedding" 9 | } 10 | } 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An sparse encoding ingest pipeline", 3 | "processors": [ 4 | { 5 | "sparse_encoding": { 6 | "model_id": "%s", 7 | "field_map": { 8 | "passage_text": "passage_embedding" 9 | } 10 | } 11 | } 12 | ] 13 | } 14 | 
-------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example fixed token length chunker pipeline with standard tokenizer", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "fixed_token_length": { 11 | "token_limit": 10, 12 | "tokenizer": "standard" 13 | } 14 | } 15 | } 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/PipelineForTextImageProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text image embedding pipeline", 3 | "processors": [ 4 | { 5 | "text_image_embedding": { 6 | "model_id": "%s", 7 | "embedding": "passage_embedding", 8 | "field_map": { 9 | "text": "passage_text", 10 | "image": "passage_image" 11 | } 12 | } 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/SearchRequestPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_query_enricher": { 5 | "tag": "tag1", 6 | "description": "This processor is going to restrict to publicly visible documents", 7 | "default_model_id": "%s" 8 | } 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/SparseIndexMappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "default_pipeline": "%s", 4 | "number_of_shards": 3, 5 | "number_of_replicas": 1 6 
| }, 7 | "mappings": { 8 | "properties": { 9 | "passage_embedding": { 10 | "type": "rank_features" 11 | }, 12 | "passage_text": { 13 | "type": "text" 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/UploadModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "traced_small_model", 3 | "version": "1.0.0", 4 | "model_format": "TORCH_SCRIPT", 5 | "function_name": "TEXT_EMBEDDING", 6 | "model_task_type": "text_embedding", 7 | "model_content_hash_value": "e13b74006290a9d0f58c1376f9629d4ebc05a0f9385f40db837452b167ae9021", 8 | "model_group_id": "%s", 9 | "model_config": { 10 | "model_type": "bert", 11 | "embedding_dimension": 768, 12 | "framework_type": "sentence_transformers", 13 | "all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}" 14 | }, 15 | "url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true" 16 | } 17 | -------------------------------------------------------------------------------- /qa/restart-upgrade/src/test/resources/processor/UploadSparseEncodingModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tokenize-idf-0915", 3 | "version": "1.0.0", 4 | "function_name": "SPARSE_TOKENIZE", 5 | "description": "test model", 6 | "model_format": "TORCH_SCRIPT", 7 | "model_group_id": "%s", 8 | "model_content_hash_value": "b345e9e943b62c405a8dd227ef2c46c84c5ff0a0b71b584be9132b37bce91a9a", 9 | "url": "https://github.com/opensearch-project/ml-commons/raw/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/sparse_encoding/sparse_demo.zip" 10 | } 11 | 
-------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/ChunkingIndexSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings":{ 3 | "default_pipeline": "%s", 4 | "number_of_shards": 3, 5 | "number_of_replicas": 1 6 | }, 7 | "mappings": { 8 | "properties": { 9 | "body": { 10 | "type": "text" 11 | }, 12 | "body_chunk": { 13 | "type": "text" 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/CreateModelGroupRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "description": "This is a public model group" 4 | } 5 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/IndexMappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "index": { 4 | "knn": true, 5 | "refresh_interval": "30s", 6 | "default_pipeline": "%s" 7 | }, 8 | "number_of_shards": 3, 9 | "number_of_replicas": 1 10 | }, 11 | "mappings": { 12 | "properties": { 13 | "passage_embedding": { 14 | "type": "knn_vector", 15 | "dimension": 768, 16 | "method": { 17 | "name": "hnsw", 18 | "space_type": "l2", 19 | "engine": "lucene", 20 | "parameters": { 21 | "ef_construction": 128, 22 | "m": 24 23 | } 24 | } 25 | }, 26 | "passage_text": { 27 | "type": "text" 28 | }, 29 | "passage_image": { 30 | "type": "text" 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | 
"neural_sparse_two_phase_processor": { 5 | "tag": "neural-sparse", 6 | "description": "This processor is making two-phase rescorer.", 7 | "enabled": true, 8 | "two_phase_parameter": { 9 | "prune_ratio": %f, 10 | "expansion_rate": %f, 11 | "max_window_size": %d 12 | } 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/PipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "field_map": { 8 | "passage_text": "passage_embedding" 9 | } 10 | } 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An sparse encoding ingest pipeline", 3 | "processors": [ 4 | { 5 | "sparse_encoding": { 6 | "model_id": "%s", 7 | "batch_size": "%d", 8 | "field_map": { 9 | "passage_text": "passage_embedding" 10 | } 11 | } 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/PipelineForTextChunkingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example fixed token length chunker pipeline with standard tokenizer", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "fixed_token_length": { 11 | "token_limit": 10, 12 | "tokenizer": "standard" 13 | } 14 | } 15 | } 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- 
/qa/rolling-upgrade/src/test/resources/processor/PipelineForTextImageProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text image embedding pipeline", 3 | "processors": [ 4 | { 5 | "text_image_embedding": { 6 | "model_id": "%s", 7 | "embedding": "passage_embedding", 8 | "field_map": { 9 | "text": "passage_text", 10 | "image": "passage_image" 11 | } 12 | } 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/SearchRequestPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_query_enricher": { 5 | "tag": "tag1", 6 | "description": "This processor is going to restrict to publicly visible documents", 7 | "default_model_id": "%s" 8 | } 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/SparseIndexMappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "default_pipeline": "%s", 4 | "number_of_shards": 3, 5 | "number_of_replicas": 1 6 | }, 7 | "mappings": { 8 | "properties": { 9 | "passage_embedding": { 10 | "type": "rank_features" 11 | }, 12 | "passage_text": { 13 | "type": "text" 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/UploadModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "traced_small_model", 3 | "version": "1.0.0", 4 | "model_format": "TORCH_SCRIPT", 5 | "function_name": "TEXT_EMBEDDING", 6 | "model_task_type": "text_embedding", 7 | "model_content_hash_value": 
"e13b74006290a9d0f58c1376f9629d4ebc05a0f9385f40db837452b167ae9021", 8 | "model_group_id": "%s", 9 | "model_config": { 10 | "model_type": "bert", 11 | "embedding_dimension": 768, 12 | "framework_type": "sentence_transformers", 13 | "all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}" 14 | }, 15 | "url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true" 16 | } 17 | -------------------------------------------------------------------------------- /qa/rolling-upgrade/src/test/resources/processor/UploadSparseEncodingModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tokenize-idf-0915", 3 | "version": "1.0.0", 4 | "function_name": "SPARSE_TOKENIZE", 5 | "description": "test model", 6 | "model_format": "TORCH_SCRIPT", 7 | "model_group_id": "%s", 8 | "model_content_hash_value": "b345e9e943b62c405a8dd227ef2c46c84c5ff0a0b71b584be9132b37bce91a9a", 9 | "url": "https://github.com/opensearch-project/ml-commons/raw/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/sparse_encoding/sparse_demo.zip" 10 | } 11 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.10.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.10.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.10.0 4 | 5 | ### Features 6 | * Improved Hybrid Search relevancy by Score Normalization and Combination ([#241](https://github.com/opensearch-project/neural-search/pull/241/)) 7 | 8 | ### Enhancements 9 | * Changed format for hybrid query results to a single list of scores with delimiter ([#259](https://github.com/opensearch-project/neural-search/pull/259)) 
10 | * Added validations for score combination weights in Hybrid Search ([#265](https://github.com/opensearch-project/neural-search/pull/265)) 11 | * Made hybrid search active by default ([#274](https://github.com/opensearch-project/neural-search/pull/274)) 12 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.11.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.11.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.11.0 4 | 5 | ### Features 6 | * Support sparse semantic retrieval by introducing `sparse_encoding` ingest processor and query builder ([#333](https://github.com/opensearch-project/neural-search/pull/333)) 7 | * Enabled support for applying default modelId in neural search query ([#337](https://github.com/opensearch-project/neural-search/pull/337) 8 | ### Bug Fixes 9 | * Fixed exception in Hybrid Query for one shard and multiple node ([#396](https://github.com/opensearch-project/neural-search/pull/396)) 10 | ### Maintenance 11 | * Consumed latest changes from core, use QueryPhaseSearcherWrapper as parent class for Hybrid QPS ([#356](https://github.com/opensearch-project/neural-search/pull/356)) 12 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.12.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.12.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.12.0 4 | 5 | ### Features 6 | - Add rerank processor interface and ml-commons reranker ([#494](https://github.com/opensearch-project/neural-search/pull/494)) 7 | ### Bug Fixes 8 | - Fixing multiple issues reported in #497 ([#524](https://github.com/opensearch-project/neural-search/pull/524)) 9 | - Fix Flaky test reported in #433 ([#533](https://github.com/opensearch-project/neural-search/pull/533)) 10 | - Enable 
support for default model id on HybridQueryBuilder ([#541](https://github.com/opensearch-project/neural-search/pull/541)) 11 | - Fix Flaky test reported in #384 ([#559](https://github.com/opensearch-project/neural-search/pull/559)) 12 | - Add validations for reranker requests per #555 ([#562](https://github.com/opensearch-project/neural-search/pull/562)) 13 | ### Infrastructure 14 | - BWC tests for Neural Search ([#515](https://github.com/opensearch-project/neural-search/pull/515)) 15 | - Github action to run integ tests in secure opensearch cluster ([#535](https://github.com/opensearch-project/neural-search/pull/535)) 16 | - BWC tests for Multimodal search, Hybrid Search and Neural Sparse Search ([#533](https://github.com/opensearch-project/neural-search/pull/533)) 17 | - Distribution bundle bwc tests ([#579])(https://github.com/opensearch-project/neural-search/pull/579) 18 | ### Maintenance 19 | - Update spotless and eclipse dependencies ([#589](https://github.com/opensearch-project/neural-search/pull/589)) 20 | ### Refactoring 21 | - Added spotless check in the build ([#515](https://github.com/opensearch-project/neural-search/pull/515)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.13.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.13.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.13.0 4 | 5 | ### Features 6 | - Implement document chunking processor with fixed token length and delimiter algorithm ([#607](https://github.com/opensearch-project/neural-search/pull/607/)) 7 | - Enabled support for applying default modelId in neural sparse query ([#614](https://github.com/opensearch-project/neural-search/pull/614) 8 | ### Enhancements 9 | - Adding aggregations in hybrid query ([#630](https://github.com/opensearch-project/neural-search/pull/630)) 10 | - Support for post filter in hybrid query 
([#633](https://github.com/opensearch-project/neural-search/pull/633)) 11 | ### Bug Fixes 12 | - Fix runtime exceptions in hybrid query for case when sub-query scorer return TwoPhase iterator that is incompatible with DISI iterator ([#624](https://github.com/opensearch-project/neural-search/pull/624)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.14.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.14.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.14.0 4 | 5 | ### Features 6 | * Support k-NN radial search parameters in neural search([#697](https://github.com/opensearch-project/neural-search/pull/697)) 7 | ### Enhancements 8 | * BWC tests for text chunking processor ([#661](https://github.com/opensearch-project/neural-search/pull/661)) 9 | * Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663)) 10 | * Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670)) 11 | * Allowing query by raw tokens in neural_sparse query ([#693](https://github.com/opensearch-project/neural-search/pull/693)) 12 | * Removed stream.findFirst implementation to use more native iteration implement to improve hybrid query latencies by 35% ([#706](https://github.com/opensearch-project/neural-search/pull/706)) 13 | * Removed map of subquery to subquery index in favor of storing index as part of disi wrapper to improve hybrid query latencies by 20% ([#711](https://github.com/opensearch-project/neural-search/pull/711)) 14 | * Avoid change max_chunk_limit exceed exception in text chunking processor ([#717](https://github.com/opensearch-project/neural-search/pull/717)) 15 | ### Bug Fixes 16 | * Fix async actions are left in neural_sparse query 
([#438](https://github.com/opensearch-project/neural-search/pull/438)) 17 | * Fix typo for sparse encoding processor factory([#578](https://github.com/opensearch-project/neural-search/pull/578)) 18 | * Add non-null check for queryBuilder in NeuralQueryEnricherProcessor ([#615](https://github.com/opensearch-project/neural-search/pull/615)) 19 | * Add max_token_score field placeholder in NeuralSparseQueryBuilder to fix the rolling-upgrade from 2.x nodes bwc tests. ([#696](https://github.com/opensearch-project/neural-search/pull/696)) 20 | * Fix multi node "no such index" error in text chunking processor. ([#713](https://github.com/opensearch-project/neural-search/pull/713)) 21 | ### Infrastructure 22 | * Adding integration tests for scenario of hybrid query with aggregations ([#632](https://github.com/opensearch-project/neural-search/pull/632)) 23 | ### Maintenance 24 | * Update bwc tests for neural_query_enricher neural_sparse search ([#652](https://github.com/opensearch-project/neural-search/pull/652)) 25 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.15.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.15.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.15.0 4 | 5 | ### Features 6 | * Speed up NeuralSparseQuery by two-phase using a custom search pipeline.([#646](https://github.com/opensearch-project/neural-search/issues/646)) 7 | * Support batchExecute in TextEmbeddingProcessor and SparseEncodingProcessor ([#743](https://github.com/opensearch-project/neural-search/issues/743)) 8 | ### Enhancements 9 | * Pass empty doc collector instead of top docs collector to improve hybrid query latencies by 20% ([#731](https://github.com/opensearch-project/neural-search/pull/731)) 10 | * Optimize parameter parsing in text chunking processor ([#733](https://github.com/opensearch-project/neural-search/pull/733)) 11 | * Use lazy 
initialization for priority queue of hits and scores to improve latencies by 20% ([#746](https://github.com/opensearch-project/neural-search/pull/746)) 12 | * Optimize max score calculation in the Query Phase of the Hybrid Search ([765](https://github.com/opensearch-project/neural-search/pull/765)) 13 | * Implement parallel execution of sub-queries for hybrid search ([#749](https://github.com/opensearch-project/neural-search/pull/749)) 14 | ### Bug Fixes 15 | * Total hit count fix in Hybrid Query ([756](https://github.com/opensearch-project/neural-search/pull/756)) 16 | * Fix map type validation issue in multiple pipeline processors ([#661](https://github.com/opensearch-project/neural-search/pull/661)) 17 | ### Infrastructure 18 | * Disable memory circuit breaker for integ tests ([#770](https://github.com/opensearch-project/neural-search/pull/770)) 19 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.16.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.16.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.16.0 4 | 5 | ### Features 6 | - Enable sorting and search_after features in Hybrid Search [#827](https://github.com/opensearch-project/neural-search/pull/827) 7 | ### Enhancements 8 | - InferenceProcessor inherits from AbstractBatchingProcessor to support sub batching in processor [#820](https://github.com/opensearch-project/neural-search/pull/820) 9 | - Adds dynamic knn query parameters efsearch and nprobes [#814](https://github.com/opensearch-project/neural-search/pull/814/) 10 | - Enable '.' 
for nested field in text embedding processor ([#811](https://github.com/opensearch-project/neural-search/pull/811)) 11 | - Enhance syntax for nested mapping in destination fields([#841](https://github.com/opensearch-project/neural-search/pull/841)) 12 | ### Bug Fixes 13 | - Fix function names and comments in the gradle file for BWC tests ([#795](https://github.com/opensearch-project/neural-search/pull/795/files)) 14 | - Fix for missing HybridQuery results when concurrent segment search is enabled ([#800](https://github.com/opensearch-project/neural-search/pull/800)) 15 | ### Infrastructure 16 | - Add BWC for batch ingestion ([#769](https://github.com/opensearch-project/neural-search/pull/769)) 17 | - Add backward test cases for neural sparse two phase processor ([#777](https://github.com/opensearch-project/neural-search/pull/777)) 18 | - Fix CI for JDK upgrade towards 21 ([#835](https://github.com/opensearch-project/neural-search/pull/835)) 19 | - Maven publishing workflow by upgrade jdk to 21 ([#837](https://github.com/opensearch-project/neural-search/pull/837)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.17.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.17.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.17.0 4 | 5 | ### Enhancements 6 | - Adds rescore parameter support ([#885](https://github.com/opensearch-project/neural-search/pull/885)) 7 | ### Bug Fixes 8 | - Removing code to cut search results of hybrid search in the priority queue ([#867](https://github.com/opensearch-project/neural-search/pull/867)) 9 | - Fixed merge logic in hybrid query for multiple shards case ([#877](https://github.com/opensearch-project/neural-search/pull/877)) 10 | ### Infrastructure 11 | - Update batch related tests to use batch_size in processor & refactor BWC version check 
([#852](https://github.com/opensearch-project/neural-search/pull/852)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.18.0.0.md: -------------------------------------------------------------------------------- 1 | 2 | ## Version 2.18.0.0 Release Notes 3 | 4 | Compatible with OpenSearch 2.18.0 5 | 6 | ### Features 7 | - Introduces ByFieldRerankProcessor for second level reranking on documents ([#932](https://github.com/opensearch-project/neural-search/pull/932)) 8 | ### Bug Fixes 9 | - Fixed incorrect document order for nested aggregations in hybrid query ([#956](https://github.com/opensearch-project/neural-search/pull/956)) 10 | ### Enhancements 11 | - Implement `ignore_missing` field in text chunking processors ([#907](https://github.com/opensearch-project/neural-search/pull/907)) 12 | - Added rescorer in hybrid query ([#917](https://github.com/opensearch-project/neural-search/pull/917)) 13 | 14 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.19.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.19.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.19.0 4 | 5 | ### Features 6 | * Pagination in Hybrid query ([#1048](https://github.com/opensearch-project/neural-search/pull/1048)) 7 | * Implement Reciprocal Rank Fusion score normalization/combination technique in hybrid query ([#874](https://github.com/opensearch-project/neural-search/pull/874)) 8 | ### Bug Fixes 9 | * Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998)) 10 | * Fix bug where ingested document has list of nested objects ([#1040](https://github.com/opensearch-project/neural-search/pull/1040)) 11 | * Fixed document source and score field mismatch in sorted hybrid queries 
([#1043](https://github.com/opensearch-project/neural-search/pull/1043)) 12 | * Update NeuralQueryBuilder doEquals() and doHashCode() to cater the missing parameters information ([#1045](https://github.com/opensearch-project/neural-search/pull/1045)). 13 | * Fix bug where embedding is missing when ingested document has "." in field name, and mismatches fieldMap config ([#1062](https://github.com/opensearch-project/neural-search/pull/1062)) 14 | ### Enhancements 15 | * Explainability in hybrid query ([#970](https://github.com/opensearch-project/neural-search/pull/970)) 16 | * Support new knn query parameter expand_nested ([#1013](https://github.com/opensearch-project/neural-search/pull/1013)) 17 | * Implement pruning for neural sparse ingestion pipeline and two phase search processor ([#988](https://github.com/opensearch-project/neural-search/pull/988)) 18 | * Support empty string for fields in text embedding processor ([#1041](https://github.com/opensearch-project/neural-search/pull/1041)) 19 | * Optimize ML inference connection retry logic ([#1054](https://github.com/opensearch-project/neural-search/pull/1054)) 20 | * Support for builder constructor in Neural Query Builder ([#1047](https://github.com/opensearch-project/neural-search/pull/1047)) 21 | * Validate Disjunction query to avoid having nested hybrid query ([#1127](https://github.com/opensearch-project/neural-search/pull/1127)) 22 | ### Maintenance 23 | * Add reindex integration tests for ingest processors ([#1075](https://github.com/opensearch-project/neural-search/pull/1075)) 24 | * Fix github CI by adding eclipse dependency in formatting.gradle ([#1079](https://github.com/opensearch-project/neural-search/pull/1079)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.4.1.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.4.1.0 Release Notes 2 | 3 | Compatible with 
OpenSearch 2.4.1 4 | 5 | ### Bug Fixes 6 | 7 | * Change the behavior when embedding fields are not present ([#72](https://github.com/opensearch-project/neural-search/pull/72)) 8 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.5.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.5.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.5.0 4 | 5 | ### Enhancements 6 | 7 | * Add filter option for query type ([#88](https://github.com/opensearch-project/neural-search/pull/88)) 8 | * Add retry mechanism for neural search inference ([#91](https://github.com/opensearch-project/neural-search/pull/91)) 9 | * Enable core branching strategy and make Neural Plugin as extensible plugin. ([#87](https://github.com/opensearch-project/neural-search/pull/87)) 10 | 11 | ### Documentation 12 | 13 | * Update MAINTAINERS.md format ([#95](https://github.com/opensearch-project/neural-search/pull/95)) 14 | * Use short-form MAINTAINERS.md ([#84](https://github.com/opensearch-project/neural-search/pull/84)) 15 | 16 | ### Refactoring 17 | 18 | * Remove unused MLPredict Transport action from src ([#94](https://github.com/opensearch-project/neural-search/pull/94)) 19 | 20 | ### Maintenance 21 | 22 | * Increment version to 2.5.0-SNAPSHOT ([#76](https://github.com/opensearch-project/neural-search/pull/76)) 23 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.6.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.6.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.6.0 4 | 5 | ### Maintenance 6 | 7 | * Increment version to 2.6.0-SNAPSHOT ([#117](https://github.com/opensearch-project/neural-search/pull/117)) 8 | -------------------------------------------------------------------------------- 
/release-notes/opensearch-neural-search.release-notes-2.7.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.7.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.7.0 4 | 5 | ### Infrastructure 6 | 7 | * Add GHA to publish to maven repository ([#237](https://github.com/opensearch-project/neural-search/pull/130)) 8 | * Add reflection dependency ([#136](https://github.com/opensearch-project/neural-search/pull/136)) 9 | * Add CHANGELOG ([#135](https://github.com/opensearch-project/neural-search/pull/135)) 10 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.8.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.8.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.8.0 4 | 5 | ### Infrastructure 6 | 7 | * Bump gradle version to 8.1.1 ([#169](https://github.com/opensearch-project/neural-search/pull/169)) 8 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-2.9.0.0.md: -------------------------------------------------------------------------------- 1 | ## Version 2.9.0.0 Release Notes 2 | 3 | Compatible with OpenSearch 2.9.0 4 | 5 | ### Maintenance 6 | Increment version to 2.9.0-SNAPSHOT ([#191](https://github.com/opensearch-project/neural-search/pull/191)) 7 | 8 | ### Bug Fixes 9 | Fix update document with knnn_vector size not matching issue ([#208](https://github.com/opensearch-project/neural-search/pull/208)) 10 | -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-3.0.0.0-alpha1.md: -------------------------------------------------------------------------------- 1 | ## Version 3.0.0.0-alpha1 Release Notes 2 | 3 | Compatible with OpenSearch 3.0.0-alpha1 4 | 5 | ### Enhancements 6 | - Set neural-search 
plugin 3.0.0 baseline JDK version to JDK-21 ([#838](https://github.com/opensearch-project/neural-search/pull/838)) 7 | - Support different embedding types in model's response ([#1007](https://github.com/opensearch-project/neural-search/pull/1007)) 8 | ### Bug Fixes 9 | - Fix a bug to unflatten the doc with list of map with multiple entries correctly ([#1204](https://github.com/opensearch-project/neural-search/pull/1204)). 10 | ### Infrastructure 11 | - [3.0] Update neural-search for OpenSearch 3.0 compatibility ([#1141](https://github.com/opensearch-project/neural-search/pull/1141)) 12 | ### Refactoring 13 | - Encapsulate KNNQueryBuilder creation within NeuralKNNQueryBuilder ([#1183](https://github.com/opensearch-project/neural-search/pull/1183)) 14 | ### Documentation 15 | - Adding code guidelines ([#502](https://github.com/opensearch-project/neural-search/pull/502)) -------------------------------------------------------------------------------- /release-notes/opensearch-neural-search.release-notes-3.0.0.0-beta1.md: -------------------------------------------------------------------------------- 1 | ## Version 3.0.0.0-beta1 Release Notes 2 | 3 | Compatible with OpenSearch 3.0.0-beta1 4 | 5 | ### Features 6 | - Lower bound for min-max normalization technique in hybrid query ([#1195](https://github.com/opensearch-project/neural-search/pull/1195)) 7 | - Support filter function for HybridQueryBuilder and NeuralQueryBuilder ([#1206](https://github.com/opensearch-project/neural-search/pull/1206)) 8 | - Add Z Score normalization technique ([#1224](https://github.com/opensearch-project/neural-search/pull/1224)) 9 | - Support semantic sentence highlighter ([#1193](https://github.com/opensearch-project/neural-search/pull/1193)) 10 | - Optimize embedding generation in Text Embedding Processor ([#1191](https://github.com/opensearch-project/neural-search/pull/1191)) 11 | - Optimize embedding generation in Sparse Encoding Processor 
([#1246](https://github.com/opensearch-project/neural-search/pull/1246)) 12 | - Optimize embedding generation in Text/Image Embedding Processor ([#1249](https://github.com/opensearch-project/neural-search/pull/1249)) 13 | - Inner hits support with hybrid query ([#1253](https://github.com/opensearch-project/neural-search/pull/1253)) 14 | - Support custom tags in semantic highlighter ([#1254](https://github.com/opensearch-project/neural-search/pull/1254)) 15 | - Add stats API ([#1256](https://github.com/opensearch-project/neural-search/pull/1256)) 16 | 17 | ### Bug Fixes 18 | - Remove validations for unmapped fields (text and image) in TextImageEmbeddingProcessor ([#1230](https://github.com/opensearch-project/neural-search/pull/1230)) 19 | 20 | ### Infrastructure 21 | - [3.0] Update neural-search for OpenSearch 3.0 beta compatibility ([#1245](https://github.com/opensearch-project/neural-search/pull/1245)) 22 | -------------------------------------------------------------------------------- /repositories.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | 6 | repositories { 7 | mavenLocal() 8 | maven { url "https://aws.oss.sonatype.org/content/repositories/snapshots" } 9 | mavenCentral() 10 | maven { url "https://plugins.gradle.org/m2/" } 11 | } 12 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * The settings file is used to specify which projects to include in your build. 5 | * 6 | * Detailed information about configuring a multi-project build in Gradle can be found 7 | * in the user manual at https://docs.gradle.org/7.5.1/userguide/multi_project_builds.html 8 | * This project uses @Incubating APIs which are subject to change. 
/**
 * Utility class for working with vectors.
 */
public final class VectorUtil {

    // Utility class: not meant to be instantiated or extended.
    private VectorUtil() {}

    /**
     * Converts a vector represented as a list to a primitive float array.
     *
     * @param vectorAsList {@link List} of {@link Float}'s representing the vector; must be non-null
     * @return array of floats produced from the input list; empty array for an empty list
     */
    public static float[] vectorAsListToArray(final List<Float> vectorAsList) {
        final float[] vector = new float[vectorAsList.size()];
        for (int i = 0; i < vectorAsList.size(); i++) {
            vector[i] = vectorAsList.get(i).floatValue();
        }
        return vector;
    }
}
Actions like create index and legacy create/update index template will have the
 * mapping properties under a _doc key.
 */
public static final String DOC = "_doc";

/**
 * Name for properties. An object field will define subfields as properties.
 */
public static final String PROPERTIES = "properties";

/**
 * Separator in a field path.
 */
public static final String PATH_SEPARATOR = ".";
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/constants/SemanticFieldConstants.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.constants;

/**
 * Constants for the mapping parameters of the semantic field.
 */
public class SemanticFieldConstants {
    /**
     * Name of the model id parameter. We use this key to define the id of the ML model that we will use for the
     * semantic field.
     */
    public static final String MODEL_ID = "model_id";

    /**
     * Name of the search model id parameter. We use this key to define the id of the ML model that we will use to
     * inference the query text during the search. If this parameter is not defined we will use the model_id instead.
     */
    public static final String SEARCH_MODEL_ID = "search_model_id";

    /**
     * Name of the raw field type parameter. We use this key to define the field type for the raw data. It will control
     * how to store and query the raw data.
     */
    public static final String RAW_FIELD_TYPE = "raw_field_type";

    /**
     * Name of the semantic info field name parameter. We use this key to define a custom field name for the
     * semantic info.
     */
    public static final String SEMANTIC_INFO_FIELD_NAME = "semantic_info_field_name";

    /**
     * Default suffix for the semantic info field name. It will be used to construct the field name of the
     * semantic info.
     */
    public static final String DEFAULT_SEMANTIC_INFO_FIELD_NAME_SUFFIX = "_semantic_info";

    /**
     * Name of the field to control if we should do chunking for the semantic field. By default, the chunking is
     * disabled to not downgrade the search performance.
     */
    public static final String CHUNKING = "chunking";

    /**
     * Name of the field for the search analyzer parameter. With this field set up, the user does not have to
     * specify it during query time.
     */
    public static final String SEMANTIC_FIELD_SEARCH_ANALYZER = "semantic_field_search_analyzer";
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/executors/HybridQueryExecutorCollector.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.executors;

import lombok.AccessLevel;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Synchronized;

import java.util.Optional;
import java.util.function.Function;

/**
 * {@link HybridQueryExecutorCollector} is a generic Collector used by Hybrid Search Query during
 * Query phase to parallelize sub query's action to improve latency
 *
 * @param <I> type of the input shared by all collectors created by {@code newCollector}
 * @param <R> type of the result produced by the collect action
 */
@RequiredArgsConstructor(staticName = "newCollector", access = AccessLevel.PACKAGE)
public final class HybridQueryExecutorCollector<I, R> {

    // will be used as input for all instances of collector generated by newCollector method,
    // if it is required for collect operation
    private final I param;

    // getResult should only be called after collector's collect method is invoked.
    @Getter(onMethod_ = { @Synchronized })
    private Optional<R> result = Optional.empty();

    /**
     * Called once for every time an action has to be performed on this Collector
     * @param action function that will be executed and result will be stored at result.
     */
    @Synchronized
    public void collect(Function<I, R> action) {
        result = Optional.ofNullable(action.apply(param));
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/executors/HybridQueryExecutorCollectorManager.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.executors;

/**
 * {@link HybridQueryExecutorCollectorManager} is responsible for creating new {@link HybridQueryExecutorCollector} instances
 *
 * @param <C> concrete type of the collector produced by this manager
 */
public interface HybridQueryExecutorCollectorManager<C extends HybridQueryExecutorCollector> {
    /**
     * Return a new Collector instance that extends {@link HybridQueryExecutor}.
     * This will be used during Hybrid Search when sub queries wants to execute part of
     * operation that is independent of each other that can be parallelized to improve
     * the performance.
     * @return HybridQueryExecutorCollector
     */
    C newCollector();
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/BooleanQueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;

import lombok.extern.log4j.Log4j2;

/**
 * Extractor for boolean queries. Collects the extractable text of every
 * non-prohibited clause and joins the pieces with single spaces.
 */
@Log4j2
public class BooleanQueryTextExtractor implements QueryTextExtractor {

    private final QueryTextExtractorRegistry registry;

    public BooleanQueryTextExtractor(QueryTextExtractorRegistry registry) {
        this.registry = registry;
    }

    @Override
    public String extractQueryText(Query query, String fieldName) {
        BooleanQuery booleanQuery = toQueryType(query, BooleanQuery.class);

        StringBuilder sb = new StringBuilder();

        for (BooleanClause clause : booleanQuery.clauses()) {
            // Skip MUST_NOT clauses as they represent negative terms
            if (clause.isProhibited()) {
                continue;
            }

            try {
                String clauseText = registry.extractQueryText(clause.query(), fieldName);
                if (clauseText.isEmpty() == false) {
                    if (sb.isEmpty() == false) {
                        sb.append(" ");
                    }
                    sb.append(clauseText);
                }
            } catch (IllegalArgumentException e) {
                // Best-effort extraction: an unsupported clause type should not fail the whole query
                log.warn("Failed to extract text from clause {}: {}", clause, e.getMessage(), e);
            }
        }

        return sb.toString();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/HybridQueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.Query;
import org.opensearch.neuralsearch.query.HybridQuery;

import java.util.LinkedHashSet;
import java.util.Set;

/**
 * Extractor for hybrid queries that combines text from all sub-queries
 */
public class HybridQueryTextExtractor implements QueryTextExtractor {

    private final QueryTextExtractorRegistry registry;

    public HybridQueryTextExtractor(QueryTextExtractorRegistry registry) {
        this.registry = registry;
    }

    @Override
    public String extractQueryText(Query query, String fieldName) {
        HybridQuery hybridQuery = toQueryType(query, HybridQuery.class);

        // LinkedHashSet avoids duplicates while preserving sub-query order,
        // so the joined result is deterministic (a plain HashSet would not be)
        Set<String> queryTexts = new LinkedHashSet<>();

        // Extract text from each sub-query
        for (Query subQuery : hybridQuery.getSubQueries()) {
            String extractedText = registry.extractQueryText(subQuery, fieldName);
            if (extractedText != null && extractedText.isEmpty() == false) {
                queryTexts.add(extractedText);
            }
        }

        // Join with spaces
        return String.join(" ", queryTexts).trim();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/NestedQueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.Query;
import org.opensearch.index.search.OpenSearchToParentBlockJoinQuery;

/**
 * Extractor for nested queries, which arrive as a parent block join query.
 * Delegates to the registry to extract text from the wrapped child query.
 */
public class NestedQueryTextExtractor implements QueryTextExtractor {
    private final QueryTextExtractorRegistry registry;

    public NestedQueryTextExtractor(QueryTextExtractorRegistry registry) {
        this.registry = registry;
    }

    @Override
    public String extractQueryText(Query query, String fieldName) {
        OpenSearchToParentBlockJoinQuery blockJoinQuery = toQueryType(query, OpenSearchToParentBlockJoinQuery.class);
        return registry.extractQueryText(blockJoinQuery.getChildQuery(), fieldName);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/NeuralQueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.Query;
import org.opensearch.neuralsearch.query.NeuralKNNQuery;

/**
 * Extractor for neural queries
 */
public class NeuralQueryTextExtractor implements QueryTextExtractor {

    @Override
    public String extractQueryText(Query query, String fieldName) {
        NeuralKNNQuery neuralQuery = toQueryType(query, NeuralKNNQuery.class);
        return neuralQuery.getOriginalQueryText();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/QueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.search.Query;

import java.util.Locale;

/**
 * Interface for extracting query text from different query types
 */
public interface QueryTextExtractor {
    /**
     * Converts a query to the expected type, throwing an exception if the type doesn't match
     *
     * @param query The query to convert
     * @param expectedType The expected query type
     * @param <T> concrete query type to cast to
     * @return The query cast to the expected type
     * @throws IllegalArgumentException if the query is not of the expected type
     */
    default <T extends Query> T toQueryType(Query query, Class<T> expectedType) {
        if (!expectedType.isInstance(query)) {
            throw new IllegalArgumentException(
                String.format(Locale.ROOT, "Expected %s but got %s", expectedType.getSimpleName(), query.getClass().getSimpleName())
            );
        }
        return expectedType.cast(query);
    }

    /**
     * Extracts text from a query for highlighting
     *
     * @param query The query to extract text from
     * @param fieldName The name of the field being highlighted
     * @return The extracted query text
     */
    String extractQueryText(Query query, String fieldName);
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/highlight/extractor/TermQueryTextExtractor.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.highlight.extractor;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

/**
 * Extractor for term queries
 */
public class TermQueryTextExtractor implements QueryTextExtractor {

    @Override
    public String extractQueryText(Query query, String fieldName) {
        TermQuery termQuery = toQueryType(query, TermQuery.class);

        Term term = termQuery.getTerm();
        // Only include terms from the field we're highlighting
        if (fieldName.equals(term.field())) {
            return term.text();
        }
        return "";
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/mapper/dto/SemanticParameters.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.mapper.dto;

import lombok.Builder;
import lombok.Getter;

/**
 * A DTO to hold all the semantic parameters.
 */
@Getter
@Builder
public class SemanticParameters {
    private final String modelId;
    private final String searchModelId;
    private final String rawFieldType;
    private final String semanticInfoFieldName;
    private final Boolean chunkingEnabled;
    private final String semanticFieldSearchAnalyzer;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/InferenceRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import java.util.List;

import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.NonNull;
import lombok.Setter;
import lombok.experimental.SuperBuilder;

/**
 * Base abstract class for inference requests.
 * This class contains common fields and behaviors shared across different types of inference requests.
 */
@SuperBuilder
@NoArgsConstructor
@Getter
@Setter
public abstract class InferenceRequest {
    /**
     * Unique identifier for the model to be used for inference.
     * This field is required and cannot be null.
     */
    @NonNull
    private String modelId;
    /**
     * List of targetResponseFilters to be applied.
     * Defaults to ["sentence_embedding"] if not specified.
     */
    @Builder.Default
    private List<String> targetResponseFilters = List.of("sentence_embedding");
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/MapInferenceRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import java.util.Map;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.experimental.SuperBuilder;

/**
 * Implementation of InferenceRequest for inputObjects based inference requests.
 * Use this class when the input data consists of key-value pairs.
 *
 * @see InferenceRequest
 */
@SuperBuilder
@NoArgsConstructor
@Getter
@Setter
public class MapInferenceRequest extends InferenceRequest {
    private Map<String, String> inputObjects;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/NormalizationExecuteDTO.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;
import org.opensearch.search.fetch.FetchSearchResult;
import org.opensearch.search.query.QuerySearchResult;

import java.util.List;
import java.util.Optional;

/**
 * DTO object to hold data in NormalizationProcessorWorkflow class
 * in NormalizationProcessorWorkflow.
 */
@AllArgsConstructor
@Builder
@Getter
public class NormalizationExecuteDTO {
    @NonNull
    private List<QuerySearchResult> querySearchResults;
    @NonNull
    private Optional<FetchSearchResult> fetchSearchResultOptional;
    @NonNull
    private ScoreNormalizationTechnique normalizationTechnique;
    @NonNull
    private ScoreCombinationTechnique combinationTechnique;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/NormalizationProcessorWorkflowExecuteRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import org.opensearch.action.search.SearchPhaseContext;
import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;
import org.opensearch.search.fetch.FetchSearchResult;
import org.opensearch.search.pipeline.PipelineProcessingContext;
import org.opensearch.search.query.QuerySearchResult;

import java.util.List;
import java.util.Optional;

/**
 * DTO class to hold request parameters for normalization and combination
 */
@Builder
@AllArgsConstructor
@Getter
public class NormalizationProcessorWorkflowExecuteRequest {
    final List<QuerySearchResult> querySearchResults;
    final Optional<FetchSearchResult> fetchSearchResultOptional;
    final ScoreNormalizationTechnique normalizationTechnique;
    final ScoreCombinationTechnique combinationTechnique;
    boolean explain;
    final PipelineProcessingContext pipelineProcessingContext;
    final SearchPhaseContext searchPhaseContext;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/NormalizeScoresDTO.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;

import java.util.List;

/**
 * DTO object to hold data required for score normalization.
 */
@AllArgsConstructor
@Builder
@Getter
public class NormalizeScoresDTO {
    @NonNull
    private List<CompoundTopDocs> queryTopDocs;
    @NonNull
    private ScoreNormalizationTechnique normalizationTechnique;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/SearchShard.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.AllArgsConstructor;
import lombok.Value;
import org.opensearch.search.SearchShardTarget;

/**
 * DTO class to store index, shardId and nodeId for a search shard.
 */
@Value
@AllArgsConstructor
public class SearchShard {
    String index;
    int shardId;
    String nodeId;

    /**
     * Create SearchShard from SearchShardTarget
     * @param searchShardTarget target shard to copy index, shard id and node id from
     * @return SearchShard
     */
    public static SearchShard createSearchShard(final SearchShardTarget searchShardTarget) {
        return new SearchShard(searchShardTarget.getIndex(), searchShardTarget.getShardId().id(), searchShardTarget.getNodeId());
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/SimilarityInferenceRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.NoArgsConstructor;
import lombok.Getter;
import lombok.Setter;
import lombok.experimental.SuperBuilder;

/**
 * Implementation of InferenceRequest for similarity based text inference requests.
 *
 * @see TextInferenceRequest
 */
@SuperBuilder
@NoArgsConstructor
@Getter
@Setter
public class SimilarityInferenceRequest extends TextInferenceRequest {
    private String queryText;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/TechniqueCompatibilityCheckDTO.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique;
import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique;

/**
 * DTO object to hold data required for validation.
 */
@AllArgsConstructor
@Builder
@Getter
public class TechniqueCompatibilityCheckDTO {
    @NonNull
    private ScoreCombinationTechnique scoreCombinationTechnique;
    @NonNull
    private ScoreNormalizationTechnique scoreNormalizationTechnique;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/TextInferenceRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor;

import java.util.List;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.experimental.SuperBuilder;

/**
 * Implementation of InferenceRequest for inputTexts based inference requests.
 * Use this class when the input data consists of list of strings.
 *
 * @see InferenceRequest
 */
@SuperBuilder
@NoArgsConstructor
@Getter
@Setter
public class TextInferenceRequest extends InferenceRequest {
    private List<String> inputTexts; // on which inference needs to happen
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.chunker;

import com.google.common.collect.ImmutableMap;

import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;

/**
 * A factory to create different chunking algorithm objects.
 */
public final class ChunkerFactory {

    private ChunkerFactory() {} // no instance of this factory class

    // maps algorithm name -> constructor taking the chunker's configuration parameters
    private static final Map<String, Function<Map<String, Object>, Chunker>> CHUNKERS_CONSTRUCTORS = ImmutableMap.of(
        FixedTokenLengthChunker.ALGORITHM_NAME,
        FixedTokenLengthChunker::new,
        DelimiterChunker.ALGORITHM_NAME,
        DelimiterChunker::new
    );

    /** Set of supported chunker algorithm types */
    public static Set<String> CHUNKER_ALGORITHMS = CHUNKERS_CONSTRUCTORS.keySet();

    /**
     * Creates a new Chunker instance based on the specified type and parameters.
     *
     * @param type the type of chunker to create
     * @param parameters configuration parameters for the chunker
     * @return a new Chunker instance configured with the given parameters
     */
    public static Chunker create(final String type, final Map<String, Object> parameters) {
        Function<Map<String, Object>, Chunker> chunkerConstructionFunction = CHUNKERS_CONSTRUCTORS.get(type);
        // chunkerConstructionFunction is not null because we have validated the type in text chunking processor
        Objects.requireNonNull(chunkerConstructionFunction);
        return chunkerConstructionFunction.apply(parameters);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/combination/CombineScoresDto.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.combination;

import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
import org.apache.lucene.search.Sort;
import org.opensearch.common.Nullable;
import org.opensearch.neuralsearch.processor.CompoundTopDocs;
import org.opensearch.search.query.QuerySearchResult;

/**
 * DTO object to hold data required for Score Combination.
 */
@AllArgsConstructor
@Builder
@Getter
public class CombineScoresDto {
    @NonNull
    private List<CompoundTopDocs> queryTopDocs;
    @NonNull
    private ScoreCombinationTechnique scoreCombinationTechnique;
    @NonNull
    private List<QuerySearchResult> querySearchResults;
    @Nullable
    private Sort sort;
    private int fromValueForSingleShard;
    private boolean isSingleShard;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/combination/ScoreCombinationTechnique.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.combination;

/**
 * Abstracts combination of scores in query search results.
 */
public interface ScoreCombinationTechnique {

    /**
     * Defines combination function specific to this technique
     * @param scores array of collected original scores
     * @return combined score
     */
    float combine(final float[] scores);

    /**
     * Returns the name of the combination technique.
     * @return technique name
     */
    String techniqueName();
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/explain/CombinedExplanationDetails.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.explain;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;

/**
 * DTO class to hold explain details for normalization and combination
 */
@AllArgsConstructor
@Builder
@Getter
public class CombinedExplanationDetails {
    private ExplanationDetails normalizationExplanations;
    private ExplanationDetails combinationExplanations;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/explain/DocIdAtSearchShard.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.explain;

import lombok.Value;
import org.opensearch.neuralsearch.processor.SearchShard;

/**
 * DTO class to store docId and search shard for a query.
 * Used in {@link org.opensearch.neuralsearch.processor.NormalizationProcessorWorkflow} to normalize scores across shards.
 */
@Value
public class DocIdAtSearchShard {
    int docId;
    SearchShard searchShard;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/explain/ExplainableTechnique.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.explain;

import org.opensearch.neuralsearch.processor.CompoundTopDocs;

import java.util.List;
import java.util.Map;

/**
 * Abstracts explanation of score combination or normalization technique.
 */
public interface ExplainableTechnique {

    String GENERIC_DESCRIPTION_OF_TECHNIQUE = "generic score processing technique";

    /**
     * Returns a string with general description of the technique
     */
    default String describe() {
        return GENERIC_DESCRIPTION_OF_TECHNIQUE;
    }

    /**
     * Returns a map with explanation for each document id
     * @param queryTopDocs collection of CompoundTopDocs for each shard result
     * @return map of document per shard and corresponding explanation object
     */
    default Map<DocIdAtSearchShard, ExplanationDetails> explain(final List<CompoundTopDocs> queryTopDocs) {
        return Map.of();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/explain/ExplanationDetails.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.explain;

import lombok.AllArgsConstructor;
import lombok.Value;
import org.apache.commons.lang3.tuple.Pair;

import java.util.List;

/**
 * DTO class to store value and description for explain details.
 * Used in {@link org.opensearch.neuralsearch.processor.NormalizationProcessorWorkflow} to normalize scores across shards.
 */
@Value
@AllArgsConstructor
public class ExplanationDetails {
    int docId;
    List<Pair<Float, String>> scoreDetails;

    public ExplanationDetails(List<Pair<Float, String>> scoreDetails) {
        // pass docId as -1 to match docId in SearchHit
        // https://github.com/opensearch-project/OpenSearch/blob/main/server/src/main/java/org/opensearch/search/SearchHit.java#L170
        this(-1, scoreDetails);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/explain/ExplanationPayload.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.explain;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;

import java.util.Map;

/**
 * DTO class to hold explain details for normalization and combination
 */
@AllArgsConstructor
@Builder
@Getter
public class ExplanationPayload {
    private final Map<PayloadType, Object> explainPayload;

    public enum PayloadType {
        NORMALIZATION_PROCESSOR
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/factory/ExplanationResponseProcessorFactory.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.factory;

import org.opensearch.neuralsearch.processor.ExplanationResponseProcessor;
import org.opensearch.search.pipeline.Processor;
import org.opensearch.search.pipeline.SearchResponseProcessor;

import java.util.Map;

/**
 * Factory class for creating ExplanationResponseProcessor
 */
public class ExplanationResponseProcessorFactory implements Processor.Factory<SearchResponseProcessor> {

    @Override
    public SearchResponseProcessor create(
        Map<String, Processor.Factory<SearchResponseProcessor>> processorFactories,
        String tag,
        String description,
        boolean ignoreFailure,
        Map<String, Object> config,
        Processor.PipelineContext pipelineContext
    ) throws Exception {
        return new ExplanationResponseProcessor(description, tag, ignoreFailure);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/highlight/SentenceHighlightingRequest.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.highlight;

import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.experimental.SuperBuilder;
import org.opensearch.neuralsearch.processor.InferenceRequest;

/**
 * Implementation of InferenceRequest for sentence highlighting inference requests.
 * This class handles the question and context parameters needed for highlighting.
 *
 * @see InferenceRequest
 */
@SuperBuilder
@NoArgsConstructor
@Getter
@Setter
public class SentenceHighlightingRequest extends InferenceRequest {
    /**
     * The question to be answered from the context
     */
    private String question;

    /**
     * The context text in which to find the answer
     */
    private String context;
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/normalization/ScoreNormalizationTechnique.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.normalization;

import org.opensearch.neuralsearch.processor.NormalizeScoresDTO;

/**
 * Abstracts normalization of scores in query search results.
 */
public interface ScoreNormalizationTechnique {

    /**
     * Performs score normalization based on input normalization technique.
     * Mutates input object by updating normalized scores.
     * @param normalizeScoresDTO is a data transfer object that contains queryTopDocs
     * original query results from multiple shards and multiple sub-queries, ScoreNormalizationTechnique,
     * and nullable rankConstant that is only used in RRF technique
     */
    void normalize(final NormalizeScoresDTO normalizeScoresDTO);

    /**
     * Returns the name of the normalization technique.
     * @return technique name
     */
    String techniqueName();
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/rerank/RerankType.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.rerank;

import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import lombok.Getter;

/**
 * enum for distinguishing various reranking methods
 */
public enum RerankType {

    ML_OPENSEARCH("ml_opensearch"),
    BY_FIELD("by_field");

    @Getter
    private final String label;

    RerankType(String label) {
        this.label = label;
    }

    // immutable reverse lookup from label string to enum constant
    private static final Map<String, RerankType> LABEL_MAP;
    static {
        Map<String, RerankType> labelMap = new HashMap<>();
        for (RerankType type : RerankType.values()) {
            labelMap.put(type.getLabel(), type);
        }
        LABEL_MAP = Collections.unmodifiableMap(labelMap);
    }

    /**
     * Construct a RerankType from the label
     * @param label label of a RerankType
     * @return RerankType represented by the label
     */
    public static RerankType from(final String label) {
        RerankType ans = LABEL_MAP.get(label);
        if (ans == null) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Wrong rerank type name: %s", label));
        }
        return ans;
    }

    public static Map<String, RerankType> labelMap() {
        return LABEL_MAP;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/processor/rerank/context/ContextSourceFetcher.java:
--------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.processor.rerank.context;

import java.util.Map;

import org.opensearch.action.search.SearchRequest;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.core.action.ActionListener;

/**
 * Interface that gets context from some source and puts it in a map
 * for a reranking processor to use
 */
public interface ContextSourceFetcher {

    /**
     * Fetch the information needed in order to rerank.
     * That could be as simple as grabbing a field from the search request or
     * as complicated as a lookup to some external service
     * @param searchRequest the search query
     * @param searchResponse the search results, in case they're relevant
     * @param listener be async
     */
    void fetchContext(
        final SearchRequest searchRequest,
        final SearchResponse searchResponse,
        final ActionListener<Map<String, Object>> listener
    );

    /**
     * Get the name of the contextSourceFetcher.
This will be used as the field 35 | * name in the context config for the pipeline 36 | * @return Name of the fetcher 37 | */ 38 | String getName(); 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/HybridQueryContext.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | import lombok.Builder; 8 | import lombok.Getter; 9 | 10 | /** 11 | * Class that holds the low level information of hybrid query in the form of context 12 | */ 13 | @Builder 14 | @Getter 15 | public class HybridQueryContext { 16 | private Integer paginationDepth; 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/HybridSubQueryScorer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | import lombok.Data; 8 | import org.apache.lucene.search.Scorable; 9 | 10 | import java.io.IOException; 11 | import java.util.Arrays; 12 | 13 | /** 14 | * Scorer implementation for Hybrid Query. 
This object is light and expected to be re-used between different doc ids 15 | */ 16 | @Data 17 | public class HybridSubQueryScorer extends Scorable { 18 | // array of scores from all sub-queries for a single doc id 19 | private final float[] subQueryScores; 20 | // array of min competitive scores, score is shard level 21 | private final float[] minScores; 22 | 23 | public HybridSubQueryScorer(int numOfSubQueries) { 24 | this.minScores = new float[numOfSubQueries]; 25 | this.subQueryScores = new float[numOfSubQueries]; 26 | } 27 | 28 | @Override 29 | public float score() throws IOException { 30 | // for scenarios when scorer is needed (like in aggregations) for one doc id return sum of sub-query scores 31 | float totalScore = 0.0f; 32 | for (float score : subQueryScores) { 33 | totalScore += score; 34 | } 35 | return totalScore; 36 | } 37 | 38 | /** 39 | * Reset sub-query scores to 0.0f so this scorer can be reused for next doc id 40 | */ 41 | public void resetScores() { 42 | Arrays.fill(subQueryScores, 0.0f); 43 | } 44 | 45 | public int getNumOfSubQueries() { 46 | return subQueryScores.length; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/ModelInferenceQueryBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | /** 8 | * Query builders which calls ml-commons API to do model inference. 9 | * The model inference result is used for search on target field. 10 | */ 11 | public interface ModelInferenceQueryBuilder { 12 | /** 13 | * Get the model id used by ml-commons model inference. Return null if the model id is absent. 14 | */ 15 | public String modelId(); 16 | 17 | /** 18 | * Set a new model id for the query builder. 
19 | */ 20 | public ModelInferenceQueryBuilder modelId(String modelId); 21 | 22 | /** 23 | * Get the field name for search. 24 | */ 25 | public String fieldName(); 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/NeuralKNNQuery.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | import lombok.Getter; 8 | import lombok.RequiredArgsConstructor; 9 | import org.apache.lucene.search.IndexSearcher; 10 | import org.apache.lucene.search.Query; 11 | import org.apache.lucene.search.QueryVisitor; 12 | import org.apache.lucene.search.ScoreMode; 13 | import org.apache.lucene.search.Weight; 14 | 15 | import java.io.IOException; 16 | import java.util.Objects; 17 | 18 | /** 19 | * Wraps KNN Lucene query to support neural search extensions. 20 | * Delegates core operations to the underlying KNN query. 
21 | */ 22 | @Getter 23 | @RequiredArgsConstructor 24 | public class NeuralKNNQuery extends Query { 25 | private final Query knnQuery; 26 | private final String originalQueryText; 27 | 28 | @Override 29 | public String toString(String field) { 30 | return knnQuery.toString(field); 31 | } 32 | 33 | @Override 34 | public void visit(QueryVisitor visitor) { 35 | // Delegate the visitor to the underlying KNN query 36 | knnQuery.visit(visitor); 37 | } 38 | 39 | @Override 40 | public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { 41 | // Delegate weight creation to the underlying KNN query 42 | return knnQuery.createWeight(searcher, scoreMode, boost); 43 | } 44 | 45 | @Override 46 | public Query rewrite(IndexSearcher indexSearcher) throws IOException { 47 | Query rewritten = knnQuery.rewrite(indexSearcher); 48 | if (rewritten == knnQuery) { 49 | return this; 50 | } 51 | return new NeuralKNNQuery(rewritten, originalQueryText); 52 | } 53 | 54 | @Override 55 | public boolean equals(Object other) { 56 | if (this == other) return true; 57 | if (other == null || getClass() != other.getClass()) return false; 58 | NeuralKNNQuery that = (NeuralKNNQuery) other; 59 | return Objects.equals(knnQuery, that.knnQuery) && Objects.equals(originalQueryText, that.originalQueryText); 60 | } 61 | 62 | @Override 63 | public int hashCode() { 64 | return Objects.hash(knnQuery, originalQueryText); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/dto/NeuralQueryBuildStage.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query.dto; 6 | 7 | public enum NeuralQueryBuildStage { 8 | FROM_X_CONTENT, 9 | REWRITE 10 | } 11 | 
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/query/dto/NeuralQueryTargetFieldConfig.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.query.dto;

import lombok.Builder;
import lombok.Data;

/**
 * Resolved configuration of a neural query's target field (semantic-field metadata, model ids, paths).
 */
@Data
@Builder
public class NeuralQueryTargetFieldConfig {
    private final Boolean isSemanticField;
    private final Boolean isUnmappedField;
    private final String searchModelId;
    private final String embeddingFieldType;
    private final String embeddingFieldPath;
    private final String chunksPath;
    private final Boolean chunkingEnabled;
    private final String semanticFieldSearchAnalyzer;
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/HitsThresholdChecker.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search;

import java.util.Locale;

import org.apache.lucene.search.ScoreMode;

import lombok.Getter;

/**
 * Abstracts algorithm that allows early termination for the search flow if number of hits reached
 * certain threshold
 */
public class HitsThresholdChecker {
    private int hitCount;
    @Getter
    private final int totalHitsThreshold;

    public HitsThresholdChecker(int totalHitsThreshold) {
        if (totalHitsThreshold < 0) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "totalHitsThreshold must be >= 0, got %d", totalHitsThreshold));
        }
        this.totalHitsThreshold = totalHitsThreshold;
    }

    public void incrementHitCount() {
        ++hitCount;
    }

    public boolean isThresholdReached() {
        return hitCount >= getTotalHitsThreshold();
    }

    public ScoreMode scoreMode() {
        return ScoreMode.TOP_SCORES;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/HybridDisiWrapper.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search;

import lombok.Getter;
import org.apache.lucene.search.DisiWrapper;
import org.apache.lucene.search.Scorer;

/**
 * Wrapper for DisiWrapper, saves state of sub-queries for performance reasons
 */
@Getter
public class HybridDisiWrapper extends DisiWrapper {
    // index of disi wrapper sub-query object when its part of the hybrid query
    private final int subQueryIndex;

    public HybridDisiWrapper(Scorer scorer, int subQueryIndex) {
        super(scorer, false);
        this.subQueryIndex = subQueryIndex;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/collector/HybridLeafCollector.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search.collector;

import lombok.AccessLevel;
import lombok.Getter;
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorable;
import org.opensearch.neuralsearch.query.HybridSubQueryScorer;

import java.io.IOException;
import java.util.Objects;

/**
 * The abstract class for hybrid query leaf collector
 */
@Log4j2
public abstract class HybridLeafCollector implements LeafCollector {
    @Getter(AccessLevel.PACKAGE)
    HybridSubQueryScorer compoundQueryScorer;

    @Override
    public void setScorer(Scorable scorer) throws IOException {
        if (scorer instanceof HybridSubQueryScorer) {
            compoundQueryScorer = (HybridSubQueryScorer) scorer;
        } else {
            // scorer may wrap the hybrid scorer (e.g. for aggregations); search its children recursively
            compoundQueryScorer = getHybridQueryScorer(scorer);
            if (Objects.isNull(compoundQueryScorer)) {
                log.error("cannot find scorer of type HybridQueryScorer in a hierarchy of scorer {}", scorer);
            }
        }
    }

    private HybridSubQueryScorer getHybridQueryScorer(final Scorable scorer) throws IOException {
        if (Objects.isNull(scorer)) {
            return null;
        }
        if (scorer instanceof HybridSubQueryScorer) {
            return (HybridSubQueryScorer) scorer;
        }
        for (Scorable.ChildScorable childScorable : scorer.getChildren()) {
            HybridSubQueryScorer hybridQueryScorer = getHybridQueryScorer(childScorable.child());
            if (Objects.nonNull(hybridQueryScorer)) {
                log.debug("found hybrid query scorer, it's child of scorer {}", childScorable.child().getClass().getSimpleName());
                return hybridQueryScorer;
            }
        }
        return null;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/collector/HybridSearchCollector.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search.collector;

import java.util.List;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.TopDocs;

/**
 * Common interface class for Hybrid search collectors
 */
public interface HybridSearchCollector extends Collector {
    /**
     * @return List of topDocs which contains topDocs of individual subqueries.
     */
    List<TopDocs> topDocs();

    /**
     * @return count of total hits per shard
     */
    int getTotalHits();

    /**
     * @return maxScore found on a shard
     */
    float getMaxScore();
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/collector/SimpleFieldCollector.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search.collector;

import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Sort;
import org.opensearch.neuralsearch.search.HitsThresholdChecker;

/*
 SimpleFieldCollector collects the sorted results at the shard level for every individual query.
 It collects the list of TopFieldDocs.
*/
public final class SimpleFieldCollector extends HybridTopFieldDocSortCollector {

    public SimpleFieldCollector(int numHits, HitsThresholdChecker hitsThresholdChecker, Sort sort) {
        super(numHits, hitsThresholdChecker, sort, null);
    }

    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) {
        docBase = context.docBase;

        return new HybridTopDocSortLeafCollector() {
            @Override
            public void collect(int doc) throws IOException {
                if (Objects.isNull(compoundQueryScorer)) {
                    throw new IllegalArgumentException("scorers are null for all sub-queries in hybrid query");
                }
                float[] subScoresByQuery = compoundQueryScorer.getSubQueryScores();
                initializePriorityQueuesWithComparators(context, subScoresByQuery.length);
                incrementTotalHitCount();
                for (int i = 0; i < subScoresByQuery.length; i++) {
                    float score = subScoresByQuery[i];
                    // if score is 0.0 there is no hits for that sub-query
                    if (score == 0) {
                        continue;
                    }
                    maxScore = Math.max(score, maxScore);
                    if (queueFull[i]) {
                        if (thresholdCheck(doc, i)) {
                            return;
                        }
                        collectCompetitiveHit(doc, i);
                    } else {
                        collectedHits[i]++;
                        collectHit(doc, collectedHits[i], i, score);
                    }
                }
            }
        };
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/query/HybridQueryFieldDocComparator.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search.query;

import java.util.Comparator;
import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SortField;

/**
 * Comparator class that compares two field docs as per the sorting criteria
 */
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
class HybridQueryFieldDocComparator implements Comparator<FieldDoc> {
    final SortField[] sortFields;
    final FieldComparator[] comparators;
    final int[] reverseMul;
    final Comparator<ScoreDoc> tieBreaker;

    public HybridQueryFieldDocComparator(SortField[] sortFields, Comparator<ScoreDoc> tieBreaker) {
        this.sortFields = sortFields;
        this.tieBreaker = tieBreaker;
        comparators = new FieldComparator[sortFields.length];
        reverseMul = new int[sortFields.length];
        for (int compIDX = 0; compIDX < sortFields.length; compIDX++) {
            final SortField sortField = sortFields[compIDX];
            // one-slot comparator is enough: we only use compareValues, never the collect path
            comparators[compIDX] = sortField.getComparator(1, Pruning.NONE);
            reverseMul[compIDX] = sortField.getReverse() ? -1 : 1;
        }
    }

    @Override
    public int compare(final FieldDoc firstFD, final FieldDoc secondFD) {
        for (int compIDX = 0; compIDX < comparators.length; compIDX++) {
            final FieldComparator comp = comparators[compIDX];

            final int cmp = reverseMul[compIDX] * comp.compareValues(firstFD.fields[compIDX], secondFD.fields[compIDX]);

            if (cmp != 0) {
                return cmp;
            }
        }
        // all sort fields equal; fall back to the tie breaker
        return tieBreakCompare(firstFD, secondFD, tieBreaker);
    }

    private int tieBreakCompare(ScoreDoc firstDoc, ScoreDoc secondDoc, Comparator<ScoreDoc> tieBreaker) {
        assert tieBreaker != null;
        int value = tieBreaker.compare(firstDoc, secondDoc);
        return value;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/search/query/exception/HybridSearchRescoreQueryException.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.search.query.exception;

import org.opensearch.OpenSearchException;

/**
 * Exception thrown when there is an issue with the hybrid search rescore query.
 */
public class HybridSearchRescoreQueryException extends OpenSearchException {

    public HybridSearchRescoreQueryException(Throwable cause) {
        super("rescore failed for hybrid query", cause);
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/settings/NeuralSearchSettings.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.settings;

import org.opensearch.common.settings.Setting;

import lombok.AccessLevel;
import lombok.NoArgsConstructor;

/**
 * Class defines settings specific to neural-search plugin
 */
@NoArgsConstructor(access = AccessLevel.PRIVATE)
public final class NeuralSearchSettings {

    /**
     * Limits the number of document fields that can be passed to the reranker.
     */
    public static final Setting<Integer> RERANKER_MAX_DOC_FIELDS = Setting.intSetting(
        "plugins.neural_search.reranker_max_document_fields",
        50,
        Setting.Property.NodeScope
    );

    /**
     * Enables or disables the Stats API and event stat collection.
     * If API is called when stats are disabled, the response will 403.
     * Event stat increment calls are also treated as no-ops.
     */
    public static final Setting<Boolean> NEURAL_STATS_ENABLED = Setting.boolSetting(
        "plugins.neural_search.stats_enabled",
        false,
        Setting.Property.NodeScope,
        Setting.Property.Dynamic
    );
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/settings/NeuralSearchSettingsAccessor.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.settings;

import lombok.Getter;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.stats.events.EventStatsManager;

/**
 * Class handles exposing settings related to neural search and manages callbacks when the settings change
 */
public class NeuralSearchSettingsAccessor {
    // volatile: read by search threads, written by the cluster settings update callback
    @Getter
    private volatile boolean isStatsEnabled;

    /**
     * Constructor, registers callbacks to update settings
     * @param clusterService
     * @param settings
     */
    public NeuralSearchSettingsAccessor(ClusterService clusterService, Settings settings) {
        isStatsEnabled = NeuralSearchSettings.NEURAL_STATS_ENABLED.get(settings);
        registerSettingsCallbacks(clusterService);
    }

    private void registerSettingsCallbacks(ClusterService clusterService) {
        clusterService.getClusterSettings().addSettingsUpdateConsumer(NeuralSearchSettings.NEURAL_STATS_ENABLED, value -> {
            // If stats are being toggled off, clear and reset all stats
            if (isStatsEnabled && (value == false)) {
                EventStatsManager.instance().reset();
            }
            isStatsEnabled = value;
        });
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/opensearch/neuralsearch/stats/common/StatName.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.common;

/**
 * Interface for objects that hold stat name, path, and type information.
 * The stat name is used as the unique identifier for the stat. It can be used as a request parameter for user filtering.
 */
public interface StatName {
    /**
     * Gets the name of the stat. These must be unique to support user request stat filtering.
     * @return the name of the stat
     */
    String getNameString();

    /**
     * Gets the path of the stat in dot notation.
     * The path must be unique and avoid collisions with other stat names.
     * @return the path of the stat
     */
    String getFullPath();

    /**
     * The type of the stat
     * @return the stat type
     */
    StatType getStatType();
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/common/StatSnapshot.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.common;

import org.opensearch.core.xcontent.ToXContent;
import org.opensearch.core.xcontent.ToXContentFragment;
import org.opensearch.core.xcontent.XContentBuilder;

import java.io.IOException;

/**
 * A serializable snapshot of a stat at a given point in time.
 * Holds stat values, type, and metadata for processing and returning across rest layer.
 * These are not meant to be persisted.
 * @param <T> The type of the value of the stat
 */
public interface StatSnapshot<T> extends ToXContentFragment {
    /**
     * Field name of the stat_type in XContent
     */
    String STAT_TYPE_FIELD = "stat_type";

    /**
     * Field name of the value in XContent
     */
    String VALUE_FIELD = "value";

    /**
     * Gets the raw value of the stat, excluding any metadata
     * @return the raw stat value
     */
    T getValue();

    /**
     * Converts to fields xContent, including stat metadata
     *
     * @param builder XContentBuilder
     * @param params Params
     * @return XContentBuilder
     * @throws IOException thrown by builder for invalid field
     */
    XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException;
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/common/StatType.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.common;

/**
 * Interface for the type of stat. Used for stat type metadata
 */
public interface StatType {

    /**
     * Get the name of the stat type containing info about the type and how to process it
     * @return name of the stat type
     */
    String getTypeString();
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/events/EventStat.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.events;

import org.opensearch.neuralsearch.stats.common.StatSnapshot;

/**
 * Interface for event stats. These contain logic to store and update ongoing event information.
 */
public interface EventStat {
    /**
     * Returns a single point in time value associated with the stat. Typically a counter.
     * @return the value of the stat
     */
    long getValue();

    /**
     * Returns a snapshot of the stat. Used to cross transport layer/rest layer
     * @return the snapshot of the stat
     */
    StatSnapshot<?> getStatSnapshot();

    /**
     * Increments the stat
     */
    void increment();

    /**
     * Resets the stat value
     */
    void reset();
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/events/EventStatType.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.events;

import org.opensearch.neuralsearch.stats.common.StatType;

import java.util.Locale;

/**
 * Enum for different kinds of event stat types to track
 */
public enum EventStatType implements StatType {
    TIMESTAMPED_EVENT_COUNTER;

    /**
     * Gets the name of the stat type, the enum name in lowercase
     * @return the name of the stat type
     */
    public String getTypeString() {
        return name().toLowerCase(Locale.ROOT);
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/info/CountableInfoStatSnapshot.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.info;

import org.opensearch.core.xcontent.ToXContent;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.neuralsearch.stats.common.StatSnapshot;

import java.io.IOException;
import java.util.concurrent.atomic.LongAdder;

/**
 * A countable stat snapshot for info stats.
 * Can be updated in place
 */
public class CountableInfoStatSnapshot implements StatSnapshot<Long> {
    private LongAdder adder;
    private InfoStatName statName;

    /**
     * Creates a new stat snapshot
     * @param statName the name of the stat it corresponds to
     */
    public CountableInfoStatSnapshot(InfoStatName statName) {
        this.statName = statName;
        this.adder = new LongAdder();
    }

    /**
     * Gets the counter value
     * @return the counter value
     */
    public Long getValue() {
        return adder.longValue();
    }

    /**
     * Increment the counter by a given delta
     * @param delta the amount to increment by
     */
    public void incrementBy(Long delta) {
        adder.add(delta);
    }

    /**
     * Converts to fields xContent, including stat metadata
     *
     * @param builder XContentBuilder
     * @param params Params
     * @return XContentBuilder
     * @throws IOException thrown by builder for invalid field
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
        builder.startObject();
        builder.field(StatSnapshot.VALUE_FIELD, getValue());
        builder.field(StatSnapshot.STAT_TYPE_FIELD, statName.getStatType().getTypeString());
        builder.endObject();
        return builder;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/info/InfoStatType.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.info;

import org.opensearch.neuralsearch.stats.common.StatType;

import java.util.Locale;

/**
 * Enum for different kinds of info stat types to track
 */
public enum InfoStatType implements StatType {
    INFO_COUNTER,
    INFO_STRING,
    INFO_BOOLEAN;

    /**
     * Gets the name of the stat type, the enum name in lowercase
     * @return the name of the stat type
     */
    public String getTypeString() {
        return name().toLowerCase(Locale.ROOT);
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/stats/info/SettableInfoStatSnapshot.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.stats.info;

import lombok.Getter;
import lombok.Setter;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.neuralsearch.stats.common.StatSnapshot;

import java.io.IOException;

/**
 * A settable info snapshot used to track Strings, booleans, or other simple serializable objects
 * These are meant to be constructed, set, and serialized, not for long storage in memory
 * @param <T> the type of the value to set
 */
public class SettableInfoStatSnapshot<T> implements StatSnapshot<T> {
    @Getter
    @Setter
    private T value;

    private InfoStatName statName;

    /**
     * Creates a new stat snapshot with default null value
     * @param statName the associated stat name
     */
    public SettableInfoStatSnapshot(InfoStatName statName) {
        this.statName = statName;
        this.value = null;
    }

    /**
     * Creates a new stat snapshot for a given value
     * @param statName the associated stat name
     * @param value the initial value to set
     */
    public SettableInfoStatSnapshot(InfoStatName statName, T value) {
        this.statName = statName;
        this.value = value;
    }

    /**
     * Converts to fields xContent, including stat metadata
     *
     * @param builder XContentBuilder
     * @param params Params
     * @return XContentBuilder
     * @throws IOException thrown by builder for invalid field
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        builder.field(StatSnapshot.VALUE_FIELD, getValue());
        builder.field(StatSnapshot.STAT_TYPE_FIELD, statName.getStatType().getTypeString());
        builder.endObject();
        return builder;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/transport/NeuralStatsAction.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.transport;

import org.opensearch.action.ActionType;
import org.opensearch.core.common.io.stream.Writeable;

/**
 * NeuralStatsAction class
 */
public class NeuralStatsAction extends ActionType<NeuralStatsResponse> {

    public static final NeuralStatsAction INSTANCE = new NeuralStatsAction();
    public static final String NAME = "cluster:admin/neural_stats_action";

    /**
     * Constructor
     */
    private NeuralStatsAction() {
        super(NAME, NeuralStatsResponse::new);
    }

    @Override
    public Writeable.Reader<NeuralStatsResponse> getResponseReader() {
        return NeuralStatsResponse::new;
    }
}
-------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/transport/NeuralStatsNodeRequest.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
package org.opensearch.neuralsearch.transport;

import lombok.Getter;
import
/**
 * NeuralStatsNodeRequest represents the request to an individual node.
 * It wraps the cluster-level {@link NeuralStatsRequest} so every node receives the same parameters.
 */
public class NeuralStatsNodeRequest extends TransportRequest {
    // Cluster-level stats request this node-level request was fanned out from
    @Getter
    private NeuralStatsRequest request;

    /**
     * Empty constructor used by the transport layer
     */
    public NeuralStatsNodeRequest() {
        super();
    }

    /**
     * Deserialization constructor
     *
     * @param in input stream
     * @throws IOException in case of I/O errors
     */
    public NeuralStatsNodeRequest(StreamInput in) throws IOException {
        super(in);
        request = new NeuralStatsRequest(in);
    }

    /**
     * Constructor wrapping a cluster-level request
     *
     * @param request NeuralStatsRequest
     */
    public NeuralStatsNodeRequest(NeuralStatsRequest request) {
        this.request = request;
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        // write parent fields first, then the wrapped request — must mirror the stream constructor
        super.writeTo(out);
        request.writeTo(out);
    }
}
NeuralStatsRequest extends BaseNodesRequest { 20 | 21 | /** 22 | * Key indicating all stats should be retrieved 23 | */ 24 | @Getter 25 | private final NeuralStatsInput neuralStatsInput; 26 | 27 | /** 28 | * Empty constructor needed for NeuralStatsTransportAction 29 | */ 30 | public NeuralStatsRequest() { 31 | super((String[]) null); 32 | this.neuralStatsInput = new NeuralStatsInput(); 33 | } 34 | 35 | /** 36 | * Constructor 37 | * 38 | * @param in input stream 39 | * @throws IOException in case of I/O errors 40 | */ 41 | public NeuralStatsRequest(StreamInput in) throws IOException { 42 | super(in); 43 | this.neuralStatsInput = new NeuralStatsInput(in); 44 | } 45 | 46 | /** 47 | * Constructor 48 | * 49 | * @param nodeIds NodeIDs from which to retrieve stats 50 | */ 51 | public NeuralStatsRequest(String[] nodeIds, NeuralStatsInput neuralStatsInput) { 52 | super(nodeIds); 53 | this.neuralStatsInput = neuralStatsInput; 54 | } 55 | 56 | @Override 57 | public void writeTo(StreamOutput out) throws IOException { 58 | super.writeTo(out); 59 | neuralStatsInput.writeTo(out); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/util/HybridQueryUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.util; 6 | 7 | import lombok.AccessLevel; 8 | import lombok.NoArgsConstructor; 9 | import org.apache.lucene.search.BooleanQuery; 10 | import org.apache.lucene.search.Query; 11 | import org.opensearch.index.search.NestedHelper; 12 | import org.opensearch.neuralsearch.query.HybridQuery; 13 | import org.opensearch.search.internal.SearchContext; 14 | 15 | import java.util.Objects; 16 | 17 | /** 18 | * Utility class for anything related to hybrid query 19 | */ 20 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 21 | public class 
HybridQueryUtil { 22 | 23 | /** 24 | * This method validates whether the query object is an instance of hybrid query 25 | */ 26 | public static boolean isHybridQuery(final Query query, final SearchContext searchContext) { 27 | if (query instanceof HybridQuery 28 | || (Objects.nonNull(searchContext.parsedQuery()) && searchContext.parsedQuery().query() instanceof HybridQuery)) { 29 | return true; 30 | } 31 | return false; 32 | } 33 | 34 | private static boolean hasNestedFieldOrNestedDocs(final Query query, final SearchContext searchContext) { 35 | return searchContext.mapperService().hasNested() && new NestedHelper(searchContext.mapperService()).mightMatchNestedDocs(query); 36 | } 37 | 38 | private static boolean isWrappedHybridQuery(final Query query) { 39 | return query instanceof BooleanQuery 40 | && ((BooleanQuery) query).clauses().stream().anyMatch(clauseQuery -> clauseQuery.query() instanceof HybridQuery); 41 | } 42 | 43 | private static boolean hasAliasFilter(final Query query, final SearchContext searchContext) { 44 | return Objects.nonNull(searchContext.aliasFilter()); 45 | } 46 | 47 | /** 48 | * This method checks whether hybrid query is wrapped under boolean query object 49 | */ 50 | public static boolean isHybridQueryWrappedInBooleanQuery(final SearchContext searchContext, final Query query) { 51 | return ((hasAliasFilter(query, searchContext) || hasNestedFieldOrNestedDocs(query, searchContext)) 52 | && isWrappedHybridQuery(query) 53 | && !((BooleanQuery) query).clauses().isEmpty()); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/util/PipelineServiceUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.util; 6 | 7 | import com.google.common.annotations.VisibleForTesting; 8 | import 
lombok.extern.log4j.Log4j2; 9 | import org.opensearch.cluster.service.ClusterService; 10 | import org.opensearch.ingest.IngestService; 11 | import org.opensearch.search.pipeline.PipelineConfiguration; 12 | import org.opensearch.search.pipeline.SearchPipelineService; 13 | 14 | import java.util.List; 15 | import java.util.Map; 16 | import java.util.stream.Collectors; 17 | 18 | /** 19 | * Class abstracts information related to ingest and search pipelines 20 | */ 21 | @Log4j2 22 | public class PipelineServiceUtil { 23 | private ClusterService clusterService; 24 | 25 | /** 26 | * Constructor 27 | * @param clusterService 28 | */ 29 | public PipelineServiceUtil(ClusterService clusterService) { 30 | this.clusterService = clusterService; 31 | } 32 | 33 | /** 34 | * Returns list of search pipeline configs 35 | * @return list of search pipeline configs 36 | */ 37 | public List> getSearchPipelineConfigs() { 38 | List> pipelineConfigs = getSearchPipelines().stream() 39 | .map(PipelineConfiguration::getConfigAsMap) 40 | .collect(Collectors.toList()); 41 | 42 | return pipelineConfigs; 43 | } 44 | 45 | /** 46 | * Returns list of ingest pipeline configs 47 | * @return list of ingest pipeline configs 48 | */ 49 | public List> getIngestPipelineConfigs() { 50 | List> pipelineConfigs = getIngestPipelines().stream() 51 | .map(org.opensearch.ingest.PipelineConfiguration::getConfigAsMap) 52 | .collect(Collectors.toList()); 53 | 54 | return pipelineConfigs; 55 | } 56 | 57 | @VisibleForTesting 58 | protected List getIngestPipelines() { 59 | return IngestService.getPipelines(clusterService.state()); 60 | } 61 | 62 | @VisibleForTesting 63 | protected List getSearchPipelines() { 64 | return SearchPipelineService.getPipelines(clusterService.state()); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/org/opensearch/neuralsearch/util/prune/PruneType.java: 
/**
 * Enum representing different types of prune methods for sparse vectors
 */
public enum PruneType {
    NONE("none"),
    TOP_K("top_k"),
    ALPHA_MASS("alpha_mass"),
    MAX_RATIO("max_ratio"),
    ABS_VALUE("abs_value");

    private final String value;
    // Reverse lookup table from string value to enum constant, built once at class load
    private static final Map<String, PruneType> VALUE_MAP = Arrays.stream(values())
        .collect(Collectors.toUnmodifiableMap(status -> status.value, Function.identity()));

    PruneType(String value) {
        this.value = value;
    }

    /**
     * @return the string representation of this prune type
     */
    public String getValue() {
        return value;
    }

    /**
     * Get PruneType from string value
     *
     * @param value string representation of prune type; null or empty maps to {@link #NONE}
     * @return corresponding PruneType enum
     * @throws IllegalArgumentException if value doesn't match any prune type
     */
    public static PruneType fromString(final String value) {
        // plain stdlib null/empty check instead of legacy commons-lang StringUtils.isEmpty
        if (value == null || value.isEmpty()) {
            return NONE;
        }
        PruneType type = VALUE_MAP.get(value);
        if (type == null) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Unknown prune type: %s", value));
        }
        return type;
    }
}
java.lang.reflect.ReflectPermission "suppressAccessChecks"; 6 | permission java.lang.RuntimePermission "setContextClassLoader"; 7 | 8 | }; 9 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/NeuralSearchIT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch; 6 | 7 | import java.io.IOException; 8 | 9 | import org.apache.hc.core5.http.ParseException; 10 | import org.apache.hc.core5.http.io.entity.EntityUtils; 11 | import org.junit.Assert; 12 | import org.opensearch.client.Request; 13 | import org.opensearch.client.Response; 14 | import org.opensearch.rest.RestRequest; 15 | 16 | public class NeuralSearchIT extends OpenSearchSecureRestTestCase { 17 | private static final String NEURAL_SEARCH_PLUGIN_NAME = "neural-search"; 18 | 19 | public void testNeuralSearchPluginInstalled() throws IOException, ParseException { 20 | final Request request = new Request(RestRequest.Method.GET.name(), String.join("/", "_cat", "plugins")); 21 | final Response response = client().performRequest(request); 22 | assertOK(response); 23 | 24 | final String responseBody = EntityUtils.toString(response.getEntity()); 25 | Assert.assertNotNull(responseBody); 26 | Assert.assertTrue(responseBody.contains(NEURAL_SEARCH_PLUGIN_NAME)); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/NeuralSearchTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch; 6 | 7 | import org.opensearch.knn.common.KNNConstants; 8 | import org.opensearch.knn.index.engine.KNNEngine; 9 | import 
org.opensearch.test.OpenSearchTestCase; 10 | 11 | public class NeuralSearchTests extends OpenSearchTestCase { 12 | 13 | public void testValidateKNNDependency() { 14 | assertEquals(KNNConstants.LUCENE_NAME, KNNEngine.LUCENE.getName()); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/common/VectorUtilTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.common; 6 | 7 | import java.util.Collections; 8 | import java.util.List; 9 | 10 | import org.opensearch.test.OpenSearchTestCase; 11 | 12 | public class VectorUtilTests extends OpenSearchTestCase { 13 | 14 | public void testVectorAsListToArray() { 15 | List vectorAsList_withThreeElements = List.of(1.3f, 2.5f, 3.5f); 16 | float[] vectorAsArray_withThreeElements = VectorUtil.vectorAsListToArray(vectorAsList_withThreeElements); 17 | 18 | assertEquals(vectorAsList_withThreeElements.size(), vectorAsArray_withThreeElements.length); 19 | for (int i = 0; i < vectorAsList_withThreeElements.size(); i++) { 20 | assertEquals(vectorAsList_withThreeElements.get(i).floatValue(), vectorAsArray_withThreeElements[i], 0.0f); 21 | } 22 | 23 | List vectorAsList_withNoElements = Collections.emptyList(); 24 | float[] vectorAsArray_withNoElements = VectorUtil.vectorAsListToArray(vectorAsList_withNoElements); 25 | assertEquals(0, vectorAsArray_withNoElements.length); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/constants/TestCommonConstants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.constants; 6 | 7 | 
import java.util.List; 8 | import java.util.Map; 9 | 10 | import lombok.AccessLevel; 11 | import lombok.NoArgsConstructor; 12 | import org.opensearch.neuralsearch.processor.MapInferenceRequest; 13 | import org.opensearch.neuralsearch.processor.SimilarityInferenceRequest; 14 | import org.opensearch.neuralsearch.processor.TextInferenceRequest; 15 | 16 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 17 | public class TestCommonConstants { 18 | public static final String MODEL_ID = "modeId"; 19 | public static final List TARGET_RESPONSE_FILTERS = List.of("sentence_embedding"); 20 | public static final Float[] PREDICT_VECTOR_ARRAY = new Float[] { 2.0f, 3.0f }; 21 | public static final List SENTENCES_LIST = List.of("it is sunny today", "roses are red"); 22 | public static final Map SENTENCES_MAP = Map.of("inputText", "Text query", "inputImage", "base641234567890"); 23 | 24 | public static final String QUERY_TEST = "is it sunny"; 25 | 26 | public static final TextInferenceRequest TEXT_INFERENCE_REQUEST = TextInferenceRequest.builder() 27 | .modelId(MODEL_ID) 28 | .inputTexts(SENTENCES_LIST) 29 | .build(); 30 | 31 | public static final MapInferenceRequest MAP_INFERENCE_REQUEST = MapInferenceRequest.builder() 32 | .modelId(MODEL_ID) 33 | .inputObjects(SENTENCES_MAP) 34 | .build(); 35 | 36 | public static final SimilarityInferenceRequest SIMILARITY_INFERENCE_REQUEST = SimilarityInferenceRequest.builder() 37 | .modelId(MODEL_ID) 38 | .inputTexts(SENTENCES_LIST) 39 | .queryText(QUERY_TEST) 40 | .build(); 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/executors/HybridQueryExecutorIT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.executors; 6 | 7 | import org.apache.hc.core5.http.ParseException; 8 | import 
org.apache.hc.core5.http.io.entity.EntityUtils; 9 | import org.junit.Assert; 10 | import org.opensearch.client.Request; 11 | import org.opensearch.client.Response; 12 | import org.opensearch.neuralsearch.OpenSearchSecureRestTestCase; 13 | import org.opensearch.rest.RestRequest; 14 | 15 | import java.io.IOException; 16 | 17 | import static org.opensearch.neuralsearch.executors.HybridQueryExecutor.getThreadPoolName; 18 | 19 | public class HybridQueryExecutorIT extends OpenSearchSecureRestTestCase { 20 | 21 | public void testHybridQueryExecutorThreadIsInitialized() throws IOException, ParseException { 22 | final Request request = new Request(RestRequest.Method.GET.name(), String.join("/", "_cat", "thread_pool", getThreadPoolName())); 23 | final Response response = client().performRequest(request); 24 | assertOK(response); 25 | 26 | final String responseBody = EntityUtils.toString(response.getEntity()); 27 | Assert.assertNotNull(responseBody); 28 | Assert.assertTrue(responseBody.contains(getThreadPoolName())); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactoryTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.processor.chunker; 6 | 7 | import org.mockito.Mock; 8 | import org.opensearch.index.analysis.AnalysisRegistry; 9 | import org.opensearch.test.OpenSearchTestCase; 10 | 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | 14 | import static org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker.ANALYSIS_REGISTRY_FIELD; 15 | 16 | public class ChunkerFactoryTests extends OpenSearchTestCase { 17 | 18 | @Mock 19 | private AnalysisRegistry analysisRegistry; 20 | 21 | public void testCreate_FixedTokenLength() { 22 | Chunker chunker = 
ChunkerFactory.create(FixedTokenLengthChunker.ALGORITHM_NAME, createChunkParameters()); 23 | assertNotNull(chunker); 24 | assert (chunker instanceof FixedTokenLengthChunker); 25 | } 26 | 27 | public void testCreate_Delimiter() { 28 | Chunker chunker = ChunkerFactory.create(DelimiterChunker.ALGORITHM_NAME, createChunkParameters()); 29 | assertNotNull(chunker); 30 | assert (chunker instanceof DelimiterChunker); 31 | } 32 | 33 | public void testCreate_Invalid() { 34 | String invalidChunkerName = "Invalid Chunker Algorithm"; 35 | assertThrows(NullPointerException.class, () -> ChunkerFactory.create(invalidChunkerName, createChunkParameters())); 36 | } 37 | 38 | private Map createChunkParameters() { 39 | Map parameters = new HashMap<>(); 40 | parameters.put(ANALYSIS_REGISTRY_FIELD, analysisRegistry); 41 | return parameters; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/query/HybridSubQueryScorerTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | import org.opensearch.test.OpenSearchTestCase; 8 | 9 | public class HybridSubQueryScorerTests extends OpenSearchTestCase { 10 | 11 | private static final int NUM_SUB_QUERIES = 2; 12 | 13 | public void testGetSubQueryScores_whenInitialized_thenReturnCorrectSize() { 14 | HybridSubQueryScorer scorer = new HybridSubQueryScorer(NUM_SUB_QUERIES); 15 | float[] scores = scorer.getSubQueryScores(); 16 | 17 | assertEquals(NUM_SUB_QUERIES, scores.length); 18 | assertEquals(NUM_SUB_QUERIES, scorer.getNumOfSubQueries()); 19 | } 20 | 21 | public void testResetScores_whenScoresSet_thenAllScoresZero() { 22 | HybridSubQueryScorer scorer = new HybridSubQueryScorer(NUM_SUB_QUERIES); 23 | float[] scores = scorer.getSubQueryScores(); 24 | scores[0] = 0.5f; 25 | 
scores[1] = 1.0f; 26 | 27 | scorer.resetScores(); 28 | 29 | // verify all scores are reset to 0 30 | for (float score : scorer.getSubQueryScores()) { 31 | assertEquals(0.0f, score, 0.0f); 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/query/NeuralKNNQueryTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.query; 6 | 7 | import org.apache.lucene.search.Query; 8 | import org.opensearch.test.OpenSearchTestCase; 9 | 10 | import java.io.IOException; 11 | 12 | import static org.mockito.ArgumentMatchers.any; 13 | import static org.mockito.ArgumentMatchers.anyFloat; 14 | import static org.mockito.Mockito.mock; 15 | import static org.mockito.Mockito.verify; 16 | import static org.mockito.Mockito.when; 17 | 18 | public class NeuralKNNQueryTests extends OpenSearchTestCase { 19 | 20 | public void testNeuralKNNQuery() throws IOException { 21 | Query mockKnnQuery = mock(Query.class); 22 | String originalQueryText = "test query"; 23 | NeuralKNNQuery query = new NeuralKNNQuery(mockKnnQuery, originalQueryText); 24 | 25 | // Test toString 26 | when(mockKnnQuery.toString("field")).thenReturn("test_query"); 27 | assertEquals("toString should delegate to underlying query", "test_query", query.toString("field")); 28 | 29 | // Test createWeight 30 | when(mockKnnQuery.createWeight(any(), any(), anyFloat())).thenReturn(null); 31 | query.createWeight(null, null, 1.0f); 32 | verify(mockKnnQuery).createWeight(any(), any(), anyFloat()); 33 | 34 | // Test equals and hashCode 35 | NeuralKNNQuery query2 = new NeuralKNNQuery(mockKnnQuery, originalQueryText); 36 | assertEquals("Same underlying query should be equal", query, query2); 37 | assertEquals("Same underlying query should have same hash code", query.hashCode(), 
public class NeuralKNNQueryTests extends OpenSearchTestCase {

    /**
     * Verifies NeuralKNNQuery delegates toString/createWeight to the wrapped query,
     * and that equals/hashCode incorporate both the wrapped query and the original query text.
     */
    public void testNeuralKNNQuery() throws IOException {
        Query mockKnnQuery = mock(Query.class);
        String originalQueryText = "test query";
        NeuralKNNQuery query = new NeuralKNNQuery(mockKnnQuery, originalQueryText);

        // Test toString — must delegate to the wrapped query
        when(mockKnnQuery.toString("field")).thenReturn("test_query");
        assertEquals("toString should delegate to underlying query", "test_query", query.toString("field"));

        // Test createWeight — must forward to the wrapped query
        when(mockKnnQuery.createWeight(any(), any(), anyFloat())).thenReturn(null);
        query.createWeight(null, null, 1.0f);
        verify(mockKnnQuery).createWeight(any(), any(), anyFloat());

        // Test equals and hashCode — same wrapped query + same text means equal
        NeuralKNNQuery query2 = new NeuralKNNQuery(mockKnnQuery, originalQueryText);
        assertEquals("Same underlying query should be equal", query, query2);
        assertEquals("Same underlying query should have same hash code", query.hashCode(), query2.hashCode());

        // Test originalQueryText getter
        assertEquals("Original query text should match", originalQueryText, query.getOriginalQueryText());

        // Test not equals with different originalQueryText — text participates in equality
        NeuralKNNQuery query3 = new NeuralKNNQuery(mockKnnQuery, "different query");
        assertNotEquals("Different original query text should not be equal", query, query3);
        assertNotEquals("Different original query text should have different hash code", query.hashCode(), query3.hashCode());
    }
}
| public void testTrackThreshold_whenTrackThresholdSet_thenSuccessful() { 30 | HitsThresholdChecker hitsThresholdChecker = new HitsThresholdChecker(Integer.MAX_VALUE); 31 | assertEquals(ScoreMode.TOP_SCORES, hitsThresholdChecker.scoreMode()); 32 | assertFalse(hitsThresholdChecker.isThresholdReached()); 33 | hitsThresholdChecker.incrementHitCount(); 34 | assertFalse(hitsThresholdChecker.isThresholdReached()); 35 | IntStream.rangeClosed(1, 5).forEach((checker) -> hitsThresholdChecker.incrementHitCount()); 36 | assertFalse(hitsThresholdChecker.isThresholdReached()); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/search/HybridDisiWrapperTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.search; 6 | 7 | import org.apache.lucene.search.DocIdSetIterator; 8 | import org.apache.lucene.search.Scorer; 9 | import org.opensearch.neuralsearch.query.OpenSearchQueryTestCase; 10 | 11 | import static org.mockito.Mockito.mock; 12 | import static org.mockito.Mockito.when; 13 | 14 | public class HybridDisiWrapperTests extends OpenSearchQueryTestCase { 15 | 16 | public void testSubQueryIndex_whenCreateNewInstanceAndSetIndex_thenSuccessful() { 17 | Scorer scorer = mock(Scorer.class); 18 | DocIdSetIterator docIdSetIterator = mock(DocIdSetIterator.class); 19 | when(scorer.iterator()).thenReturn(docIdSetIterator); 20 | int subQueryIndex = 2; 21 | HybridDisiWrapper hybridDisiWrapper = new HybridDisiWrapper(scorer, subQueryIndex); 22 | assertEquals(2, hybridDisiWrapper.getSubQueryIndex()); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/search/collector/HybridCollectorTestCase.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.search.collector; 6 | 7 | import org.apache.lucene.search.LeafCollector; 8 | import org.opensearch.neuralsearch.query.HybridSubQueryScorer; 9 | import org.opensearch.neuralsearch.query.OpenSearchQueryTestCase; 10 | 11 | import java.io.IOException; 12 | import java.util.List; 13 | 14 | /** 15 | * Base class for HybridCollector test cases 16 | */ 17 | public class HybridCollectorTestCase extends OpenSearchQueryTestCase { 18 | /** 19 | * Collect docs and scores for each sub-query scorer and add them to the leaf collector 20 | * @param scorer HybridSubQueryScorer object 21 | * @param scores1 List of scores for the first sub-query 22 | * @param leafCollector LeafCollector object 23 | * @param subQueryIndex Index of the sub-query 24 | * @param docsIds Array of document IDs 25 | * @throws IOException 26 | */ 27 | void collectDocsAndScores( 28 | HybridSubQueryScorer scorer, 29 | List scores1, 30 | LeafCollector leafCollector, 31 | int subQueryIndex, 32 | int[] docsIds 33 | ) throws IOException { 34 | for (int i = 0; i < docsIds.length; i++) { 35 | scorer.getSubQueryScores()[subQueryIndex] = scores1.get(i); 36 | leafCollector.collect(docsIds[i]); 37 | scorer.resetScores(); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/stats/info/CountableInfoStatSnapshotTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.stats.info; 6 | 7 | import org.opensearch.common.xcontent.json.JsonXContent; 8 | import org.opensearch.core.xcontent.ToXContent; 9 | import 
org.opensearch.core.xcontent.XContentBuilder; 10 | import org.opensearch.neuralsearch.stats.common.StatSnapshot; 11 | import org.opensearch.test.OpenSearchTestCase; 12 | 13 | import java.io.IOException; 14 | import java.util.Map; 15 | 16 | import static org.opensearch.neuralsearch.util.TestUtils.xContentBuilderToMap; 17 | 18 | public class CountableInfoStatSnapshotTests extends OpenSearchTestCase { 19 | private static final InfoStatName STAT_NAME = InfoStatName.TEXT_EMBEDDING_PROCESSORS; 20 | 21 | public void test_increment() { 22 | CountableInfoStatSnapshot snapshot = new CountableInfoStatSnapshot(STAT_NAME); 23 | assertEquals(0L, snapshot.getValue().longValue()); 24 | snapshot.incrementBy(5L); 25 | assertEquals(5L, snapshot.getValue().longValue()); 26 | snapshot.incrementBy(3L); 27 | assertEquals(8L, snapshot.getValue().longValue()); 28 | } 29 | 30 | public void test_toXContent() throws IOException { 31 | CountableInfoStatSnapshot snapshot = new CountableInfoStatSnapshot(STAT_NAME); 32 | snapshot.incrementBy(8675309L); 33 | 34 | XContentBuilder builder = JsonXContent.contentBuilder(); 35 | snapshot.toXContent(builder, ToXContent.EMPTY_PARAMS); 36 | 37 | Map responseMap = xContentBuilderToMap(builder); 38 | 39 | assertEquals(8675309, responseMap.get(StatSnapshot.VALUE_FIELD)); 40 | assertEquals(STAT_NAME.getStatType().getTypeString(), responseMap.get(StatSnapshot.STAT_TYPE_FIELD)); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/stats/info/SettableInfoStatSnapshotTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.stats.info; 6 | 7 | import org.opensearch.common.xcontent.json.JsonXContent; 8 | import org.opensearch.core.xcontent.ToXContent; 9 | import org.opensearch.core.xcontent.XContentBuilder; 
10 | import org.opensearch.neuralsearch.stats.common.StatSnapshot; 11 | import org.opensearch.test.OpenSearchTestCase; 12 | 13 | import java.io.IOException; 14 | import java.util.Map; 15 | 16 | import static org.opensearch.neuralsearch.util.TestUtils.xContentBuilderToMap; 17 | 18 | public class SettableInfoStatSnapshotTests extends OpenSearchTestCase { 19 | 20 | private static final InfoStatName STAT_NAME = InfoStatName.CLUSTER_VERSION; 21 | private static final String SETTABLE_VALUE = "test-value"; 22 | 23 | public void test_constructorWithoutValue() { 24 | SettableInfoStatSnapshot snapshot = new SettableInfoStatSnapshot<>(STAT_NAME); 25 | assertNull(snapshot.getValue()); 26 | } 27 | 28 | public void test_constructorWithValue() { 29 | SettableInfoStatSnapshot snapshot = new SettableInfoStatSnapshot<>(STAT_NAME, SETTABLE_VALUE); 30 | assertEquals(SETTABLE_VALUE, snapshot.getValue()); 31 | } 32 | 33 | public void test_setValueUpdates() { 34 | SettableInfoStatSnapshot snapshot = new SettableInfoStatSnapshot<>(STAT_NAME); 35 | snapshot.setValue("new-value"); 36 | assertEquals("new-value", snapshot.getValue()); 37 | } 38 | 39 | public void test_toXContent() throws IOException { 40 | SettableInfoStatSnapshot snapshot = new SettableInfoStatSnapshot<>(STAT_NAME, SETTABLE_VALUE); 41 | XContentBuilder builder = JsonXContent.contentBuilder(); 42 | snapshot.toXContent(builder, ToXContent.EMPTY_PARAMS); 43 | 44 | Map responseMap = xContentBuilderToMap(builder); 45 | 46 | assertEquals(SETTABLE_VALUE, responseMap.get(StatSnapshot.VALUE_FIELD)); 47 | assertEquals(STAT_NAME.getStatType().getTypeString(), responseMap.get(StatSnapshot.STAT_TYPE_FIELD)); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/util/PipelineServiceUtilTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * 
SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.util; 6 | 7 | import org.opensearch.cluster.service.ClusterService; 8 | import org.opensearch.test.OpenSearchTestCase; 9 | 10 | import java.util.Collections; 11 | import java.util.List; 12 | import java.util.Map; 13 | 14 | import static org.mockito.Mockito.doReturn; 15 | import static org.mockito.Mockito.mock; 16 | import static org.mockito.Mockito.spy; 17 | import static org.mockito.Mockito.times; 18 | import static org.mockito.Mockito.verify; 19 | 20 | public class PipelineServiceUtilTests extends OpenSearchTestCase { 21 | public void test_getIngestPipelineConfigs_returnsEmptyList() { 22 | ClusterService mockClusterService = mock(ClusterService.class); 23 | PipelineServiceUtil utilSpy = spy(new PipelineServiceUtil(mockClusterService)); 24 | 25 | doReturn(Collections.emptyList()).when(utilSpy).getIngestPipelines(); 26 | 27 | List> configs = utilSpy.getIngestPipelineConfigs(); 28 | 29 | verify(utilSpy, times(1)).getIngestPipelines(); 30 | assertTrue(configs.isEmpty()); 31 | } 32 | 33 | public void test_getSearchPipelineConfigs_returnsEmptyList() { 34 | ClusterService mockClusterService = mock(ClusterService.class); 35 | PipelineServiceUtil utilSpy = spy(new PipelineServiceUtil(mockClusterService)); 36 | 37 | doReturn(Collections.emptyList()).when(utilSpy).getSearchPipelines(); 38 | 39 | List> configs = utilSpy.getSearchPipelineConfigs(); 40 | 41 | verify(utilSpy, times(1)).getSearchPipelines(); 42 | assertTrue(configs.isEmpty()); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/org/opensearch/neuralsearch/util/prune/PruneTypeTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.util.prune; 6 | 7 | import org.opensearch.test.OpenSearchTestCase; 8 | 
9 | public class PruneTypeTests extends OpenSearchTestCase { 10 | public void testGetValue() { 11 | assertEquals("none", PruneType.NONE.getValue()); 12 | assertEquals("top_k", PruneType.TOP_K.getValue()); 13 | assertEquals("alpha_mass", PruneType.ALPHA_MASS.getValue()); 14 | assertEquals("max_ratio", PruneType.MAX_RATIO.getValue()); 15 | assertEquals("abs_value", PruneType.ABS_VALUE.getValue()); 16 | } 17 | 18 | public void testFromString() { 19 | assertEquals(PruneType.NONE, PruneType.fromString("none")); 20 | assertEquals(PruneType.NONE, PruneType.fromString(null)); 21 | assertEquals(PruneType.NONE, PruneType.fromString("")); 22 | assertEquals(PruneType.TOP_K, PruneType.fromString("top_k")); 23 | assertEquals(PruneType.ALPHA_MASS, PruneType.fromString("alpha_mass")); 24 | assertEquals(PruneType.MAX_RATIO, PruneType.fromString("max_ratio")); 25 | assertEquals(PruneType.ABS_VALUE, PruneType.fromString("abs_value")); 26 | 27 | IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> PruneType.fromString("test_value")); 28 | assertEquals("Unknown prune type: test_value", exception.getMessage()); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/test/resources/highlight/UploadSentenceHighlightingModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "sentence_highlighting_qa_model", 3 | "version": "1.0.0", 4 | "function_name": "QUESTION_ANSWERING", 5 | "description": "Sentence highlighting question answering model for testing", 6 | "model_format": "TORCH_SCRIPT", 7 | "model_group_id": "%s", 8 | "model_content_hash_value": "15e97d44ca59f6cd3e977398e38a9cea401eb87f360b92ca9dd8b30afd41f926", 9 | "url": "https://github.com/opensearch-project/ml-commons/blob/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/question_answering/sentence_highlighting_qa_model_pt.zip?raw=true", 10 | "model_config": { 11 
| "model_type": "sentence_highlighting", 12 | "framework_type": "huggingface_transformers" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/test/resources/mapper/mappingWithNestedSemanticFields.json: -------------------------------------------------------------------------------- 1 | { 2 | "products": { 3 | "type": "nested", 4 | "properties": { 5 | "product_description": { 6 | "type": "semantic", 7 | "model_id": "dummy model id" 8 | }, 9 | "price": { 10 | "type": "number" 11 | } 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/test/resources/mappingtransformer/transformedMappingMultipleSemanticFields.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "properties": { 4 | "inter_field": { 5 | "properties": { 6 | "semantic_field_1": { 7 | "model_id": "textEmbeddingModelId", 8 | "type": "semantic", 9 | "chunking": true 10 | }, 11 | "semantic_field_1_semantic_info": { 12 | "properties": { 13 | "chunks": { 14 | "type": "nested", 15 | "properties": { 16 | "embedding": { 17 | "type": "knn_vector", 18 | "method": { 19 | "space_type": "l2", 20 | "name": "hnsw" 21 | }, 22 | "dimension": 768 23 | }, 24 | "text": { 25 | "type": "text" 26 | } 27 | } 28 | }, 29 | "model": { 30 | "properties": { 31 | "id": { 32 | "type": "text", 33 | "index": false 34 | }, 35 | "type": { 36 | "type": "text", 37 | "index": false 38 | }, 39 | "name": { 40 | "type": "text", 41 | "index": false 42 | } 43 | } 44 | } 45 | } 46 | } 47 | } 48 | }, 49 | "semantic_field_2": { 50 | "model_id": "sparseModelId", 51 | "type": "semantic", 52 | "semantic_info_field_name": "custom_semantic_info_field" 53 | }, 54 | "custom_semantic_info_field": { 55 | "properties": { 56 | "embedding": { 57 | "type": "rank_features" 58 | }, 59 | "model": { 60 | "properties": { 61 | "id": { 62 | "type": "text", 63 | "index": false 64 | }, 65 | "type": { 66 | "type": 
"text", 67 | "index": false 68 | }, 69 | "name": { 70 | "type": "text", 71 | "index": false 72 | } 73 | } 74 | } 75 | } 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/test/resources/processor/CreateModelGroupRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "%s", 3 | "description": "This is a public model group" 4 | } 5 | -------------------------------------------------------------------------------- /src/test/resources/processor/NeuralSparseTwoPhaseAndNeuralEnrichProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_sparse_two_phase_processor": { 5 | "tag": "neural-sparse", 6 | "description": "This processor is making two-phase rescorer.", 7 | "enabled": true, 8 | "two_phase_parameter": { 9 | "prune_ratio": %f, 10 | "expansion_rate": %f, 11 | "max_window_size": %d 12 | } 13 | } 14 | }, 15 | { 16 | "neural_query_enricher": { 17 | "tag": "tag1", 18 | "description": "This processor is going to set the default model id.", 19 | "default_model_id": "%s" 20 | } 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_sparse_two_phase_processor": { 5 | "tag": "neural-sparse", 6 | "description": "This processor is making two-phase rescorer.", 7 | "enabled": true, 8 | "two_phase_parameter": { 9 | "prune_ratio": %f, 10 | "expansion_rate": %f, 11 | "max_window_size": %d 12 | } 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfiguration.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "batch_size": "%d", 8 | "field_map": { 9 | "title": "title_knn", 10 | "favor_list": "favor_list_knn", 11 | "favorites": { 12 | "game": "game_knn", 13 | "movie": "movie_knn" 14 | }, 15 | "nested_passages": { 16 | "text": "embedding" 17 | } 18 | } 19 | } 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfigurationWithBatchSize.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "drop": { 6 | "if": "ctx.text.contains('drop')" 7 | } 8 | }, 9 | { 10 | "fail": { 11 | "if": "ctx.text.contains('fail')", 12 | "message": "fail" 13 | } 14 | }, 15 | { 16 | "text_embedding": { 17 | "model_id": "%s", 18 | "batch_size": 2, 19 | "field_map": { 20 | "title": "title_knn", 21 | "favor_list": "favor_list_knn", 22 | "favorites": { 23 | "game": "game_knn", 24 | "movie": "movie_knn" 25 | }, 26 | "nested_passages": { 27 | "text": "embedding" 28 | } 29 | } 30 | } 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfigurationWithBatchSizeWithSkipExisting.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "drop": { 6 | "if": "ctx.text.contains('drop')" 7 | } 8 | }, 9 | { 10 | "fail": { 11 | "if": "ctx.text.contains('fail')", 12 | "message": "fail" 13 | } 14 | }, 15 | { 16 | "text_embedding": { 17 | "model_id": "%s", 18 | "batch_size": 2, 19 | "field_map": { 20 | "title": "title_knn", 21 | "favor_list": "favor_list_knn", 22 | 
"favorites": { 23 | "game": "game_knn", 24 | "movie": "movie_knn" 25 | }, 26 | "nested_passages": { 27 | "text": "embedding" 28 | } 29 | }, 30 | "skip_existing": true 31 | } 32 | } 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfigurationWithNestedFieldsMapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "field_map": { 8 | "title": "title_knn", 9 | "favor_list": "favor_list_knn", 10 | "favorites": { 11 | "game": "game_knn", 12 | "movie": "movie_knn" 13 | }, 14 | "nested_passages.level_2.level_3_text": "level_3_container.level_3_embedding" 15 | } 16 | } 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfigurationWithNestedFieldsMappingWithSkipExisting.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for hybrid", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "field_map": { 8 | "title": "title_knn", 9 | "favor_list": "favor_list_knn", 10 | "favorites": { 11 | "game": "game_knn", 12 | "movie": "movie_knn" 13 | }, 14 | "nested_passages.level_2.level_3_text": "level_3_container.level_3_embedding" 15 | }, 16 | "skip_existing": true 17 | } 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineConfigurationWithSkipExisting.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text embedding pipeline for optimized inference call", 3 | "processors": [ 4 | { 5 | "text_embedding": { 6 | "model_id": "%s", 7 | "batch_size": "%d", 8 | "field_map": { 9 
| "title": "title_knn", 10 | "favor_list": "favor_list_knn", 11 | "favorites": { 12 | "game": "game_knn", 13 | "movie": "movie_knn" 14 | }, 15 | "nested_passages": { 16 | "text": "embedding" 17 | } 18 | }, 19 | "skip_existing": true 20 | } 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineForTextImageEmbeddingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text image embedding pipeline", 3 | "processors": [ 4 | { 5 | "text_image_embedding": { 6 | "model_id": "%s", 7 | "embedding": "passage_embedding", 8 | "field_map": { 9 | "text": "passage_text", 10 | "image": "passage_image" 11 | } 12 | } 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /src/test/resources/processor/PipelineForTextImageEmbeddingWithSkipExistingProcessorConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "text image embedding pipeline", 3 | "processors": [ 4 | { 5 | "text_image_embedding": { 6 | "model_id": "%s", 7 | "embedding": "passage_embedding", 8 | "field_map": { 9 | "text": "passage_text", 10 | "image": "passage_image" 11 | }, 12 | "skip_existing": true 13 | } 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /src/test/resources/processor/ReRankByFieldPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Pipeline for reranking ByField", 3 | "response_processors": [ 4 | { 5 | "rerank": { 6 | "by_field": { 7 | "target_field": "%s", 8 | "remove_target_field": "%s", 9 | "keep_previous_score": "%s" 10 | } 11 | } 12 | } 13 | ] 14 | } 15 | -------------------------------------------------------------------------------- 
/src/test/resources/processor/RerankMLOpenSearchPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Pipeline for reranking with a cross encoder", 3 | "response_processors": [ 4 | { 5 | "rerank": { 6 | "ml_opensearch": { 7 | "model_id": "%s" 8 | }, 9 | "context": { 10 | "document_fields": ["text_representation"] 11 | } 12 | } 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /src/test/resources/processor/SearchRequestPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "request_processors": [ 3 | { 4 | "neural_query_enricher": { 5 | "tag": "tag1", 6 | "description": "This processor is going to restrict to publicly visible documents", 7 | "default_model_id": "%s" 8 | } 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /src/test/resources/processor/SparseEncodingIndexMappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings":{ 3 | "default_pipeline": "pipeline-sparse-encoding" 4 | }, 5 | "mappings": { 6 | "properties": { 7 | "title_sparse": { 8 | "type": "rank_features" 9 | }, 10 | "favor_list_sparse": { 11 | "type": "nested", 12 | "properties":{ 13 | "sparse_encoding":{ 14 | "type": "rank_features" 15 | } 16 | } 17 | }, 18 | "favorites.game_sparse": { 19 | "type": "rank_features" 20 | }, 21 | "favorites.movie_sparse": { 22 | "type": "rank_features" 23 | } 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/test/resources/processor/SparseEncodingPipelineConfiguration.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example sparse Encoding pipeline", 3 | "processors" : [ 4 | { 5 | "sparse_encoding": { 6 | "model_id": "%s", 7 | 
"batch_size": "%d", 8 | "field_map": { 9 | "title": "title_sparse", 10 | "favor_list": "favor_list_sparse", 11 | "favorites": { 12 | "game": "game_sparse", 13 | "movie": "movie_sparse" 14 | } 15 | } 16 | } 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /src/test/resources/processor/SparseEncodingPipelineConfigurationWithPrune.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example sparse Encoding pipeline", 3 | "processors" : [ 4 | { 5 | "sparse_encoding": { 6 | "model_id": "%s", 7 | "batch_size": "%d", 8 | "prune_type": "max_ratio", 9 | "prune_ratio": 0.8, 10 | "field_map": { 11 | "title": "title_sparse", 12 | "favor_list": "favor_list_sparse", 13 | "favorites": { 14 | "game": "game_sparse", 15 | "movie": "movie_sparse" 16 | } 17 | } 18 | } 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /src/test/resources/processor/SparseEncodingPipelineConfigurationWithSkipExisting.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example sparse Encoding pipeline", 3 | "processors" : [ 4 | { 5 | "sparse_encoding": { 6 | "model_id": "%s", 7 | "batch_size": "%d", 8 | "field_map": { 9 | "title": "title_sparse", 10 | "favor_list": "favor_list_sparse", 11 | "favorites": { 12 | "game": "game_sparse", 13 | "movie": "movie_sparse" 14 | } 15 | }, 16 | "skip_existing": true 17 | } 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /src/test/resources/processor/UploadModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "traced_small_model", 3 | "version": "1.0.0", 4 | "model_format": "TORCH_SCRIPT", 5 | "function_name": "TEXT_EMBEDDING", 6 | "model_task_type": "text_embedding", 7 | "model_content_hash_value": 
"e13b74006290a9d0f58c1376f9629d4ebc05a0f9385f40db837452b167ae9021", 8 | "model_group_id": "%s", 9 | "model_config": { 10 | "model_type": "bert", 11 | "embedding_dimension": 768, 12 | "framework_type": "sentence_transformers", 13 | "all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}" 14 | }, 15 | "url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true" 16 | } 17 | -------------------------------------------------------------------------------- /src/test/resources/processor/UploadSparseEncodingModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "amazon/neural-sparse/opensearch-neural-sparse-tokenizer-v1", 3 | "version": "1.0.1", 4 | "model_group_id": "%s", 5 | "model_format": "TORCH_SCRIPT" 6 | } 7 | -------------------------------------------------------------------------------- /src/test/resources/processor/UploadTextSimilarityModelRequestBody.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ms-marco-TinyBERT-L-2-v2", 3 | "version": "1.0.0", 4 | "function_name": "TEXT_SIMILARITY", 5 | "description": "test model", 6 | "model_format": "TORCH_SCRIPT", 7 | "model_group_id": "%s", 8 | "model_content_hash_value": "90e39a926101d1a4e542aade0794319404689b12acfd5d7e65c03d91c668b5cf", 9 | "model_config": { 10 | "model_type": "bert", 11 | "embedding_dimension": 1, 12 | "framework_type": "huggingface_transformers", 13 | "all_config": "nobody will read this" 14 | }, 15 | "url": "https://github.com/opensearch-project/ml-commons/blob/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_similarity/TinyBERT-CE-torch_script.zip?raw=true" 16 | } 17 | 
-------------------------------------------------------------------------------- /src/test/resources/processor/bulk_item_template.json: -------------------------------------------------------------------------------- 1 | { "index": { "_index": "{{index}}", "_id": "{{id}}" } }, 2 | {{doc}} 3 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/PipelineForCascadedChunker.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example cascaded pipeline with fixed token length algorithm after chunking algorithm", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk_intermediate" 8 | }, 9 | "algorithm": { 10 | "delimiter": { 11 | "delimiter": "." 12 | } 13 | } 14 | } 15 | }, 16 | { 17 | "text_chunking": { 18 | "field_map": { 19 | "body_chunk_intermediate": "body_chunk" 20 | }, 21 | "algorithm": { 22 | "fixed_token_length": { 23 | "token_limit": 10 24 | } 25 | } 26 | } 27 | } 28 | ] 29 | } 30 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/PipelineForDelimiterChunker.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example delimiter chunker pipeline", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "delimiter": { 11 | "delimiter": "." 
12 | } 13 | } 14 | } 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/PipelineForFixedTokenLengthChunkerWithLetterTokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example fixed token length chunker pipeline with letter tokenizer", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "fixed_token_length": { 11 | "token_limit": 10, 12 | "tokenizer": "letter" 13 | } 14 | } 15 | } 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/PipelineForFixedTokenLengthChunkerWithLowercaseTokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example fixed token length chunker pipeline with lowercase tokenizer", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "fixed_token_length": { 11 | "token_limit": 10, 12 | "tokenizer": "lowercase" 13 | } 14 | } 15 | } 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/PipelineForFixedTokenLengthChunkerWithStandardTokenizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "An example fixed token length chunker pipeline with standard tokenizer", 3 | "processors" : [ 4 | { 5 | "text_chunking": { 6 | "field_map": { 7 | "body": "body_chunk" 8 | }, 9 | "algorithm": { 10 | "fixed_token_length": { 11 | "token_limit": 10, 12 | "tokenizer": "standard" 13 | } 14 | } 15 | } 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- 
/src/test/resources/processor/chunker/TextChunkingIndexSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings":{ 3 | "index.analyze.max_token_count" : 100, 4 | "default_pipeline": "%s" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/TextChunkingTestDocument.json: -------------------------------------------------------------------------------- 1 | { 2 | "body": "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch." 3 | } 4 | -------------------------------------------------------------------------------- /src/test/resources/processor/chunker/TextChunkingTestLongDocument.json: -------------------------------------------------------------------------------- 1 | { 2 | "body": "This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. 
The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch. This is an example long document to be chunked. The document has more than 100 tokens by standard tokenizer in OpenSearch." 3 | } 4 | -------------------------------------------------------------------------------- /src/test/resources/processor/ingest_doc1.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "text": "%s", 4 | "description": "daily logging", 5 | "favor_list": [ 6 | "test", 7 | "hello", 8 | "mock" 9 | ], 10 | "favorites": { 11 | "game": "overwatch", 12 | "movie": null 13 | }, 14 | "nested_passages": [ 15 | { 16 | "text_not_for_embedding": "test" 17 | }, 18 | { 19 | "text": "hello" 20 | }, 21 | { 22 | "text": "world" 23 | } 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /src/test/resources/processor/ingest_doc2.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "this is a second doc", 3 | "text": "%s", 4 | "description": "the description is not very long", 5 | "favor_list": [ 6 | "favor" 7 | ], 8 | "favorites": { 9 | "game": "golden state", 10 | "movie": null 11 | }, 12 | "nested_passages": [ 13 | { 14 | "text_not_for_embedding": "test" 15 | }, 16 | { 17 | "text": "apple" 18 | }, 19 | { 20 | "text": "banana" 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/ingest_doc3.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "test", 6 | "hello", 7 | "mock" 8 | ], 9 | "favorites": { 10 | "game": "overwatch", 11 | "movie": null 12 | }, 13 | "nested_passages": 14 
| { 15 | "level_2": 16 | { 17 | "level_3_text": "hello", 18 | "level_3_container": { 19 | "level_4_text_field": "abc" 20 | } 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/ingest_doc4.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "key", 6 | "hey", 7 | "click" 8 | ], 9 | "favorites": { 10 | "game": "cossacks", 11 | "movie": "matrix" 12 | }, 13 | "nested_passages": 14 | { 15 | "level_2": 16 | { 17 | "level_3_text": "clown" 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/test/resources/processor/ingest_doc5.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "key", 6 | "hey", 7 | "click" 8 | ], 9 | "favorites": { 10 | "game": "cossacks", 11 | "movie": "matrix" 12 | }, 13 | "nested_passages":[ 14 | { 15 | "level_2": 16 | { 17 | "level_3_text": "clown" 18 | } 19 | }, 20 | { 21 | "level_2.level_3_text": "batman" 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingest_doc1.json: -------------------------------------------------------------------------------- 1 | { 2 | "geo_data": "dummy_geo_data_1", 3 | "products": [ 4 | { 5 | "product_description": "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. 
The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch." 6 | }, 7 | { 8 | "product_description": "dummy_product_description" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingest_doc2.json: -------------------------------------------------------------------------------- 1 | { 2 | "geo_data": "dummy_geo_data_1", 3 | "products": [ 4 | { 5 | "product_description": "dummy_product_description" 6 | } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingest_doc3.json: -------------------------------------------------------------------------------- 1 | { 2 | "geo_data": "dummy_geo_data_1" 3 | } 4 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingested_doc1.json: -------------------------------------------------------------------------------- 1 | { 2 | "_routing" : "routing", 3 | "geo_data_semantic_info" : { 4 | "embedding" : { 5 | "dummy token" : 1.0 6 | }, 7 | "model" : { 8 | "name" : "SPARSE_ENCODING", 9 | "id" : "dummy_model_id_2", 10 | "type" : "SPARSE_ENCODING" 11 | } 12 | }, 13 | "_version_type" : "internal", 14 | "_index" : "index", 15 | "geo_data" : "dummy_geo_data_1", 16 | "_id" : "1", 17 | "_version" : 1, 18 | "products" : [ { 19 | "product_description" : "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. 
The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.", 20 | "product_description_semantic_info" : { 21 | "chunks" : [ { 22 | "text" : "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.This is an example ", 23 | "embedding" : [ 2.0 ] 24 | }, { 25 | "text" : "24 tokens by standard tokenizer in OpenSearch.This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.", 26 | "embedding" : [ 1.0 ] 27 | } ], 28 | "model" : { 29 | "name" : "TEXT_EMBEDDING", 30 | "id" : "dummy_model_id_1", 31 | "type" : "TEXT_EMBEDDING" 32 | } 33 | } 34 | }, { 35 | "product_description" : "dummy_product_description", 36 | "product_description_semantic_info" : { 37 | "chunks" : [ { 38 | "text" : "dummy_product_description", 39 | "embedding" : [ 3.0 ] 40 | } ], 41 | "model" : { 42 | "name" : "TEXT_EMBEDDING", 43 | "id" : "dummy_model_id_1", 44 | "type" : "TEXT_EMBEDDING" 45 | } 46 | } 47 | } ] 48 | } 49 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingested_doc2.json: -------------------------------------------------------------------------------- 1 | { 2 | "_routing" : "routing", 3 | "geo_data_semantic_info" : { 4 | "embedding" : { 5 | "dummy token" : 1.0 6 | }, 7 | "model" : { 8 | "name" : "SPARSE_ENCODING", 9 | "id" : "dummy_model_id_2", 10 | "type" : "SPARSE_ENCODING" 11 | } 12 | }, 13 | "_version_type" : "internal", 14 | "_index" : "index", 15 | "geo_data" : "dummy_geo_data_1", 16 | "_id" : "2", 17 | "_version" : 1, 18 | "products" : [ { 19 | "product_description" : "dummy_product_description", 20 | 
"product_description_semantic_info" : { 21 | "chunks" : [ { 22 | "text" : "dummy_product_description", 23 | "embedding" : [ 3.0 ] 24 | } ], 25 | "model" : { 26 | "name" : "TEXT_EMBEDDING", 27 | "id" : "dummy_model_id_1", 28 | "type" : "TEXT_EMBEDDING" 29 | } 30 | } 31 | } ] 32 | } 33 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/ingested_doc3.json: -------------------------------------------------------------------------------- 1 | { 2 | "_routing" : "routing", 3 | "geo_data_semantic_info" : { 4 | "embedding" : { 5 | "dummy token" : 1.0 6 | }, 7 | "model" : { 8 | "name" : "SPARSE_ENCODING", 9 | "id" : "dummy_model_id_2", 10 | "type" : "SPARSE_ENCODING" 11 | } 12 | }, 13 | "_version_type" : "internal", 14 | "_index" : "index", 15 | "geo_data" : "dummy_geo_data_1", 16 | "_id" : "2", 17 | "_version" : 1 18 | } 19 | -------------------------------------------------------------------------------- /src/test/resources/processor/semantic/invalid_ingest_doc.json: -------------------------------------------------------------------------------- 1 | { 2 | "geo_data": "dummy_geo_data_1", 3 | "products": [ 4 | { 5 | "product_description": { 6 | "invalid": "not a string" 7 | } 8 | } 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /src/test/resources/processor/update_doc1.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "text": "%s", 4 | "description": "daily logging", 5 | "favor_list": [ 6 | "test", 7 | "hello", 8 | "mock" 9 | ], 10 | "favorites": { 11 | "game": "overwatch", 12 | "movie": null 13 | }, 14 | "nested_passages": [ 15 | { 16 | "text_not_for_embedding": "test" 17 | }, 18 | { 19 | "text": "bye" 20 | }, 21 | { 22 | "text": "world" 23 | } 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- 
/src/test/resources/processor/update_doc2.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "this is a second doc", 3 | "text": "%s", 4 | "description": "the description is not very long", 5 | "favor_list": [ 6 | "favor" 7 | ], 8 | "favorites": { 9 | "game": "silver state", 10 | "movie": null 11 | }, 12 | "nested_passages": [ 13 | { 14 | "text_not_for_embedding": "test" 15 | }, 16 | { 17 | "text": "apple" 18 | }, 19 | { 20 | "text": "banana" 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/update_doc3.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "test", 6 | "hello", 7 | "mock" 8 | ], 9 | "favorites": { 10 | "game": "overwatch", 11 | "movie": null 12 | }, 13 | "nested_passages": 14 | { 15 | "level_2": 16 | { 17 | "level_3_text": "hello", 18 | "level_3_container": { 19 | "level_4_text_field": "def" 20 | } 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/processor/update_doc4.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "key", 6 | "hey", 7 | "click" 8 | ], 9 | "favorites": { 10 | "game": "cossacks", 11 | "movie": "matrix" 12 | }, 13 | "nested_passages": 14 | { 15 | "level_2": 16 | { 17 | "level_3_text": "joker" 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/test/resources/processor/update_doc5.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "This is a good day", 3 | "description": "daily logging", 4 | "favor_list": [ 5 | "key", 6 | "hey", 7 | 
// [preserved dump content — tail of update_doc5.json and file separators]: "click" 8 | ], 9 | "favorites": { 10 | "game": "cossacks", 11 | "movie": "matrix" 12 | }, 13 | "nested_passages":[ 14 | { 15 | "level_2": 16 | { 17 | "level_3_text": "joker" 18 | } 19 | }, 20 | { 21 | "level_2.level_3_text": "superman" 22 | } 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /src/testFixtures/java/org/opensearch/neuralsearch/util/AggregationsTestUtils.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
// package org.opensearch.neuralsearch.util;  (original package declaration, kept as comment so the block is standalone-compilable)
import java.util.List;
import java.util.Map;

/**
 * Util class for routines associated with aggregations testing.
 *
 * All helpers navigate a search response that has been parsed into nested
 * {@code Map}/{@code List} structures (e.g. from JSON). The generic type
 * parameters were restored here; the original dump had stripped them
 * (raw {@code Map}/{@code List} types), which loses compile-time safety.
 */
public class AggregationsTestUtils {

    /**
     * Extracts the individual hit objects from a parsed search response.
     *
     * @param searchResponseAsMap search response parsed into a map
     * @return the {@code hits.hits} list, one map per hit
     */
    @SuppressWarnings("unchecked") // parsed-JSON navigation; runtime shape is validated by the tests that use this
    public static List<Map<String, Object>> getNestedHits(Map<String, Object> searchResponseAsMap) {
        Map<String, Object> hitsMap = (Map<String, Object>) searchResponseAsMap.get("hits");
        return (List<Map<String, Object>>) hitsMap.get("hits");
    }

    /**
     * Extracts the total-hits object ({@code hits.total}) from a parsed search response.
     *
     * @param searchResponseAsMap search response parsed into a map
     * @return the {@code hits.total} map (contains e.g. "value" and "relation")
     */
    @SuppressWarnings("unchecked")
    public static Map<String, Object> getTotalHits(Map<String, Object> searchResponseAsMap) {
        Map<String, Object> hitsMap = (Map<String, Object>) searchResponseAsMap.get("hits");
        return (Map<String, Object>) hitsMap.get("total");
    }

    /**
     * Extracts the top-level {@code aggregations} section from a parsed search response.
     *
     * @param searchResponseAsMap search response parsed into a map
     * @return the aggregations map, or {@code null} when the response has none
     */
    @SuppressWarnings("unchecked")
    public static Map<String, Object> getAggregations(final Map<String, Object> searchResponseAsMap) {
        Map<String, Object> aggsMap = (Map<String, Object>) searchResponseAsMap.get("aggregations");
        return aggsMap;
    }

    /**
     * Returns the {@code value} field of a single-value aggregation.
     *
     * @param aggsMap the aggregations section of a parsed search response
     * @param aggName name of the aggregation to read
     * @param <T>     expected type of the value (caller-asserted)
     * @return the aggregation's {@code value}, cast to {@code T}
     */
    @SuppressWarnings("unchecked")
    public static <T> T getAggregationValue(final Map<String, Object> aggsMap, final String aggName) {
        Map<String, Object> aggValues = (Map<String, Object>) aggsMap.get(aggName);
        return (T) aggValues.get("value");
    }

    /**
     * Returns the {@code buckets} field of a bucketed aggregation.
     *
     * @param aggsMap the aggregations section of a parsed search response
     * @param aggName name of the aggregation to read
     * @param <T>     expected type of the buckets container (caller-asserted)
     * @return the aggregation's {@code buckets}, cast to {@code T}
     */
    @SuppressWarnings("unchecked")
    public static <T> T getAggregationBuckets(final Map<String, Object> aggsMap, final String aggName) {
        Map<String, Object> aggValues = (Map<String, Object>) aggsMap.get(aggName);
        return (T) aggValues.get("buckets");
    }

    /**
     * Returns the whole entry for the named aggregation, without drilling into it.
     *
     * @param aggsMap the aggregations section of a parsed search response
     * @param aggName name of the aggregation to read
     * @param <T>     expected type of the entry (caller-asserted)
     * @return the raw aggregation entry, cast to {@code T}
     */
    @SuppressWarnings("unchecked")
    public static <T> T getAggregationValues(final Map<String, Object> aggsMap, final String aggName) {
        return (T) aggsMap.get(aggName);
    }
}
// --------------------------------------------------------------------------------
// [preserved dump separator]: /src/testFixtures/java/org/opensearch/neuralsearch/util/BatchIngestionUtils.java: --------------------------------------------------------------------------------
/*
 * Copyright OpenSearch Contributors
 * SPDX-License-Identifier: Apache-2.0
 */
// package org.opensearch.neuralsearch.util;  (original package declaration, kept as comment so the block is standalone-compilable)
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * A helper class to build docs for bulk request which is used by batch ingestion tests.
 *
 * The generic type parameters were restored here; the original dump had stripped
 * them (raw {@code List}/{@code Map} types).
 */
public class BatchIngestionUtils {

    // Fixed pool of sample texts; doc i gets TEXTS[i mod size], so texts cycle for large batches.
    private static final List<String> TEXTS = Arrays.asList(
        "hello",
        "world",
        "an apple",
        "find me",
        "birdy",
        "flying piggy",
        "newspaper",
        "dynamic programming",
        "random text",
        "finally"
    );

    /**
     * Builds {@code count} documents with sequential string ids starting at {@code startId}.
     * Each document is a map with an {@code "id"} and a {@code "text"} key.
     *
     * @param startId id of the first document (may be negative; texts still cycle correctly)
     * @param count   number of documents to build; non-positive yields an empty list
     * @return list of doc maps ready to be used in a bulk request
     */
    public static List<Map<String, String>> prepareDataForBulkIngestion(int startId, int count) {
        List<Map<String, String>> docs = new ArrayList<>(Math.max(count, 0));
        for (int i = startId; i < startId + count; ++i) {
            Map<String, String> params = new HashMap<>();
            params.put("id", Integer.toString(i));
            // floorMod instead of % so a negative startId cannot produce a negative index
            params.put("text", TEXTS.get(Math.floorMod(i, TEXTS.size())));
            docs.add(params);
        }
        return docs;
    }
}
// [preserved dump content — separator plus header/imports of NeuralSearchClusterTestUtils.java, continued on the next physical line]: -------------------------------------------------------------------------------- /src/testFixtures/java/org/opensearch/neuralsearch/util/NeuralSearchClusterTestUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright OpenSearch Contributors 3 | * SPDX-License-Identifier: Apache-2.0 4 | */ 5 | package org.opensearch.neuralsearch.util; 6 | 7 | import static org.mockito.Mockito.mock; 8 | import static org.mockito.Mockito.when; 9 | 10 | import org.opensearch.Version; 11 | import org.opensearch.cluster.ClusterState; 12 | import org.opensearch.cluster.metadata.IndexNameExpressionResolver; 13 | import org.opensearch.cluster.node.DiscoveryNodes; 14 | import org.opensearch.cluster.service.ClusterService; 15 |
import org.opensearch.common.settings.Settings; 16 | import org.opensearch.common.util.concurrent.ThreadContext; 17 | 18 | public class NeuralSearchClusterTestUtils { 19 | 20 | /** 21 | * Create new mock for ClusterService 22 | * @param version min version for cluster nodes 23 | * @return 24 | */ 25 | public static ClusterService mockClusterService(final Version version) { 26 | ClusterService clusterService = mock(ClusterService.class); 27 | ClusterState clusterState = mock(ClusterState.class); 28 | when(clusterService.state()).thenReturn(clusterState); 29 | DiscoveryNodes discoveryNodes = mock(DiscoveryNodes.class); 30 | when(clusterState.getNodes()).thenReturn(discoveryNodes); 31 | when(discoveryNodes.getMinNodeVersion()).thenReturn(version); 32 | return clusterService; 33 | } 34 | 35 | /** 36 | * Set up a simple NeuralSearchClusterUtil instance with a specified version. 37 | */ 38 | public static void setUpClusterService(Version version) { 39 | ClusterService clusterService = NeuralSearchClusterTestUtils.mockClusterService(version); 40 | IndexNameExpressionResolver indexNameExpressionResolver = new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)); 41 | NeuralSearchClusterUtil.instance().initialize(clusterService, indexNameExpressionResolver); 42 | } 43 | 44 | /** 45 | * Set up a simple NeuralSearchClusterUtil instance with current version. 46 | */ 47 | public static void setUpClusterService() { 48 | setUpClusterService(Version.CURRENT); 49 | } 50 | } 51 | --------------------------------------------------------------------------------