├── LICENSE
├── NOTICE
├── README.md
├── pom.xml
├── src
├── main
│ ├── java
│ │ └── org
│ │ │ └── apache
│ │ │ └── datasketches
│ │ │ ├── common
│ │ │ ├── ArrayOfBooleansSerDe.java
│ │ │ ├── ArrayOfDoublesSerDe.java
│ │ │ ├── ArrayOfItemsSerDe.java
│ │ │ ├── ArrayOfLongsSerDe.java
│ │ │ ├── ArrayOfNumbersSerDe.java
│ │ │ ├── ArrayOfStringsSerDe.java
│ │ │ ├── ArrayOfUtf16StringsSerDe.java
│ │ │ ├── BoundsOnBinomialProportions.java
│ │ │ ├── BoundsOnRatiosInSampledSets.java
│ │ │ ├── ByteArrayUtil.java
│ │ │ ├── Family.java
│ │ │ ├── MemoryStatus.java
│ │ │ ├── ResizeFactor.java
│ │ │ ├── SketchesArgumentException.java
│ │ │ ├── SketchesException.java
│ │ │ ├── SketchesReadOnlyException.java
│ │ │ ├── SketchesStateException.java
│ │ │ ├── SuppressFBWarnings.java
│ │ │ ├── Util.java
│ │ │ └── package-info.java
│ │ │ ├── cpc
│ │ │ ├── BitMatrix.java
│ │ │ ├── CompressedState.java
│ │ │ ├── CompressionCharacterization.java
│ │ │ ├── CompressionData.java
│ │ │ ├── CpcCompression.java
│ │ │ ├── CpcConfidence.java
│ │ │ ├── CpcSketch.java
│ │ │ ├── CpcUnion.java
│ │ │ ├── CpcUtil.java
│ │ │ ├── CpcWrapper.java
│ │ │ ├── Flavor.java
│ │ │ ├── Format.java
│ │ │ ├── IconEstimator.java
│ │ │ ├── IconPolynomialCoefficients.java
│ │ │ ├── MergingValidation.java
│ │ │ ├── PairTable.java
│ │ │ ├── PreambleUtil.java
│ │ │ ├── QuickMergingValidation.java
│ │ │ ├── RuntimeAsserts.java
│ │ │ ├── StreamingValidation.java
│ │ │ ├── TestUtil.java
│ │ │ └── package-info.java
│ │ │ ├── fdt
│ │ │ ├── FdtSketch.java
│ │ │ ├── Group.java
│ │ │ ├── PostProcessor.java
│ │ │ └── package-info.java
│ │ │ ├── filters
│ │ │ ├── bloomfilter
│ │ │ │ ├── BitArray.java
│ │ │ │ ├── BloomFilter.java
│ │ │ │ ├── BloomFilterBuilder.java
│ │ │ │ ├── DirectBitArray.java
│ │ │ │ ├── DirectBitArrayR.java
│ │ │ │ ├── HeapBitArray.java
│ │ │ │ └── package-info.java
│ │ │ └── package-info.java
│ │ │ ├── frequencies
│ │ │ ├── ErrorType.java
│ │ │ ├── ItemsSketch.java
│ │ │ ├── LongsSketch.java
│ │ │ ├── PreambleUtil.java
│ │ │ ├── ReversePurgeItemHashMap.java
│ │ │ ├── ReversePurgeLongHashMap.java
│ │ │ ├── Util.java
│ │ │ └── package-info.java
│ │ │ ├── hash
│ │ │ ├── MurmurHash3.java
│ │ │ ├── MurmurHash3FFM21.java
│ │ │ ├── XxHash.java
│ │ │ └── package-info.java
│ │ │ ├── hll
│ │ │ ├── AbstractCoupons.java
│ │ │ ├── AbstractHllArray.java
│ │ │ ├── AuxHashMap.java
│ │ │ ├── BaseHllSketch.java
│ │ │ ├── CompositeInterpolationXTable.java
│ │ │ ├── Conversions.java
│ │ │ ├── CouponHashSet.java
│ │ │ ├── CouponList.java
│ │ │ ├── CouponMapping.java
│ │ │ ├── CubicInterpolation.java
│ │ │ ├── CurMode.java
│ │ │ ├── DirectAuxHashMap.java
│ │ │ ├── DirectCouponHashSet.java
│ │ │ ├── DirectCouponList.java
│ │ │ ├── DirectHll4Array.java
│ │ │ ├── DirectHll6Array.java
│ │ │ ├── DirectHll8Array.java
│ │ │ ├── DirectHllArray.java
│ │ │ ├── HarmonicNumbers.java
│ │ │ ├── HeapAuxHashMap.java
│ │ │ ├── Hll4Array.java
│ │ │ ├── Hll4Update.java
│ │ │ ├── Hll6Array.java
│ │ │ ├── Hll8Array.java
│ │ │ ├── HllArray.java
│ │ │ ├── HllEstimators.java
│ │ │ ├── HllPairIterator.java
│ │ │ ├── HllSketch.java
│ │ │ ├── HllSketchImpl.java
│ │ │ ├── HllUtil.java
│ │ │ ├── IntArrayPairIterator.java
│ │ │ ├── IntMemoryPairIterator.java
│ │ │ ├── PairIterator.java
│ │ │ ├── PreambleUtil.java
│ │ │ ├── RelativeErrorTables.java
│ │ │ ├── TgtHllType.java
│ │ │ ├── ToByteArrayImpl.java
│ │ │ ├── Union.java
│ │ │ ├── doc-files
│ │ │ │ ├── HLL_HIP_K12T20U20.png
│ │ │ │ └── HLL_UnionTime4_6_8_Java_CPP.png
│ │ │ └── package-info.java
│ │ │ ├── hllmap
│ │ │ ├── CouponHashMap.java
│ │ │ ├── CouponTraverseMap.java
│ │ │ ├── CouponsIterator.java
│ │ │ ├── HllMap.java
│ │ │ ├── Map.java
│ │ │ ├── SingleCouponMap.java
│ │ │ ├── UniqueCountMap.java
│ │ │ └── package-info.java
│ │ │ ├── kll
│ │ │ ├── KllDirectCompactItemsSketch.java
│ │ │ ├── KllDirectDoublesSketch.java
│ │ │ ├── KllDirectFloatsSketch.java
│ │ │ ├── KllDirectLongsSketch.java
│ │ │ ├── KllDoublesHelper.java
│ │ │ ├── KllDoublesSketch.java
│ │ │ ├── KllDoublesSketchIterator.java
│ │ │ ├── KllFloatsHelper.java
│ │ │ ├── KllFloatsSketch.java
│ │ │ ├── KllFloatsSketchIterator.java
│ │ │ ├── KllHeapDoublesSketch.java
│ │ │ ├── KllHeapFloatsSketch.java
│ │ │ ├── KllHeapItemsSketch.java
│ │ │ ├── KllHeapLongsSketch.java
│ │ │ ├── KllHelper.java
│ │ │ ├── KllItemsHelper.java
│ │ │ ├── KllItemsSketch.java
│ │ │ ├── KllItemsSketchIterator.java
│ │ │ ├── KllLongsHelper.java
│ │ │ ├── KllLongsSketch.java
│ │ │ ├── KllLongsSketchIterator.java
│ │ │ ├── KllMemoryValidate.java
│ │ │ ├── KllPreambleUtil.java
│ │ │ ├── KllSketch.java
│ │ │ ├── KllSketchIterator.java
│ │ │ └── package-info.java
│ │ │ ├── package-info.java
│ │ │ ├── partitions
│ │ │ ├── BoundsRule.java
│ │ │ ├── Partitioner.java
│ │ │ ├── SketchFillRequest.java
│ │ │ └── package-info.java
│ │ │ ├── quantiles
│ │ │ ├── ClassicUtil.java
│ │ │ ├── CompactDoublesSketch.java
│ │ │ ├── DirectCompactDoublesSketch.java
│ │ │ ├── DirectDoublesSketchAccessor.java
│ │ │ ├── DirectUpdateDoublesSketch.java
│ │ │ ├── DirectUpdateDoublesSketchR.java
│ │ │ ├── DoublesArrayAccessor.java
│ │ │ ├── DoublesBufferAccessor.java
│ │ │ ├── DoublesByteArrayImpl.java
│ │ │ ├── DoublesMergeImpl.java
│ │ │ ├── DoublesSketch.java
│ │ │ ├── DoublesSketchAccessor.java
│ │ │ ├── DoublesSketchBuilder.java
│ │ │ ├── DoublesSketchIterator.java
│ │ │ ├── DoublesUnion.java
│ │ │ ├── DoublesUnionBuilder.java
│ │ │ ├── DoublesUnionImpl.java
│ │ │ ├── DoublesUnionImplR.java
│ │ │ ├── DoublesUpdateImpl.java
│ │ │ ├── DoublesUtil.java
│ │ │ ├── HeapCompactDoublesSketch.java
│ │ │ ├── HeapDoublesSketchAccessor.java
│ │ │ ├── HeapUpdateDoublesSketch.java
│ │ │ ├── ItemsByteArrayImpl.java
│ │ │ ├── ItemsMergeImpl.java
│ │ │ ├── ItemsSketch.java
│ │ │ ├── ItemsSketchIterator.java
│ │ │ ├── ItemsUnion.java
│ │ │ ├── ItemsUpdateImpl.java
│ │ │ ├── ItemsUtil.java
│ │ │ ├── PreambleUtil.java
│ │ │ ├── UpdateDoublesSketch.java
│ │ │ └── package-info.java
│ │ │ ├── quantilescommon
│ │ │ ├── BinarySearch.java
│ │ │ ├── DoublesSketchSortedView.java
│ │ │ ├── DoublesSortedView.java
│ │ │ ├── DoublesSortedViewIterator.java
│ │ │ ├── FloatsSketchSortedView.java
│ │ │ ├── FloatsSortedView.java
│ │ │ ├── FloatsSortedViewIterator.java
│ │ │ ├── GenericInequalitySearch.java
│ │ │ ├── GenericPartitionBoundaries.java
│ │ │ ├── GenericSortedView.java
│ │ │ ├── GenericSortedViewIterator.java
│ │ │ ├── IncludeMinMax.java
│ │ │ ├── InequalitySearch.java
│ │ │ ├── ItemsSketchSortedView.java
│ │ │ ├── KolmogorovSmirnov.java
│ │ │ ├── LongsSketchSortedView.java
│ │ │ ├── LongsSortedView.java
│ │ │ ├── LongsSortedViewIterator.java
│ │ │ ├── PartitioningFeature.java
│ │ │ ├── QuantileSearchCriteria.java
│ │ │ ├── QuantilesAPI.java
│ │ │ ├── QuantilesDoublesAPI.java
│ │ │ ├── QuantilesDoublesSketchIterator.java
│ │ │ ├── QuantilesFloatsAPI.java
│ │ │ ├── QuantilesFloatsSketchIterator.java
│ │ │ ├── QuantilesGenericAPI.java
│ │ │ ├── QuantilesGenericSketchIterator.java
│ │ │ ├── QuantilesLongsAPI.java
│ │ │ ├── QuantilesLongsSketchIterator.java
│ │ │ ├── QuantilesSketchIterator.java
│ │ │ ├── QuantilesUtil.java
│ │ │ ├── SketchPartitionLimits.java
│ │ │ ├── SortedView.java
│ │ │ ├── SortedViewIterator.java
│ │ │ └── package-info.java
│ │ │ ├── req
│ │ │ ├── BaseReqSketch.java
│ │ │ ├── FloatBuffer.java
│ │ │ ├── ReqCompactor.java
│ │ │ ├── ReqDebug.java
│ │ │ ├── ReqSerDe.java
│ │ │ ├── ReqSketch.java
│ │ │ ├── ReqSketchBuilder.java
│ │ │ ├── ReqSketchIterator.java
│ │ │ └── package-info.java
│ │ │ ├── sampling
│ │ │ ├── EbppsItemsSample.java
│ │ │ ├── EbppsItemsSketch.java
│ │ │ ├── PreambleUtil.java
│ │ │ ├── ReservoirItemsSketch.java
│ │ │ ├── ReservoirItemsUnion.java
│ │ │ ├── ReservoirLongsSketch.java
│ │ │ ├── ReservoirLongsUnion.java
│ │ │ ├── ReservoirSize.java
│ │ │ ├── SampleSubsetSummary.java
│ │ │ ├── SamplingUtil.java
│ │ │ ├── VarOptItemsSamples.java
│ │ │ ├── VarOptItemsSketch.java
│ │ │ ├── VarOptItemsUnion.java
│ │ │ └── package-info.java
│ │ │ ├── tdigest
│ │ │ ├── BinarySearch.java
│ │ │ ├── Sort.java
│ │ │ ├── TDigestDouble.java
│ │ │ └── package-info.java
│ │ │ ├── theta
│ │ │ ├── AnotB.java
│ │ │ ├── AnotBimpl.java
│ │ │ ├── BitPacking.java
│ │ │ ├── BytesCompactCompressedHashIterator.java
│ │ │ ├── BytesCompactHashIterator.java
│ │ │ ├── CompactOperations.java
│ │ │ ├── CompactSketch.java
│ │ │ ├── ConcurrentBackgroundThetaPropagation.java
│ │ │ ├── ConcurrentDirectQuickSelectSketch.java
│ │ │ ├── ConcurrentHeapQuickSelectSketch.java
│ │ │ ├── ConcurrentHeapThetaBuffer.java
│ │ │ ├── ConcurrentPropagationService.java
│ │ │ ├── ConcurrentSharedThetaSketch.java
│ │ │ ├── DirectCompactCompressedSketch.java
│ │ │ ├── DirectCompactSketch.java
│ │ │ ├── DirectQuickSelectSketch.java
│ │ │ ├── DirectQuickSelectSketchR.java
│ │ │ ├── EmptyCompactSketch.java
│ │ │ ├── ForwardCompatibility.java
│ │ │ ├── HashIterator.java
│ │ │ ├── HeapAlphaSketch.java
│ │ │ ├── HeapCompactHashIterator.java
│ │ │ ├── HeapCompactSketch.java
│ │ │ ├── HeapHashIterator.java
│ │ │ ├── HeapQuickSelectSketch.java
│ │ │ ├── HeapUpdateSketch.java
│ │ │ ├── Intersection.java
│ │ │ ├── IntersectionImpl.java
│ │ │ ├── JaccardSimilarity.java
│ │ │ ├── MemoryCompactCompressedHashIterator.java
│ │ │ ├── MemoryHashIterator.java
│ │ │ ├── PreambleUtil.java
│ │ │ ├── Rebuilder.java
│ │ │ ├── SetOperation.java
│ │ │ ├── SetOperationBuilder.java
│ │ │ ├── SingleItemSketch.java
│ │ │ ├── Sketch.java
│ │ │ ├── Sketches.java
│ │ │ ├── Union.java
│ │ │ ├── UnionImpl.java
│ │ │ ├── UpdateReturnState.java
│ │ │ ├── UpdateSketch.java
│ │ │ ├── UpdateSketchBuilder.java
│ │ │ ├── WrappedCompactCompressedSketch.java
│ │ │ ├── WrappedCompactSketch.java
│ │ │ └── package-info.java
│ │ │ ├── thetacommon
│ │ │ ├── BinomialBoundsN.java
│ │ │ ├── BoundsOnRatiosInThetaSketchedSets.java
│ │ │ ├── BoundsOnRatiosInTupleSketchedSets.java
│ │ │ ├── EquivTables.java
│ │ │ ├── HashOperations.java
│ │ │ ├── QuickSelect.java
│ │ │ ├── SetOperationCornerCases.java
│ │ │ ├── ThetaUtil.java
│ │ │ └── package-info.java
│ │ │ └── tuple
│ │ │ ├── AnotB.java
│ │ │ ├── CompactSketch.java
│ │ │ ├── DeserializeResult.java
│ │ │ ├── Filter.java
│ │ │ ├── HashTables.java
│ │ │ ├── Intersection.java
│ │ │ ├── JaccardSimilarity.java
│ │ │ ├── QuickSelectSketch.java
│ │ │ ├── SerializerDeserializer.java
│ │ │ ├── Sketch.java
│ │ │ ├── Sketches.java
│ │ │ ├── Summary.java
│ │ │ ├── SummaryDeserializer.java
│ │ │ ├── SummaryFactory.java
│ │ │ ├── SummarySetOperations.java
│ │ │ ├── TupleSketchIterator.java
│ │ │ ├── Union.java
│ │ │ ├── UpdatableSketch.java
│ │ │ ├── UpdatableSketchBuilder.java
│ │ │ ├── UpdatableSummary.java
│ │ │ ├── Util.java
│ │ │ ├── adouble
│ │ │ ├── DoubleSketch.java
│ │ │ ├── DoubleSummary.java
│ │ │ ├── DoubleSummaryDeserializer.java
│ │ │ ├── DoubleSummaryFactory.java
│ │ │ ├── DoubleSummarySetOperations.java
│ │ │ └── package-info.java
│ │ │ ├── aninteger
│ │ │ ├── IntegerSketch.java
│ │ │ ├── IntegerSummary.java
│ │ │ ├── IntegerSummaryDeserializer.java
│ │ │ ├── IntegerSummaryFactory.java
│ │ │ ├── IntegerSummarySetOperations.java
│ │ │ └── package-info.java
│ │ │ ├── arrayofdoubles
│ │ │ ├── ArrayOfDoublesAnotB.java
│ │ │ ├── ArrayOfDoublesAnotBImpl.java
│ │ │ ├── ArrayOfDoublesCombiner.java
│ │ │ ├── ArrayOfDoublesCompactSketch.java
│ │ │ ├── ArrayOfDoublesIntersection.java
│ │ │ ├── ArrayOfDoublesQuickSelectSketch.java
│ │ │ ├── ArrayOfDoublesSetOperationBuilder.java
│ │ │ ├── ArrayOfDoublesSketch.java
│ │ │ ├── ArrayOfDoublesSketchIterator.java
│ │ │ ├── ArrayOfDoublesSketches.java
│ │ │ ├── ArrayOfDoublesUnion.java
│ │ │ ├── ArrayOfDoublesUpdatableSketch.java
│ │ │ ├── ArrayOfDoublesUpdatableSketchBuilder.java
│ │ │ ├── DirectArrayOfDoublesCompactSketch.java
│ │ │ ├── DirectArrayOfDoublesIntersection.java
│ │ │ ├── DirectArrayOfDoublesQuickSelectSketch.java
│ │ │ ├── DirectArrayOfDoublesQuickSelectSketchR.java
│ │ │ ├── DirectArrayOfDoublesSketchIterator.java
│ │ │ ├── DirectArrayOfDoublesUnion.java
│ │ │ ├── DirectArrayOfDoublesUnionR.java
│ │ │ ├── HashTables.java
│ │ │ ├── HeapArrayOfDoublesCompactSketch.java
│ │ │ ├── HeapArrayOfDoublesIntersection.java
│ │ │ ├── HeapArrayOfDoublesQuickSelectSketch.java
│ │ │ ├── HeapArrayOfDoublesSketchIterator.java
│ │ │ ├── HeapArrayOfDoublesUnion.java
│ │ │ └── package-info.java
│ │ │ ├── package-info.java
│ │ │ └── strings
│ │ │ ├── ArrayOfStringsSketch.java
│ │ │ ├── ArrayOfStringsSummary.java
│ │ │ ├── ArrayOfStringsSummaryDeserializer.java
│ │ │ ├── ArrayOfStringsSummaryFactory.java
│ │ │ ├── ArrayOfStringsSummarySetOperations.java
│ │ │ └── package-info.java
│ └── javadoc
│ │ ├── overview.html
│ │ └── resources
│ │ └── dictionary.html
└── test
│ ├── java
│ └── org
│ │ └── apache
│ │ └── datasketches
│ │ ├── common
│ │ ├── ArrayOfXSerDeTest.java
│ │ ├── BoundsOnBinomialProportionsTest.java
│ │ ├── BoundsOnRatiosInSampledSetsTest.java
│ │ ├── ByteArrayUtilTest.java
│ │ ├── FamilyTest.java
│ │ ├── Shuffle.java
│ │ ├── ShuffleTest.java
│ │ ├── SketchesExceptionTest.java
│ │ ├── TestUtil.java
│ │ └── UtilTest.java
│ │ ├── cpc
│ │ ├── CompressedStateTest.java
│ │ ├── CompressionDataTest.java
│ │ ├── CpcCompressionTest.java
│ │ ├── CpcSketchCrossLanguageTest.java
│ │ ├── CpcSketchTest.java
│ │ ├── CpcUnionTest.java
│ │ ├── CpcWrapperTest.java
│ │ ├── IconEstimatorTest.java
│ │ ├── PairTableTest.java
│ │ ├── PreambleUtilTest.java
│ │ ├── RuntimeAssertsTest.java
│ │ └── TestAllTest.java
│ │ ├── fdt
│ │ ├── FdtSketchTest.java
│ │ └── GroupTest.java
│ │ ├── filters
│ │ └── bloomfilter
│ │ │ ├── BloomFilterBuilderTest.java
│ │ │ ├── BloomFilterCrossLanguageTest.java
│ │ │ ├── BloomFilterTest.java
│ │ │ ├── DirectBitArrayRTest.java
│ │ │ ├── DirectBitArrayTest.java
│ │ │ └── HeapBitArrayTest.java
│ │ ├── frequencies
│ │ ├── DistTest.java
│ │ ├── FrequentItemsSketchCrossLanguageTest.java
│ │ ├── HashMapStressTest.java
│ │ ├── ItemsSketchTest.java
│ │ ├── LongsSketchTest.java
│ │ ├── ReversePurgeLongHashMapTest.java
│ │ └── SerDeCompatibilityTest.java
│ │ ├── hash
│ │ ├── MurmurHash3FFM21Test.java
│ │ ├── MurmurHash3FFM21bTest.java
│ │ ├── MurmurHash3Test.java
│ │ └── XxHashTest.java
│ │ ├── hll
│ │ ├── AuxHashMapTest.java
│ │ ├── BaseHllSketchTest.java
│ │ ├── CouponListTest.java
│ │ ├── CrossCountingTest.java
│ │ ├── DirectAuxHashMapTest.java
│ │ ├── DirectCouponListTest.java
│ │ ├── DirectHllSketchTest.java
│ │ ├── DirectUnionTest.java
│ │ ├── HllArrayTest.java
│ │ ├── HllSketchCrossLanguageTest.java
│ │ ├── HllSketchTest.java
│ │ ├── IsomorphicTest.java
│ │ ├── PreambleUtilTest.java
│ │ ├── SizeAndModeTransitions.java
│ │ ├── TablesTest.java
│ │ ├── ToFromByteArrayTest.java
│ │ ├── UnionCaseTest.java
│ │ └── UnionTest.java
│ │ ├── hllmap
│ │ ├── CouponHashMapTest.java
│ │ ├── CouponTraverseMapTest.java
│ │ ├── HllMapTest.java
│ │ ├── SingleCouponMapTest.java
│ │ └── UniqueCountMapTest.java
│ │ ├── kll
│ │ ├── KllCrossLanguageTest.java
│ │ ├── KllDirectCompactDoublesSketchTest.java
│ │ ├── KllDirectCompactFloatsSketchTest.java
│ │ ├── KllDirectCompactItemsSketchIteratorTest.java
│ │ ├── KllDirectCompactItemsSketchTest.java
│ │ ├── KllDirectCompactLongsSketchTest.java
│ │ ├── KllDirectDoublesSketchIteratorTest.java
│ │ ├── KllDirectDoublesSketchTest.java
│ │ ├── KllDirectFloatsSketchIteratorTest.java
│ │ ├── KllDirectFloatsSketchTest.java
│ │ ├── KllDirectLongsSketchIteratorTest.java
│ │ ├── KllDirectLongsSketchTest.java
│ │ ├── KllDoublesSketchIteratorTest.java
│ │ ├── KllDoublesSketchSerDeTest.java
│ │ ├── KllDoublesSketchTest.java
│ │ ├── KllDoublesValidationTest.java
│ │ ├── KllFloatsSketchIteratorTest.java
│ │ ├── KllFloatsSketchSerDeTest.java
│ │ ├── KllFloatsSketchTest.java
│ │ ├── KllFloatsValidationTest.java
│ │ ├── KllHelperTest.java
│ │ ├── KllItemsSketchSerDeTest.java
│ │ ├── KllItemsSketchTest.java
│ │ ├── KllItemsSketchiteratorTest.java
│ │ ├── KllLongsSketchIteratorTest.java
│ │ ├── KllLongsSketchSerDeTest.java
│ │ ├── KllLongsSketchTest.java
│ │ ├── KllMemoryValidateTest.java
│ │ ├── KllMiscDirectDoublesTest.java
│ │ ├── KllMiscDirectFloatsTest.java
│ │ ├── KllMiscDirectLongsTest.java
│ │ ├── KllMiscDoublesTest.java
│ │ ├── KllMiscFloatsTest.java
│ │ ├── KllMiscItemsTest.java
│ │ ├── KllMiscLongsTest.java
│ │ └── KllSketchTest.java
│ │ ├── partitions
│ │ ├── ClassicPartitionsTest.java
│ │ ├── ItemsSketchFillRequestLongAsString.java
│ │ ├── KllItemsSketchFillRequestLongAsString.java
│ │ ├── KllPartitionsTest.java
│ │ └── PartitionResults.java
│ │ ├── quantiles
│ │ ├── AccuracyTest.java
│ │ ├── CustomQuantilesTest.java
│ │ ├── DebugUnionTest.java
│ │ ├── DirectCompactDoublesSketchTest.java
│ │ ├── DirectQuantilesMemoryRequestTest.java
│ │ ├── DirectUpdateDoublesSketchTest.java
│ │ ├── DoublesSketchBuilderTest.java
│ │ ├── DoublesSketchIteratorTest.java
│ │ ├── DoublesSketchTest.java
│ │ ├── DoublesUnionBuilderTest.java
│ │ ├── DoublesUnionImplTest.java
│ │ ├── DoublesUtilTest.java
│ │ ├── HeapCompactDoublesSketchTest.java
│ │ ├── HeapUpdateDoublesSketchTest.java
│ │ ├── ItemsSketchIteratorTest.java
│ │ ├── ItemsSketchTest.java
│ │ ├── ItemsUnionTest.java
│ │ ├── PreambleUtilTest.java
│ │ ├── QuantilesSketchCrossLanguageTest.java
│ │ ├── ReadOnlyMemoryTest.java
│ │ ├── SerDeCompatibilityTest.java
│ │ └── UtilTest.java
│ │ ├── quantilescommon
│ │ ├── CrossCheckQuantilesTest.java
│ │ ├── GenericInequalitySearchTest.java
│ │ ├── IncludeMinMaxTest.java
│ │ ├── InequalitySearchTest.java
│ │ ├── KolmogorovSmirnovTest.java
│ │ ├── LinearRanksAndQuantiles.java
│ │ ├── LongsAsOrderableStrings.java
│ │ ├── PartitionBoundariesTest.java
│ │ └── QuantilesUtilTest.java
│ │ ├── req
│ │ ├── ReqCompactorTest.java
│ │ ├── ReqDebugImplTest.java
│ │ ├── ReqFloatBufferTest.java
│ │ ├── ReqSketchBuilderTest.java
│ │ ├── ReqSketchCrossLanguageTest.java
│ │ ├── ReqSketchOtherTest.java
│ │ ├── ReqSketchSortedViewTest.java
│ │ └── ReqSketchTest.java
│ │ ├── sampling
│ │ ├── EbppsItemsSampleTest.java
│ │ ├── EbppsItemsSketchTest.java
│ │ ├── ReservoirItemsSketchTest.java
│ │ ├── ReservoirItemsUnionTest.java
│ │ ├── ReservoirLongsSketchTest.java
│ │ ├── ReservoirLongsUnionTest.java
│ │ ├── ReservoirSizeTest.java
│ │ ├── VarOptCrossLanguageTest.java
│ │ ├── VarOptItemsSamplesTest.java
│ │ ├── VarOptItemsSketchTest.java
│ │ └── VarOptItemsUnionTest.java
│ │ ├── tdigest
│ │ ├── SortTest.java
│ │ ├── TDigestCrossLanguageTest.java
│ │ └── TDigestDoubleTest.java
│ │ ├── theta
│ │ ├── AnotBimplTest.java
│ │ ├── BackwardConversions.java
│ │ ├── BitPackingTest.java
│ │ ├── CompactSketchTest.java
│ │ ├── ConcurrentDirectQuickSelectSketchTest.java
│ │ ├── ConcurrentHeapQuickSelectSketchTest.java
│ │ ├── CornerCaseThetaSetOperationsTest.java
│ │ ├── DirectIntersectionTest.java
│ │ ├── DirectQuickSelectSketchTest.java
│ │ ├── DirectUnionTest.java
│ │ ├── EmptyTest.java
│ │ ├── ExamplesTest.java
│ │ ├── ForwardCompatibilityTest.java
│ │ ├── HeapAlphaSketchTest.java
│ │ ├── HeapIntersectionTest.java
│ │ ├── HeapQuickSelectSketchTest.java
│ │ ├── HeapUnionTest.java
│ │ ├── HeapifyWrapSerVer1and2Test.java
│ │ ├── IteratorTest.java
│ │ ├── JaccardSimilarityTest.java
│ │ ├── PairwiseSetOperationsTest.java
│ │ ├── PreambleUtilTest.java
│ │ ├── ReadOnlyMemoryTest.java
│ │ ├── SetOperationTest.java
│ │ ├── SetOpsCornerCasesTest.java
│ │ ├── SingleItemSketchTest.java
│ │ ├── SketchTest.java
│ │ ├── SketchesTest.java
│ │ ├── ThetaSketchCrossLanguageTest.java
│ │ ├── UnionImplTest.java
│ │ └── UpdateSketchTest.java
│ │ ├── thetacommon
│ │ ├── BinomialBoundsNTest.java
│ │ ├── BoundsOnRatiosInThetaSketchedSetsTest.java
│ │ ├── BoundsOnRatiosInTupleSketchedSetsTest.java
│ │ ├── HashOperationsTest.java
│ │ ├── QuickSelectTest.java
│ │ └── ThetaUtilTest.java
│ │ └── tuple
│ │ ├── CompactSketchWithDoubleSummaryTest.java
│ │ ├── IntegerSummary.java
│ │ ├── IntegerSummaryDeserializer.java
│ │ ├── IntegerSummaryFactory.java
│ │ ├── JaccardSimilarityTest.java
│ │ ├── MiscTest.java
│ │ ├── ReadOnlyMemoryTest.java
│ │ ├── SerializerDeserializerTest.java
│ │ ├── TupleCrossLanguageTest.java
│ │ ├── TupleExamples2Test.java
│ │ ├── TupleExamplesTest.java
│ │ ├── adouble
│ │ ├── AdoubleAnotBTest.java
│ │ ├── AdoubleIntersectionTest.java
│ │ ├── AdoubleTest.java
│ │ ├── AdoubleUnionTest.java
│ │ └── FilterTest.java
│ │ ├── aninteger
│ │ ├── CornerCaseTupleSetOperationsTest.java
│ │ ├── EngagementTest.java
│ │ ├── IntegerSketchTest.java
│ │ ├── MikhailsBugTupleTest.java
│ │ └── ParameterLeakageTest.java
│ │ ├── arrayofdoubles
│ │ ├── AodSketchCrossLanguageTest.java
│ │ ├── ArrayOfDoublesAnotBTest.java
│ │ ├── ArrayOfDoublesCompactSketchTest.java
│ │ ├── ArrayOfDoublesIntersectionTest.java
│ │ ├── ArrayOfDoublesQuickSelectSketchTest.java
│ │ ├── ArrayOfDoublesUnionTest.java
│ │ ├── CornerCaseArrayOfDoublesSetOperationsTest.java
│ │ ├── DirectArrayOfDoublesCompactSketchTest.java
│ │ ├── DirectArrayOfDoublesQuickSelectSketchTest.java
│ │ ├── HeapArrayOfDoublesCompactSketchTest.java
│ │ └── HeapArrayOfDoublesQuickSelectSketchTest.java
│ │ └── strings
│ │ ├── ArrayOfStringsSketchTest.java
│ │ └── ArrayOfStringsSummaryTest.java
│ └── resources
│ ├── ArrayOfDoublesUnion_v0.9.1.sk
│ ├── CompactSketchWithDoubleSummary4K_serialVersion1.sk
│ ├── Qk128_n1000_v0.3.0.sk
│ ├── Qk128_n1000_v0.6.0.sk
│ ├── Qk128_n1000_v0.8.0.sk
│ ├── Qk128_n1000_v0.8.3.sk
│ ├── Qk128_n50_v0.3.0.sk
│ ├── Qk128_n50_v0.6.0.sk
│ ├── Qk128_n50_v0.8.0.sk
│ ├── Qk128_n50_v0.8.3.sk
│ ├── TupleWithTestIntegerSummary4kTrimmedSerVer2.sk
│ ├── kll_double_n1.sk
│ ├── kll_sketch_double_one_item_v1.sk
│ ├── kll_sketch_float_one_item_v1.sk
│ ├── tdigest_ref_k100_n10000_double.sk
│ └── tdigest_ref_k100_n10000_float.sk
└── tools
├── FindBugsExcludeFilter.xml
└── SketchesCheckstyle.xml
/NOTICE:
--------------------------------------------------------------------------------
1 | Apache DataSketches Java
2 | Copyright 2025 The Apache Software Foundation
3 |
4 | Copyright 2015-2018 Yahoo Inc.
5 | Copyright 2019-2020 Verizon Media
6 | Copyright 2021- Yahoo Inc.
7 |
8 | This product includes software developed at
9 | The Apache Software Foundation (http://www.apache.org/).
10 |
11 | Prior to moving to ASF, the software for this project was developed at
12 | Yahoo Inc. (https://developer.yahoo.com).
13 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/common/MemoryStatus.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.common;
21 |
22 | import org.apache.datasketches.memory.Memory;
23 |
24 | /**
25 | * Methods for inquiring the status of a backing Memory object.
26 | */
27 | public interface MemoryStatus {
28 |
29 | /**
30 | * Returns true if this object's internal data is backed by a Memory object,
31 | * which may be on-heap or off-heap.
32 | * @return true if this object's internal data is backed by a Memory object.
33 | */
34 | default boolean hasMemory() { return false; }
35 |
36 | /**
37 | * Returns true if this object's internal data is backed by direct (off-heap) Memory.
38 | * @return true if this object's internal data is backed by direct (off-heap) Memory.
39 | */
40 | default boolean isDirect() { return false; }
41 |
42 | /**
43 | * Returns true if the backing resource of this is identical with the backing resource
44 | * of that. The capacities must be the same. If this is a region,
45 | * the region offset must also be the same.
46 | *
47 | * @param that A different non-null and alive Memory object.
48 | * @return true if the backing resource of this is identical with the backing resource
49 | * of that.
50 | * @throws SketchesArgumentException if that is not alive (already closed).
51 | */
52 | default boolean isSameResource(final Memory that) { return false; }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/common/ResizeFactor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.common;
21 |
/**
 * For the Families that accept this configuration parameter, it controls the size multiple that
 * affects how fast the internal cache grows, when more space is required.
 * See Resize Factor.
 *
 * <p>Each constant stores the log-base 2 of its multiplier; the multiplier itself is
 * {@code 1 << lg()}.</p>
 *
 * @author Lee Rhodes
 */
public enum ResizeFactor {
  /**
   * Do not resize. Sketch will be configured to full size.
   */
  X1(0),
  /**
   * Resize factor is 2.
   */
  X2(1),
  /**
   * Resize factor is 4.
   */
  X4(2),
  /**
   * Resize factor is 8.
   */
  X8(3);

  // Enum constants are shared singletons, so their state is kept immutable.
  private final int lg_;

  ResizeFactor(final int lg) {
    this.lg_ = lg;
  }

  /**
   * Returns the Log-base 2 of the Resize Factor
   * @return the Log-base 2 of the Resize Factor
   */
  public int lg() {
    return lg_;
  }

  /**
   * Returns the Resize Factor given the Log-base 2 of the Resize Factor.
   *
   * <p>No range check is performed: any argument other than 0, 1 or 2 (including values
   * outside the documented range) yields {@link #X8}.</p>
   *
   * @param lg a value between zero and 3, inclusive.
   * @return the Resize Factor given the Log-base 2 of the Resize Factor
   */
  public static ResizeFactor getRF(final int lg) {
    if (X1.lg() == lg) { return X1; }
    if (X2.lg() == lg) { return X2; }
    if (X4.lg() == lg) { return X4; }
    // Deliberate fall-through: everything else maps to the largest factor.
    return X8;
  }

  /**
   * Returns the Resize Factor, computed as 2 raised to the power {@link #lg()}.
   * @return the Resize Factor
   */
  public int getValue() {
    return 1 << lg_;
  }
}
81 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/common/SketchesArgumentException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.common;
21 |
22 | /**
23 | * Illegal Arguments Exception class for the library
24 | *
25 | * @author Lee Rhodes
26 | */
27 | public class SketchesArgumentException extends SketchesException {
28 | private static final long serialVersionUID = 1L;
29 |
30 | //other constructors to be added as needed.
31 |
32 | /**
33 | * Constructs a new runtime exception with the specified detail message. The cause is not
34 | * initialized, and may subsequently be initialized by a call to
35 | * Throwable.initCause(java.lang.Throwable).
36 | *
37 | * @param message the detail message. The detail message is saved for later retrieval by the
38 | * Throwable.getMessage() method.
39 | */
40 | public SketchesArgumentException(final String message) {
41 | super(message);
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/common/SketchesException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.common;
21 |
/**
 * Exception class for the library. It is unchecked, since it extends
 * {@link RuntimeException}.
 *
 * @author Lee Rhodes
 */
public class SketchesException extends RuntimeException {
  private static final long serialVersionUID = 1L;

  //other constructors to be added as needed.

  /**
   * Creates the exception carrying only a detail message. No cause is set here; one may be
   * attached afterwards with {@link Throwable#initCause(java.lang.Throwable)}.
   *
   * @param message the detail message, which can be read back later through
   *     {@link Throwable#getMessage()}.
   */
  public SketchesException(final String message) {
    super(message);
  }

  /**
   * Creates the exception carrying both a detail message and a cause.
   *
   * <p>Note that the detail message associated with <code>cause</code> is <i>not</i>
   * automatically incorporated in this runtime exception's detail message.</p>
   *
   * @param message the detail message (which is saved for later retrieval by the
   *     {@link Throwable#getMessage()} method).
   * @param cause the cause (which is saved for later retrieval by the
   *     {@link Throwable#getCause()} method). (A null value is permitted, and indicates that
   *     the cause is nonexistent or unknown.)
   */
  public SketchesException(final String message, final Throwable cause) {
    super(message, cause);
  }

}
60 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/common/SketchesReadOnlyException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.common;
21 |
22 | /**
23 | * Write operation attempted on a read-only class.
24 | *
25 | * @author Lee Rhodes
26 | */
27 | public class SketchesReadOnlyException extends SketchesException {
28 | private static final long serialVersionUID = 1L;
29 |
30 | //other constructors to be added as needed.
31 |
32 | /**
33 | * Constructs a new runtime exception with the message:
34 | * "Write operation attempted on a read-only class."
35 | *
36 | * The cause is not initialized, and may subsequently be initialized by a call to
37 | * Throwable.initCause(java.lang.Throwable).
38 | */
39 | public SketchesReadOnlyException() {
40 | super("Write operation attempted on a read-only class.");
41 | }
42 |
43 | /**
44 | * Constructs a new runtime exception with the specified detail message. The cause is not
45 | * initialized, and may subsequently be initialized by a call to
46 | * Throwable.initCause(java.lang.Throwable).
47 | *
48 | * @param message the detail message. The detail message is saved for later retrieval by the
49 | * Throwable.getMessage() method.
50 | */
51 | public SketchesReadOnlyException(final String message) {
52 | super(message);
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/common/SketchesStateException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.common;
21 |
22 | /**
23 | * Illegal State Exception class for the library
24 | *
25 | * @author Lee Rhodes
26 | */
27 | public class SketchesStateException extends SketchesException {
28 | private static final long serialVersionUID = 1L;
29 |
30 | //other constructors to be added as needed.
31 |
32 | /**
33 | * Constructs a new runtime exception with the specified detail message. The cause is not
34 | * initialized, and may subsequently be initialized by a call to
35 | * Throwable.initCause(java.lang.Throwable).
36 | *
37 | * @param message the detail message. The detail message is saved for later retrieval by the
38 | * Throwable.getMessage() method.
39 | */
40 | public SketchesStateException(final String message) {
41 | super(message);
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/common/SuppressFBWarnings.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.common;
21 |
22 | import java.lang.annotation.Retention;
23 | import java.lang.annotation.RetentionPolicy;
24 |
/**
 * Annotation used to suppress SpotBugs warnings on the annotated element.
 *
 * <p>It is declared with {@link RetentionPolicy#CLASS} retention.</p>
 *
 * @author Lee Rhodes
 */
@Retention(value = RetentionPolicy.CLASS)
public @interface SuppressFBWarnings {

  /**
   * The SpotBugs warnings to suppress on the annotated element, given as a comma-separated
   * list of quoted strings. Each entry can be a bug category, kind or pattern.
   * Defaults to an empty list.
   * @return list of relevant bug descriptors
   */
  String[] value() default {};

  /**
   * Optional free-text explanation of why the warnings are suppressed.
   * Defaults to the empty string.
   * @return explanation
   */
  String justification() default "";
}
46 |
47 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/common/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package is for common classes that may be used across all the sketch families.
22 | */
23 | package org.apache.datasketches.common;
24 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/cpc/Flavor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.cpc;
21 |
/**
 * Note: except for brief transitional moments, these sketches always obey the following strict
 * mapping between the flavor of a sketch and the number of coupons that it has collected.
 *
 * <p>In the per-constant comments below, C is the number of collected coupons.
 * NOTE(review): K is presumably the sketch's configured size parameter; confirm against
 * the sketch implementation.</p>
 *
 * @author Lee Rhodes
 * @author Kevin Lang
 */
enum Flavor {
  EMPTY,   //    0  == C <    1
  SPARSE,  //    1  <= C <   3K/32
  HYBRID,  // 3K/32 <= C <   K/2
  PINNED,  //   K/2 <= C < 27K/8  [NB: 27/8 = 3 + 3/8]
  SLIDING; // 27K/8 <= C

  // Cached once so lookups do not allocate a fresh array on every call.
  // Declared final: the table must never be reassigned after class initialization.
  private static final Flavor[] FLAVORS = values();

  /**
   * Returns the Flavor given its enum ordinal
   * @param ordinal the given enum ordinal
   * @return the Flavor given its enum ordinal
   * @throws ArrayIndexOutOfBoundsException if the ordinal is negative or not less than the
   * number of Flavor constants.
   */
  static Flavor ordinalToFlavor(final int ordinal) {
    return FLAVORS[ordinal];
  }

}
47 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/cpc/Format.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.cpc;
21 |
/**
 * There are seven different preamble formats (8 combinations) that determine the layout of the
 * HiField variables after the first 8 bytes of the preamble.
 * Do not change the order: {@link #ordinalToFormat(int)} maps ordinals back to constants, so
 * reordering would change which constant each ordinal resolves to.
 */
enum Format {
  EMPTY_MERGED,
  EMPTY_HIP,
  SPARSE_HYBRID_MERGED,
  SPARSE_HYBRID_HIP,
  PINNED_SLIDING_MERGED_NOSV,
  PINNED_SLIDING_HIP_NOSV,
  PINNED_SLIDING_MERGED,
  PINNED_SLIDING_HIP;

  // Cached once so lookups do not allocate a fresh array on every call.
  // Declared final: the table must never be reassigned after class initialization.
  private static final Format[] FORMATS = values();

  /**
   * Returns the Format given its enum ordinal
   * @param ordinal the given enum ordinal
   * @return the Format given its enum ordinal
   * @throws ArrayIndexOutOfBoundsException if the ordinal is negative or not less than the
   * number of Format constants.
   */
  static Format ordinalToFormat(final int ordinal) {
    return FORMATS[ordinal];
  }

} //end enum Format
49 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/cpc/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * Compressed Probabilistic Counting sketch family
22 | */
23 |
24 | package org.apache.datasketches.cpc;
25 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/fdt/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * Frequent Distinct Tuples Sketch
22 | */
23 | package org.apache.datasketches.fdt;
24 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/filters/bloomfilter/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | /**
20 | * BloomFilter package
21 | */
22 | package org.apache.datasketches.filters.bloomfilter;
23 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/filters/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * The filters package contains data structures used to determine
22 | * approximate set-membership. Classes in this package may not follow
23 | * the standard sub-linear properties of other offerings in this
24 | * library, but they fit with the spirit of DataSketches by providing
25 | * fast and approximate answers to complex problems.
26 | */
27 | package org.apache.datasketches.filters;
28 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/frequencies/ErrorType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.frequencies;
21 |
/**
 * Selects which of the two error regions of the statistical classification Confusion Matrix
 * is excluded from a returned sample of Frequent Items.
 */
public enum ErrorType {

  /**
   * The sample set contains no Type I errors (False Positives), which means that every
   * Truly Negative sample is excluded from it.
   * The sample set may, however, suffer from Type II errors (False Negatives): samples that
   * should have been included but were not.
   * The resulting set is a subset of the one produced by the NO_FALSE_NEGATIVES ErrorType.
   */
  NO_FALSE_POSITIVES,

  /**
   * The sample set suffers from no Type II errors (False Negatives), which means that every
   * Truly Positive sample is included in it.
   * The sample set may, however, contain Type I errors (False Positives): samples that
   * were included but should not have been.
   */
  NO_FALSE_NEGATIVES;
}
45 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/frequencies/Util.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.frequencies;
21 |
/**
 * Package-private constants and a hash helper for the frequencies sketches.
 * Not instantiable.
 */
final class Util {

  /**
   * The following constant controls the size of the initial data structure for the
   * frequencies sketches and its value is somewhat arbitrary.
   */
  static final int LG_MIN_MAP_SIZE = 3;

  /**
   * This constant is large enough so that computing the median of SAMPLE_SIZE
   * randomly selected entries from a list of numbers and outputting
   * the empirical median will give a constant-factor approximation to the
   * true median with high probability.
   */
  static final int SAMPLE_SIZE = 1024;

  private Util() {}

  /**
   * Mixes the bits of the given key with three xor-shift steps interleaved with two
   * multiplications.
   *
   * <p>This hash function is taken from the internals of Austin Appleby's MurmurHash3
   * algorithm. It is also used by the Trove for Java libraries.</p>
   *
   * @param key to be hashed
   * @return the mixed 64-bit value, used as an index into the hash table
   */
  static long hash(final long key) {
    long mixed = key;
    mixed = (mixed ^ (mixed >>> 33)) * 0xff51afd7ed558ccdL;
    mixed = (mixed ^ (mixed >>> 33)) * 0xc4ceb9fe1a85ec53L;
    return mixed ^ (mixed >>> 33);
  }

}
55 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/frequencies/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package is dedicated to streaming algorithms that enable estimation of the
22 | * frequency of occurrence of items in a weighted multiset stream of items.
23 | * If the frequency distribution of items is sufficiently skewed, these algorithms are very
24 | * useful in identifying the "Heavy Hitters" that occurred most frequently in the stream.
25 | * The accuracy of the estimation of the frequency of an item has well understood error
26 | * bounds that can be returned by the sketch.
27 | *
28 | * <p>These algorithms are sometimes referred to as "TopN" algorithms.</p>
29 | */
30 | package org.apache.datasketches.frequencies;
31 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/hash/XxHash.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.hash;
21 |
22 | import org.apache.datasketches.memory.Memory;
23 |
24 | /**
25 | * The XxHash is a fast, non-cryptographic, 64-bit hash function that has
26 | * excellent avalanche and 2-way bit independence properties.
27 | *
28 | * This class wraps the
29 | * Memory Component XxHash
30 | * implementation.
31 | *
32 | * @author Lee Rhodes
33 | */
34 | public class XxHash {
35 |
36 | /**
37 | * Compute the hash of the given Memory object.
38 | * @param mem The given Memory object
39 | * @param offsetBytes Starting at this offset in bytes
40 | * @param lengthBytes Continuing for this number of bytes
41 | * @param seed use this seed for the hash function
42 | * @return return the resulting 64-bit hash value.
43 | */
44 | public static long hash(final Memory mem, final long offsetBytes, final long lengthBytes,
45 | final long seed) {
46 | return mem.xxHash64(offsetBytes, lengthBytes, seed);
47 | }
48 |
49 | /**
50 | * Returns a 64-bit hash.
51 | * @param in a long
52 | * @param seed A long valued seed.
53 | * @return the hash
54 | */
55 | public static long hash(final long in, final long seed) {
56 | return org.apache.datasketches.memory.XxHash.hashLong(in, seed);
57 | }
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/hash/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * The hash package contains high-performing and extended Java implementations
22 | * of Austin Appleby's 128-bit MurmurHash3 hash function originally coded in C.
23 | * This core MurmurHash3.java class is used throughout many of the sketch classes for consistency
24 | * and as long as the user specifies the same seed will result in coordinated hash operations.
25 | * This package also contains an adaptor class that extends the basic class with more functions
26 | * commonly associated with hashing.
27 | */
28 | package org.apache.datasketches.hash;
29 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/hll/AuxHashMap.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.hll;
21 |
22 | import org.apache.datasketches.common.SketchesStateException;
23 |
24 | /**
25 | * @author Lee Rhodes
26 | */
27 | interface AuxHashMap {
28 |
29 | AuxHashMap copy();
30 |
31 | int getAuxCount();
32 |
33 | int[] getAuxIntArr();
34 |
35 | int getCompactSizeBytes();
36 |
37 | PairIterator getIterator();
38 |
39 | int getLgAuxArrInts();
40 |
41 | int getUpdatableSizeBytes();
42 |
43 | boolean isMemory();
44 |
45 | boolean isOffHeap();
46 |
47 | /**
48 | * Adds the slotNo and value to the aux array.
49 | * @param slotNo the index from the HLL array
50 | * @param value the HLL value at the slotNo.
51 | * @throws SketchesStateException if this slotNo already exists in the aux array.
52 | */
53 | void mustAdd(int slotNo, int value);
54 |
55 | /**
56 | * Returns value given slotNo. If this fails an exception is thrown.
57 | * @param slotNo the index from the HLL array
58 | * @return value the HLL value at the slotNo
59 | * @throws SketchesStateException if valid slotNo and value is not found.
60 | */
61 | int mustFindValueFor(int slotNo);
62 |
63 | /**
64 | * Replaces the entry at slotNo with the given value.
65 | * @param slotNo the index from the HLL array
66 | * @param value the HLL value at the slotNo
67 | * @throws SketchesStateException if a valid slotNo, value is not found.
68 | */
69 | void mustReplace(int slotNo, int value);
70 |
71 | }
72 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/hll/CurMode.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.hll;
21 |
/**
 * The three fundamental modes of the HLL Sketch.
 *
 * @author Lee Rhodes
 * @author Kevin Lang
 */
enum CurMode {
  LIST,
  SET,
  HLL; //do not change the order.

  /** The constants in declaration order, indexed by ordinal. */
  public static final CurMode[] values = values();

  /**
   * Maps an ordinal back to its CurMode.
   * @param ordinal the order of appearance in the enum definition.
   * @return the CurMode given its ordinal
   */
  public static CurMode fromOrdinal(final int ordinal) {
    final CurMode mode = values[ordinal];
    return mode;
  }

}
42 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/hll/HllPairIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.hll;
21 |
22 | import static org.apache.datasketches.hll.HllUtil.EMPTY;
23 | import static org.apache.datasketches.hll.HllUtil.pair;
24 |
25 | /**
26 | * Iterates over an on-heap HLL byte array producing pairs of index, value.
27 | *
28 | * @author Lee Rhodes
29 | */
30 | abstract class HllPairIterator extends PairIterator {
31 | final int lengthPairs;
32 | int index;
33 | int value;
34 |
35 | //Used by Direct<4,6,8>Array, Heap<4,6,8>Array
36 | HllPairIterator(final int lengthPairs) {
37 | this.lengthPairs = lengthPairs;
38 | index = - 1;
39 | }
40 |
41 | @Override
42 | public String getHeader() {
43 | return String.format("%10s%6s", "Slot", "Value");
44 | }
45 |
46 | @Override
47 | public int getIndex() {
48 | return index;
49 | }
50 |
51 | @Override
52 | public int getKey() {
53 | return index;
54 | }
55 |
56 | @Override
57 | public int getPair() {
58 | return pair(index, value);
59 | }
60 |
61 | @Override
62 | public int getSlot() {
63 | return index;
64 | }
65 |
66 | @Override
67 | public String getString() {
68 | final int slot = getSlot();
69 | final int value = getValue();
70 | return String.format("%10d%6d", slot, value);
71 | }
72 |
73 | @Override
74 | public int getValue() {
75 | return value;
76 | }
77 |
78 | @Override
79 | public boolean nextAll() {
80 | if (++index < lengthPairs) {
81 | value = value();
82 | return true;
83 | }
84 | return false;
85 | }
86 |
87 | @Override
88 | public boolean nextValid() {
89 | while (++index < lengthPairs) {
90 | value = value();
91 | if (value != EMPTY) {
92 | return true;
93 | }
94 | }
95 | return false;
96 | }
97 |
98 | abstract int value();
99 |
100 | }
101 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/hll/IntArrayPairIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.hll;
21 |
22 | import static org.apache.datasketches.hll.HllUtil.EMPTY;
23 |
24 | /**
25 | * Iterates over an on-heap integer array of pairs extracting
26 | * the components of the pair at a given index.
27 | *
28 | * @author Lee Rhodes
29 | */
30 | class IntArrayPairIterator extends PairIterator {
31 | private final int[] array;
32 | private final int arrLen;
33 | private final int slotMask;
34 | private int index;
35 | private int pair;
36 |
37 | //used by CouponList, HeapAuxHashMap
38 | IntArrayPairIterator(final int[] array, final int lgConfigK) {
39 | this.array = array;
40 | slotMask = (1 << lgConfigK) - 1;
41 | arrLen = array.length;
42 | index = - 1;
43 | }
44 |
45 | @Override
46 | public int getIndex() {
47 | return index;
48 | }
49 |
50 | @Override
51 | public int getKey() {
52 | return HllUtil.getPairLow26(pair);
53 | }
54 |
55 | @Override
56 | public int getPair() {
57 | return pair;
58 | }
59 |
60 | @Override
61 | public int getSlot() {
62 | return getKey() & slotMask;
63 | }
64 |
65 | @Override
66 | public int getValue() {
67 | return HllUtil.getPairValue(pair);
68 | }
69 |
70 | @Override
71 | public boolean nextAll() {
72 | if (++index < arrLen) {
73 | pair = array[index];
74 | return true;
75 | }
76 | return false;
77 | }
78 |
79 | @Override
80 | public boolean nextValid() {
81 | while (++index < arrLen) {
82 | final int pair = array[index];
83 | if (pair != EMPTY) {
84 | this.pair = pair;
85 | return true;
86 | }
87 | }
88 | return false;
89 | }
90 |
91 | }
92 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/hll/doc-files/HLL_HIP_K12T20U20.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/main/java/org/apache/datasketches/hll/doc-files/HLL_HIP_K12T20U20.png
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/hll/doc-files/HLL_UnionTime4_6_8_Java_CPP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/main/java/org/apache/datasketches/hll/doc-files/HLL_UnionTime4_6_8_Java_CPP.png
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/hllmap/CouponsIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.hllmap;
21 |
/**
 * Common iterator class for maps that need one. It visits the non-zero entries of a window
 * of {@code maxEntries} coupons that starts at {@code offset} within a shared coupon array.
 *
 * @author Alex Saydakov
 */
class CouponsIterator {

  private final int offset_;
  private final int maxEntries_;
  private final short[] couponsArr_;
  private int index_;

  CouponsIterator(final short[] couponsArr, final int offset, final int maxEntries) {
    couponsArr_ = couponsArr;
    offset_ = offset;
    maxEntries_ = maxEntries;
    index_ = -1; //no current entry until next() has been called
  }

  /**
   * Advances to the next non-zero coupon of the window, skipping over zero values.
   * next() must be called before the first getValue().
   * @return true if a non-zero coupon was found, false if the window has been exhausted.
   */
  boolean next() {
    for (index_++; index_ < maxEntries_; index_++) {
      final short coupon = couponsArr_[offset_ + index_];
      if (coupon != 0) { return true; }
    }
    return false;
  }

  /**
   * Returns the value at the current index.
   * @return the value at the current index.
   */
  short getValue() {
    final int position = offset_ + index_;
    return couponsArr_[position];
  }

}
63 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/hllmap/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * The hllmap package contains a space efficient HLL mapping sketch of keys to approximate unique
22 | * count of identifiers. For example, counting the number of unique users (identifiers) per IP
23 | * address.
24 | *
25 | * <p>In cases where the number of keys is very large, having an individual HLL sketch per key may
26 | * not be practical. If the distribution of values per key is highly skewed where the vast
27 | * majority of keys have only a few values then this mapping sketch will make sense as it will be
28 | * far more space efficient than dedicating individual HLL sketches per key.
29 | *
30 | * <p>From our own testing, sketching 100 million IPv4 addresses with such a
31 | * highly skewed distribution of identifiers per IP uses only 1.4GB of memory. This translates to
32 | * an average of about 10 bytes per IP allocated to the equivalent of a full k=1024 HLL sketch
33 | * and provides an RSE of less than 2.5%. Your results will vary depending on the actual
34 | * distribution of identifiers per key.
35 | *
36 | * @see org.apache.datasketches.hllmap.UniqueCountMap
37 | */
38 | package org.apache.datasketches.hllmap;
39 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.kll;
21 |
22 | import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator;
23 |
24 | /**
25 | * Iterator over KllDoublesSketch. The order is not defined.
26 | */
27 | public final class KllDoublesSketchIterator extends KllSketchIterator implements QuantilesDoublesSketchIterator {
28 | private final double[] quantiles;
29 |
30 | KllDoublesSketchIterator(final double[] quantiles, final int[] levelsArr, final int numLevels) {
31 | super(levelsArr, numLevels);
32 | this.quantiles = quantiles;
33 | }
34 |
35 | @Override
36 | public double getQuantile() {
37 | return quantiles[index];
38 | }
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.kll;
21 |
22 | import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator;
23 |
24 | /**
25 | * Iterator over KllFloatsSketch. The order is not defined.
26 | */
27 | public final class KllFloatsSketchIterator extends KllSketchIterator implements QuantilesFloatsSketchIterator {
28 | private final float[] quantiles;
29 |
30 | KllFloatsSketchIterator(final float[] quantiles, final int[] levelsArr, final int numLevels) {
31 | super(levelsArr, numLevels);
32 | this.quantiles = quantiles;
33 | }
34 |
35 | @Override
36 | public float getQuantile() {
37 | return quantiles[index];
38 | }
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/kll/KllItemsSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.kll;
21 |
22 | import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator;
23 |
24 | /**
25 | * Iterator over KllItemsSketch. The order is not defined.
26 | * @param the item class type
27 | */
28 | public final class KllItemsSketchIterator extends KllSketchIterator implements QuantilesGenericSketchIterator {
29 | private final Object[] quantiles;
30 |
31 | KllItemsSketchIterator(final Object[] quantiles, final int[] levelsArr, final int numLevels) {
32 | super(levelsArr, numLevels);
33 | this.quantiles = quantiles;
34 | }
35 |
36 | @SuppressWarnings("unchecked")
37 | @Override
38 | public T getQuantile() {
39 | return (T)quantiles[index];
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/kll/KllLongsSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.kll;
21 |
22 | import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator;
23 |
24 | /**
25 | * Iterator over KllLongsSketch. The order is not defined.
26 | */
27 | public final class KllLongsSketchIterator extends KllSketchIterator implements QuantilesLongsSketchIterator {
28 | private final long[] quantiles;
29 |
30 | KllLongsSketchIterator(final long[] quantiles, final int[] levelsArr, final int numLevels) {
31 | super(levelsArr, numLevels);
32 | this.quantiles = quantiles;
33 | }
34 |
35 | @Override
36 | public long getQuantile() {
37 | return quantiles[index];
38 | }
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/kll/KllSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.kll;
21 |
22 | import org.apache.datasketches.quantilescommon.QuantilesSketchIterator;
23 |
24 | /**
25 | * The base implementation for the KLL sketch iterator hierarchy used for viewing the
26 | * non-ordered quantiles retained by a sketch.
27 | *
28 | * Prototype example of the recommended iteration loop:
29 | * {@code
30 | * SketchIterator itr = sketch.iterator();
31 | * while (itr.next()) {
32 | * ...get*();
33 | * }
34 | * }
35 | *
36 | * @author Lee Rhodes
37 | */
38 | public class KllSketchIterator implements QuantilesSketchIterator {
39 | protected final int[] levelsArr;
40 | protected final int numLevels;
41 | protected int level;
42 | protected int index;
43 | protected long weight;
44 | protected boolean isInitialized_;
45 |
46 | KllSketchIterator(final int[] levelsArr, final int numLevels) {
47 | this.levelsArr = levelsArr;
48 | this.numLevels = numLevels;
49 | this.isInitialized_ = false;
50 | }
51 |
52 | @Override
53 | public long getWeight() {
54 | return weight;
55 | }
56 |
57 | @Override
58 | public boolean next() {
59 | if (!isInitialized_) {
60 | level = 0;
61 | index = levelsArr[level];
62 | weight = 1;
63 | isInitialized_ = true;
64 | } else {
65 | index++;
66 | }
67 | if (index < levelsArr[level + 1]) {
68 | return true;
69 | }
70 | // go to the next non-empty level
71 | do {
72 | level++;
73 | if (level == numLevels) {
74 | return false; // run out of levels
75 | }
76 | weight *= 2;
77 | } while (levelsArr[level] == levelsArr[level + 1]);
78 | index = levelsArr[level];
79 | return true;
80 | }
81 |
82 | }
83 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/kll/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package is for the implementations of the sketch algorithm developed by Zohar Karnin, Kevin Lang,
22 | * and Edo Liberty that is commonly referred to as the "KLL" sketch after the authors' last names.
23 | */
24 |
25 | package org.apache.datasketches.kll;
26 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package is the parent package for all sketch families and common code areas.
22 | *
23 | * @author Lee Rhodes
24 | */
25 | package org.apache.datasketches;
26 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/partitions/BoundsRule.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.partitions;
21 |
/**
 * Tells the user which of the two bounds, upper and lower, of a partition definition row
 * are to be included with the returned data.
 */
public enum BoundsRule {

  /** Both the upper bound and the lower bound are included. */
  INCLUDE_BOTH,

  /** The upper bound is included, the lower bound is not. */
  INCLUDE_UPPER,

  /** The lower bound is included, the upper bound is not. */
  INCLUDE_LOWER,

  /** Neither of the two bounds is included. */
  INCLUDE_NEITHER

}
49 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/partitions/SketchFillRequest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.partitions;
21 |
22 | import org.apache.datasketches.quantilescommon.PartitioningFeature;
23 | import org.apache.datasketches.quantilescommon.QuantilesGenericAPI;
24 |
25 | /**
26 | * This is a callback request to the data source to fill a quantiles sketch,
27 | * which is returned to the caller.
28 | * @param the item class type
29 | * @param the sketch type
30 | * @author Lee Rhodes
31 | */
32 | public interface SketchFillRequest & PartitioningFeature> {
33 |
34 | /**
35 | * This is a callback request to the data source to fill a quantiles sketch
36 | * with a range of data between upper and lower bounds. Which of these bounds are to be included is determined by
37 | * the BoundsRule.
38 | *
39 | * This range of data may or may not be subsequently further partitioned.
40 | * @param lowerQuantile the lowest quantile of a range
41 | * @param upperQuantile the highest quantile of a range
42 | * @param boundsRule determines which quantile bounds to include
43 | * @return a quantiles sketch filled from the given upper and lower bounds.
44 | */
45 | public S getRange(final T lowerQuantile, final T upperQuantile, final BoundsRule boundsRule);
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/partitions/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
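21 | * Types used when partitioning data with quantiles sketches, such as {@link BoundsRule} and {@link SketchFillRequest}.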
22 | */
23 | package org.apache.datasketches.partitions;
24 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantiles/CompactDoublesSketch.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantiles;
21 |
22 | import org.apache.datasketches.common.SketchesStateException;
23 |
24 | /**
25 | * Compact sketches are inherently read only.
26 | * @author Jon Malkin
27 | */
28 | public abstract class CompactDoublesSketch extends DoublesSketch {
29 | CompactDoublesSketch(final int k) {
30 | super(k);
31 | }
32 |
33 | @Override
34 | boolean isCompact() {
35 | return true;
36 | }
37 |
38 | @Override
39 | public boolean isReadOnly() {
40 | return false;
41 | }
42 |
43 | @Override
44 | public void reset() {
45 | throw new SketchesStateException("Cannot reset a compact sketch, which is read-only.");
46 | }
47 |
48 | @Override
49 | public void update(final double quantile) {
50 | throw new SketchesStateException("Cannot update a compact sketch, which is read-only.");
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantiles/DoublesArrayAccessor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantiles;
21 |
22 | import java.util.Arrays;
23 |
24 | /**
25 | * @author Jon Malkin
26 | */
27 | final class DoublesArrayAccessor extends DoublesBufferAccessor {
28 | private int numItems_;
29 | private double[] buffer_;
30 |
31 | private DoublesArrayAccessor(final double[] buffer) {
32 | numItems_ = buffer.length;
33 | buffer_ = buffer;
34 | }
35 |
36 | static DoublesArrayAccessor wrap(final double[] buffer) {
37 | return new DoublesArrayAccessor(buffer);
38 | }
39 |
40 | static DoublesArrayAccessor initialize(final int numItems) {
41 | return new DoublesArrayAccessor(new double[numItems]);
42 | }
43 |
44 | @Override
45 | double get(final int index) {
46 | assert index >= 0 && index < numItems_;
47 | return buffer_[index];
48 | }
49 |
50 | @Override
51 | double set(final int index, final double quantile) {
52 | assert index >= 0 && index < numItems_;
53 |
54 | final double retVal = buffer_[index];
55 | buffer_[index] = quantile;
56 | return retVal;
57 | }
58 |
59 | @Override
60 | int numItems() {
61 | return numItems_;
62 | }
63 |
64 | @Override
65 | double[] getArray(final int fromIdx, final int numItems) {
66 | return Arrays.copyOfRange(buffer_, fromIdx, fromIdx + numItems);
67 | }
68 |
69 | @Override
70 | void putArray(final double[] srcArray, final int srcIndex, final int dstIndex, final int numItems) {
71 | System.arraycopy(srcArray, srcIndex, buffer_, dstIndex, numItems);
72 | }
73 |
74 | }
75 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantiles/DoublesBufferAccessor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantiles;
21 |
22 | /**
23 | * @author Jon Malkin
24 | */
abstract class DoublesBufferAccessor {

  /**
   * Reads one value.
   * @param index the position to read
   * @return the value at the given position
   */
  abstract double get(final int index);

  /**
   * Writes one value.
   * @param index the position to write
   * @param quantile the value to store
   * @return a double; the array-backed accessor returns the value previously stored at the index
   */
  abstract double set(final int index, final double quantile);

  /**
   * Reports how many items this accessor covers.
   * @return the number of items
   */
  abstract int numItems();

  /**
   * Retrieves a run of values as an array.
   * @param fromIdx the first position to read
   * @param numItems the number of values requested
   * @return an array holding the requested values
   */
  abstract double[] getArray(int fromIdx, int numItems);

  /**
   * Stores a run of values taken from the given array.
   * @param srcArray the array to read from
   * @param srcIndex the first position in {@code srcArray} to read
   * @param dstIndex the first position in this accessor to write
   * @param numItems the number of values to transfer
   */
  abstract void putArray(double[] srcArray, int srcIndex, int dstIndex, int numItems);
}
37 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantiles/DoublesSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantiles;
21 |
22 | import java.util.Objects;
23 |
24 | import org.apache.datasketches.common.SketchesStateException;
25 | import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator;
26 |
27 | /**
28 | * Iterator over DoublesSketch. The order is not defined.
29 | */
30 | public final class DoublesSketchIterator implements QuantilesDoublesSketchIterator {
31 | private DoublesSketchAccessor sketchAccessor;
32 | private long bitPattern;
33 | private int level;
34 | private long weight;
35 | private int index;
36 |
37 | DoublesSketchIterator(final DoublesSketch sketch, final long bitPattern) {
38 | Objects.requireNonNull(sketch, "sketch must not be null");
39 | sketchAccessor = DoublesSketchAccessor.wrap(sketch);
40 | this.bitPattern = bitPattern;
41 | this.level = -1;
42 | this.weight = 1;
43 | this.index = -1;
44 | }
45 |
46 | @Override
47 | public double getQuantile() {
48 | if (index < 0) { throw new SketchesStateException("index < 0; getQuantile() was called before next()"); }
49 | return sketchAccessor.get(index);
50 | }
51 |
52 | @Override
53 | public long getWeight() {
54 | return weight;
55 | }
56 |
57 | @Override
58 | public boolean next() {
59 | index++; // advance index within the current level
60 | if (index < sketchAccessor.numItems()) {
61 | return true;
62 | }
63 | // go to the next non-empty level
64 | do {
65 | level++;
66 | if (level > 0) {
67 | bitPattern >>>= 1;
68 | }
69 | if (bitPattern == 0L) {
70 | return false; // run out of levels
71 | }
72 | weight *= 2;
73 | } while ((bitPattern & 1L) == 0L);
74 | index = 0;
75 | sketchAccessor.setLevel(level);
76 | return true;
77 | }
78 |
79 | }
80 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantiles/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * The quantiles package contains stochastic streaming algorithms that enable single-pass
22 | * analysis of the distribution of a stream of quantiles.
23 | *
24 | * @see org.apache.datasketches.quantiles.DoublesSketch
25 | * @see org.apache.datasketches.quantiles.ItemsSketch
26 | */
27 | package org.apache.datasketches.quantiles;
28 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedViewIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantilescommon;
21 |
22 | /**
23 | * Iterator over quantile sketches of primitive type double.
24 | */
25 | public final class DoublesSortedViewIterator extends SortedViewIterator {
26 | private final double[] quantiles;
27 |
28 | /**
29 | * Constructor.
30 | * @param quantiles the given array of quantiles, which must be ordered.
31 | * @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and
32 | * the last value must be equal to N, the total number of items updated to the sketch.
33 | */
34 | public DoublesSortedViewIterator(final double[] quantiles, final long[] cumWeights) {
35 | super(cumWeights);
36 | this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter
37 | }
38 |
39 | /**
40 | * Gets the quantile at the current index.
41 | *
42 | * Don't call this before calling next() for the first time
43 | * or after getting false from next().
44 | *
45 | * @return the quantile at the current index.
46 | */
47 | public double getQuantile() {
48 | return quantiles[index];
49 | }
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedViewIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantilescommon;
21 |
22 | /**
23 | * Iterator over quantile sketches of primitive type float.
24 | */
25 | public final class FloatsSortedViewIterator extends SortedViewIterator {
26 | private final float[] quantiles;
27 |
28 | /**
29 | * Constructor.
30 | * @param quantiles the given array of quantiles, which must be ordered.
31 | * @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and
32 | * the last value must be equal to N, the total number of items updated to the sketch.
33 | */
34 | public FloatsSortedViewIterator(final float[] quantiles, final long[] cumWeights) {
35 | super(cumWeights);
36 | this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter
37 | }
38 |
39 | /**
40 | * Gets the quantile at the current index.
41 | *
42 | * Don't call this before calling next() for the first time
43 | * or after getting false from next().
44 | *
45 | * @return the quantile at the current index.
46 | */
47 | public float getQuantile() {
48 | return quantiles[index];
49 | }
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantilescommon;
21 |
22 | /**
23 | * Iterator over quantile sketches of primitive type long.
24 | */
25 | public final class LongsSortedViewIterator extends SortedViewIterator {
26 | private final long[] quantiles;
27 |
28 | /**
29 | * Constructor.
30 | * @param quantiles the given array of quantiles, which must be ordered.
31 | * @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and
32 | * the last value must be equal to N, the total number of items updated to the sketch.
33 | */
34 | public LongsSortedViewIterator(final long[] quantiles, final long[] cumWeights) {
35 | super(cumWeights);
36 | this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter
37 | }
38 |
39 | /**
40 | * Gets the quantile at the current index.
41 | *
42 | * Don't call this before calling next() for the first time
43 | * or after getting false from next().
44 | *
45 | * @return the quantile at the current index.
46 | */
47 | public long getQuantile() {
48 | return quantiles[index];
49 | }
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantilescommon;
21 |
/**
 * These search criteria are used by the KLL, REQ and Classic Quantiles sketches in the DataSketches library.
 *
 * @see "Sketching Quantiles and Ranks Tutorial"
 *
 * @author Lee Rhodes
 */
public enum QuantileSearchCriteria {

  /**
   * <b>Definition of INCLUSIVE <i>getQuantile(r)</i> search:</b><br>
   * Given rank <i>r</i>, return the quantile of the smallest rank that is
   * greater than or equal to <i>r</i>.
   *
   * <p><b>Definition of INCLUSIVE <i>getRank(q)</i> search:</b><br>
   * Given quantile <i>q</i>, return the rank, <i>r</i>, of the largest quantile that is
   * less than or equal to <i>q</i>.</p>
   */
  INCLUSIVE,

  /**
   * <b>Definition of EXCLUSIVE <i>getQuantile(r)</i> search:</b><br>
   * Given rank <i>r</i>, return the quantile of the smallest rank that is
   * strictly greater than <i>r</i>.
   *
   * <p>However, if the given rank is equal to 1.0, or there is no quantile that satisfies this
   * criterion, the method will return a <i>NaN</i> or <i>null</i>.</p>
   *
   * <p><b>Definition of EXCLUSIVE <i>getRank(q)</i> search:</b><br>
   * Given quantile <i>q</i>, return the rank, <i>r</i>, of the largest quantile that is
   * strictly less than <i>q</i>.</p>
   *
   * <p>If there is no quantile value that is strictly less than <i>q</i>,
   * the method will return a rank of zero.</p>
   */
  EXCLUSIVE

}
62 |
63 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantilescommon;
21 |
22 | /**
23 | * The quantiles sketch iterator for primitive type double.
24 | * @see QuantilesSketchIterator
25 | * @author Lee Rhodes
26 | */
27 | public interface QuantilesDoublesSketchIterator extends QuantilesSketchIterator {
28 |
29 | /**
30 | * Gets the double quantile at the current index.
31 | *
32 | * Don't call this before calling next() for the first time
33 | * or after getting false from next().
34 | *
35 | * @return the double quantile at the current index.
36 | */
37 | double getQuantile();
38 |
39 | }
40 |
41 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantilescommon;
21 |
22 | /**
23 | * The quantiles sketch iterator for primitive type float.
24 | * @see QuantilesSketchIterator
25 | * @author Lee Rhodes
26 | */
27 | public interface QuantilesFloatsSketchIterator extends QuantilesSketchIterator {
28 |
29 | /**
30 | * Gets the float quantile at the current index.
31 | *
32 | * Don't call this before calling next() for the first time
33 | * or after getting false from next().
34 | *
35 | * @return the float quantile at the current index.
36 | */
37 | float getQuantile();
38 |
39 | }
40 |
41 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantilescommon;
21 |
22 | /**
23 | * The quantiles sketch iterator for generic types.
24 | * @see QuantilesSketchIterator
25 | * @param The generic quantile type
26 | * @author Lee Rhodes
27 | */
28 | public interface QuantilesGenericSketchIterator extends QuantilesSketchIterator {
29 |
30 | /**
31 | * Gets the generic quantile at the current index.
32 | *
33 | * Don't call this before calling next() for the first time
34 | * or after getting false from next().
35 | *
36 | * @return the generic quantile at the current index.
37 | */
38 | T getQuantile();
39 |
40 | }
41 |
42 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantilescommon;
21 |
22 | /**
23 | * The quantiles sketch iterator for primitive type long.
24 | * @see QuantilesSketchIterator
25 | * @author Zac Blanco
26 | */
27 | public interface QuantilesLongsSketchIterator extends QuantilesSketchIterator {
28 |
29 | /**
30 | * Gets the long quantile at the current index.
31 | *
32 | * Don't call this before calling next() for the first time
33 | * or after getting false from next().
34 | *
35 | * @return the long quantile at the current index.
36 | */
37 | long getQuantile();
38 |
39 | }
40 |
41 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/QuantilesSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantilescommon;
21 |
22 | /**
23 | * This is the base interface for the SketchIterator hierarchy used for viewing the
24 | * non-ordered quantiles retained by a sketch.
25 | *
26 | * Prototype example of the recommended iteration loop:
27 | * {@code
28 | * SketchIterator itr = sketch.iterator();
29 | * while (itr.next()) {
30 | * ...get*();
31 | * }
32 | * }
33 | *
34 | * @author Lee Rhodes
35 | */
36 |
public interface QuantilesSketchIterator {

  /**
   * Returns the natural weight at the iterator's current index.
   *
   * <p>The result is only meaningful after next() has been called at least once
   * and while next() has not yet returned false.</p>
   *
   * @return the natural weight at the current index.
   */
  long getWeight();

  /**
   * Moves the iterator forward by one position and reports whether that position is valid.
   * Until this method has been called for the first time the iterator's state is undefined.
   * @return true if the next index is valid.
   */
  boolean next();

}
57 |
58 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/SketchPartitionLimits.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantilescommon;
21 |
22 | import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
23 |
24 | import org.apache.datasketches.common.SketchesArgumentException;
25 |
26 | /**
27 | * This defines the methods required to compute the partition limits.
28 | */
29 | public interface SketchPartitionLimits {
30 |
31 | /**
32 | * Gets the maximum number of partitions this sketch will support based on the configured size K
33 | * and the number of retained values of this sketch.
34 | * @return the maximum number of partitions this sketch will support.
35 | */
36 | int getMaxPartitions();
37 |
38 | /**
39 | * Gets the minimum partition size in items this sketch will support based on the configured size K of this
40 | * sketch and the number of retained values of this sketch.
41 | * @return the minimum partition size in items this sketch will support.
42 | */
43 | default long getMinPartitionSizeItems() {
44 | final long totalN = getN();
45 | if (totalN <= 0) { throw new SketchesArgumentException(EMPTY_MSG); }
46 | return totalN / getMaxPartitions();
47 | }
48 |
49 | /**
50 | * Gets the length of the input stream offered to the sketch..
51 | * @return the length of the input stream offered to the sketch.
52 | */
53 | long getN();
54 |
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/quantilescommon/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package contains common tools and methods for the quantiles, kll and
22 | * req packages.
23 | */
24 | package org.apache.datasketches.quantilescommon;
25 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/req/ReqSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.req;
21 |
22 | import java.util.List;
23 |
24 | import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator;
25 |
26 | /**
27 | * Iterator over all retained items of the ReqSketch. The order is not defined.
28 | *
29 | * @author Lee Rhodes
30 | */
31 | public final class ReqSketchIterator implements QuantilesFloatsSketchIterator {
32 | private List compactors;
33 | private int cIndex;
34 | private int bIndex;
35 | private int numRetainedItems;
36 | private FloatBuffer currentBuf;
37 |
38 | ReqSketchIterator(final ReqSketch sketch) {
39 | compactors = sketch.getCompactors();
40 | numRetainedItems = sketch.getNumRetained();
41 | currentBuf = compactors.get(0).getBuffer();
42 | cIndex = 0;
43 | bIndex = -1;
44 | }
45 |
46 | @Override
47 | public float getQuantile() {
48 | return currentBuf.getItem(bIndex);
49 | }
50 |
51 | @Override
52 | public long getWeight() {
53 | return 1 << cIndex;
54 | }
55 |
56 | @Override
57 | public boolean next() {
58 | if ((numRetainedItems == 0)
59 | || ((cIndex == (compactors.size() - 1)) && (bIndex == (currentBuf.getCount() - 1)))) {
60 | return false;
61 | }
62 | if (bIndex == (currentBuf.getCount() - 1)) {
63 | cIndex++;
64 | currentBuf = compactors.get(cIndex).getBuffer();
65 | bIndex = 0;
66 | } else {
67 | bIndex++;
68 | }
69 | return true;
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/req/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package is for the implementation of the Relative Error Quantiles sketch algorithm.
22 | * @see org.apache.datasketches.req.ReqSketch
23 | */
24 | package org.apache.datasketches.req;
25 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/sampling/SampleSubsetSummary.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.sampling;
21 |
/**
 * A simple object to capture the results of a subset sum query on a sampling sketch.
 * The values are fixed at construction time.
 *
 * @author Jon Malkin
 */
public class SampleSubsetSummary {
  private final double lowerBound;
  private final double estimate;
  private final double upperBound;
  private final double totalSketchWeight;

  /**
   * Stores the four results of a subset sum query.
   * @param lowerBound the lower bound
   * @param estimate the estimate
   * @param upperBound the upper bound
   * @param totalSketchWeight the total sketch weight
   */
  SampleSubsetSummary(final double lowerBound,
      final double estimate,
      final double upperBound,
      final double totalSketchWeight) {
    this.lowerBound = lowerBound;
    this.estimate = estimate;
    this.upperBound = upperBound;
    this.totalSketchWeight = totalSketchWeight;
  }

  /**
   * Returns the lower bound
   * @return the lower bound
   */
  public double getLowerBound() {
    return lowerBound;
  }

  /**
   * Returns the total sketch weight
   * @return the total sketch weight
   */
  public double getTotalSketchWeight() {
    return totalSketchWeight;
  }

  /**
   * Returns the upper bound
   * @return the upper bound
   */
  public double getUpperBound() {
    return upperBound;
  }

  /**
   * Returns the estimate
   * @return the estimate
   */
  public double getEstimate() {
    return estimate;
  }
}
75 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/sampling/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of
22 | * weighted and unweighted items from a stream.
23 | *
24 | * These sketches are mergeable and can be serialized and deserialized to/from a compact
25 | * form.
26 | * @see org.apache.datasketches.sampling.ReservoirItemsSketch
27 | * @see org.apache.datasketches.sampling.ReservoirLongsSketch
28 | * @see org.apache.datasketches.sampling.VarOptItemsSketch
29 | */
30 | package org.apache.datasketches.sampling;
31 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tdigest/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * t-Digest for estimating quantiles and ranks.
22 | */
23 | package org.apache.datasketches.tdigest;
24 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/theta/BytesCompactHashIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.theta;
21 |
22 | import org.apache.datasketches.common.ByteArrayUtil;
23 |
24 | /*
25 | * Iterates over the hash values of a serial version 3 sketch representation held in a byte array.
26 | */
27 | class BytesCompactHashIterator implements HashIterator {
28 | final private byte[] bytes;
29 | final private int offset;
30 | final private int numEntries;
31 | private int index;
32 | // The index starts at -1, so next() must be called before the first get().
33 | BytesCompactHashIterator(
34 | final byte[] bytes,
35 | final int offset,
36 | final int numEntries
37 | ) {
38 | this.bytes = bytes;
39 | this.offset = offset;
40 | this.numEntries = numEntries;
41 | index = -1;
42 | }
43 | // Reads the value at byte position offset + index * Long.BYTES via ByteArrayUtil.getLongLE.
44 | @Override
45 | public long get() {
46 | return ByteArrayUtil.getLongLE(bytes, offset + index * Long.BYTES);
47 | }
48 | // Advances the index; returns false once all numEntries entries have been visited.
49 | @Override
50 | public boolean next() {
51 | return ++index < numEntries;
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/theta/HashIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.theta;
21 |
22 | /**
23 | * This is used to iterate over the retained hash values of the Theta sketch.
24 | * @author Lee Rhodes
25 | */
26 | public interface HashIterator {
27 |
28 | /**
29 | * Gets the hash value at the current position of the iteration.
30 | * @return the hash value
31 | */
32 | long get();
33 |
34 | /**
35 | * Advances to the next hash value in sequence and returns true if there is one.
36 | * If false, the iteration is done.
37 | * @return true if the iterator now points at the next hash value in sequence.
38 | */
39 | boolean next();
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/theta/HeapCompactHashIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.theta;
21 |
22 | class HeapCompactHashIterator implements HashIterator { // iterates over every entry of a long[] cache
23 | private long[] cache;
24 | private int index;
25 | // The index starts at -1, so next() must be called before the first get().
26 | HeapCompactHashIterator(final long[] cache) {
27 | this.cache = cache;
28 | index = -1;
29 | }
30 | // Returns the cache entry at the current index; entries are not filtered.
31 | @Override
32 | public long get() {
33 | return cache[index];
34 | }
35 | // Advances the index; returns false once it has moved past the last cache entry.
36 | @Override
37 | public boolean next() {
38 | return ++index < cache.length;
39 | }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/theta/HeapHashIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.theta;
21 |
22 | /** Iterates over a long[] cache, stopping only at entries that are non-zero and less than thetaLong.
23 | * @author Lee Rhodes
24 | */
25 | class HeapHashIterator implements HashIterator {
26 | private long[] cache;
27 | private long thetaLong;
28 | private int index;
29 | private long hash;
30 | // The index starts at -1 and hash at 0, so next() must be called before get() is meaningful.
31 | HeapHashIterator(final long[] cache, final long thetaLong) {
32 | this.cache = cache;
33 | this.thetaLong = thetaLong;
34 | index = -1;
35 | hash = 0;
36 | }
37 | // Returns the value most recently read by next(); it is only an accepted hash if next() returned true.
38 | @Override
39 | public long get() {
40 | return hash;
41 | }
42 | // Scans forward from the current index and stops at the first entry that is non-zero and below thetaLong.
43 | @Override
44 | public boolean next() {
45 | while (++index < cache.length) {
46 | hash = cache[index];
47 | if ((hash != 0) && (hash < thetaLong)) {
48 | return true;
49 | }
50 | }
51 | return false;
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/theta/MemoryHashIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.theta;
21 |
22 | import org.apache.datasketches.memory.Memory;
23 |
24 | /** Iterates over arrLongs long values read from a Memory, stopping only at values that are non-zero and less than thetaLong.
25 | * @author Lee Rhodes
26 | */
27 | class MemoryHashIterator implements HashIterator {
28 | private Memory mem;
29 | private int arrLongs;
30 | private long thetaLong;
31 | private long offsetBytes;
32 | private int index;
33 | private long hash;
34 | // The index starts at -1 and hash at 0, so next() must be called before get() is meaningful.
35 | MemoryHashIterator(final Memory mem, final int arrLongs, final long thetaLong) {
36 | this.mem = mem;
37 | this.arrLongs = arrLongs;
38 | this.thetaLong = thetaLong;
39 | offsetBytes = PreambleUtil.extractPreLongs(mem) << 3; // preamble longs * 8; presumably the byte offset of the hash array - TODO confirm
40 | index = -1;
41 | hash = 0;
42 | }
43 | // Returns the value most recently read by next(); it is only an accepted hash if next() returned true.
44 | @Override
45 | public long get() {
46 | return hash;
47 | }
48 | // Scans forward from the current index and stops at the first value that is non-zero and below thetaLong.
49 | @Override
50 | public boolean next() {
51 | while (++index < arrLongs) {
52 | hash = mem.getLong(offsetBytes + (index << 3)); // index << 3: 8 bytes per long entry
53 | if ((hash != 0) && (hash < thetaLong)) {
54 | return true;
55 | }
56 | }
57 | return false;
58 | }
59 |
60 | }
61 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/theta/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * The theta package contains the basic sketch classes that are members of the
22 | * Theta Sketch Framework.
23 | *
24 | * There is a separate Tuple package for many of the sketches that are derived from the
25 | * same algorithms defined in the Theta Sketch Framework paper.
26 | */
27 | package org.apache.datasketches.theta;
28 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/thetacommon/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package contains common tools and methods for the theta,
22 | * tuple, tuple/* and fdt packages.
23 | */
24 | package org.apache.datasketches.thetacommon;
25 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/DeserializeResult.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | /**
23 | * Holds a deserialized object together with its size in bytes, as the result of a deserialize operation
24 | * @param <T> Type of object
25 | */
26 | public class DeserializeResult<T> {
27 | private final T object;
28 | private final int size;
29 |
30 | /**
31 | * Creates an instance.
32 | * @param object Deserialized object.
33 | * @param size Deserialized size in bytes.
34 | */
35 | public DeserializeResult(final T object, final int size) {
36 | this.object = object;
37 | this.size = size;
38 | }
39 |
40 | /**
41 | * Returns Deserialized object
42 | * @return Deserialized object
43 | */
44 | public T getObject() {
45 | return object;
46 | }
47 |
48 | /**
49 | * Returns size in bytes occupied by the object in the serialized form
50 | * @return size in bytes occupied by the object in the serialized form
51 | */
52 | public int getSize() {
53 | return size;
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/Summary.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | /**
23 | * Interface for user-defined Summary, which is associated with every hash in a tuple sketch
24 | */
25 | public interface Summary {
26 |
27 | /**
28 | * Deep copy.
29 | *
30 | * <p>Caution: This must implement a deep copy.
31 | *
32 | * @return deep copy of the Summary
33 | */
34 | public Summary copy();
35 |
36 | /**
37 | * This is to serialize a Summary instance to a byte array.
38 | *
39 | * <p>The user should encode in the byte array its total size, which is used during
40 | * deserialization, especially if the Summary has variable sized elements.
41 | *
42 | * @return serialized representation of the Summary
43 | */
44 | public byte[] toByteArray();
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/SummaryDeserializer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | import org.apache.datasketches.memory.Memory;
23 |
24 | /**
25 | * Interface for deserializing user-defined Summary
26 | * @param <S> type of Summary
27 | */
28 | public interface SummaryDeserializer<S extends Summary> {
29 |
30 | /**
31 | * This is to create an instance of a Summary given a serialized representation.
32 | * The user may assume that the start of the given Memory is the correct place to start
33 | * deserializing. However, the user must be able to determine the number of bytes required to
34 | * deserialize the summary as the capacity of the given Memory may
35 | * include multiple such summaries and may be much larger than required for a single summary.
36 | * @param mem Memory object with serialized representation of a Summary
37 | * @return DeserializeResult object, which contains a Summary object and number of bytes read
38 | * from the Memory
39 | */
40 | public DeserializeResult<S> heapifySummary(Memory mem);
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/SummaryFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | /**
23 | * Interface for user-defined SummaryFactory
24 | * @param <S> type of Summary
25 | */
26 | public interface SummaryFactory<S extends Summary> {
27 |
28 | /**
29 | * Returns new instance of Summary
30 | * @return new instance of Summary
31 | */
32 | public S newSummary();
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/SummarySetOperations.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | /**
23 | * This is to provide methods of producing unions and intersections of two Summary objects.
24 | * @param <S> type of Summary
25 | */
26 | public interface SummarySetOperations<S extends Summary> {
27 |
28 | /**
29 | * This is called by the union operator when both sketches have the same hash value.
30 | *
31 | * <p>Caution: Do not modify the input Summary objects. Also do not return them directly,
32 | * unless they are immutable (most Summary objects are not). For mutable Summary objects, it is
33 | * important to create a new Summary object with the correct contents to be returned. Do not
34 | * return null summaries.
35 | *
36 | * @param a Summary from sketch A
37 | * @param b Summary from sketch B
38 | * @return union of Summary A and Summary B
39 | */
40 | public S union(S a, S b);
41 |
42 | /**
43 | * This is called by the intersection operator when both sketches have the same hash value.
44 | *
45 | * <p>Caution: Do not modify the input Summary objects. Also do not return them directly,
46 | * unless they are immutable (most Summary objects are not). For mutable Summary objects, it is
47 | * important to create a new Summary object with the correct contents to be returned. Do not
48 | * return null summaries.
49 | *
50 | * @param a Summary from sketch A
51 | * @param b Summary from sketch B
52 | * @return intersection of Summary A and Summary B
53 | */
54 | public S intersection(S a, S b);
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/TupleSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | /**
23 | * Iterator over a generic tuple sketch
24 | * @param <S> Type of Summary
25 | */
26 | public class TupleSketchIterator<S extends Summary> {
27 |
28 | private final long[] hashArrTbl_; //could be either hashArr or hashTable
29 | private final S[] summaryArrTbl_; //could be either summaryArr or summaryTable
30 | private int i_;
31 | // The index starts at -1, so next() must be called before getHash() or getSummary().
32 | TupleSketchIterator(final long[] hashes, final S[] summaries) {
33 | hashArrTbl_ = hashes;
34 | summaryArrTbl_ = summaries;
35 | i_ = -1;
36 | }
37 |
38 | /**
39 | * Advancing the iterator and checking existence of the next entry is combined here for efficiency.
40 | * This results in an undefined state of the iterator before the first call of this method.
41 | * Entries whose hash is not positive are skipped, and a null hash array always yields false.
42 | * @return true if the next element exists
43 | */
44 | public boolean next() {
45 | if (hashArrTbl_ == null) { return false; }
46 | i_++;
47 | while (i_ < hashArrTbl_.length) {
48 | if (hashArrTbl_[i_] > 0) { return true; }
49 | i_++;
50 | }
51 | return false;
52 | }
53 |
54 | /**
55 | * Gets the hash from the current entry in the sketch, which is a hash
56 | * of the original key passed to update(). The original keys are not
57 | * retained. Don't call this before calling next() for the first time
58 | * or after getting false from next().
59 | * @return hash from the current entry
60 | */
61 | public long getHash() {
62 | return hashArrTbl_[i_];
63 | }
64 |
65 | /**
66 | * Gets a Summary object from the current entry in the sketch.
67 | * Don't call this before calling next() for the first time
68 | * or after getting false from next().
69 | * @return Summary object for the current entry (this is not a copy!)
70 | */
71 | public S getSummary() {
72 | return summaryArrTbl_[i_];
73 | }
74 |
75 | }
76 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | /**
23 | * Interface for updating user-defined Summary
24 | * @param <U> type of update value
25 | */
26 | public interface UpdatableSummary<U> extends Summary {
27 |
28 | /**
29 | * This is to provide a method of updating summaries.
30 | * This is primarily used internally.
31 | * @param value update value
32 | * @return this
33 | */
34 | UpdatableSummary<U> update(U value);
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryDeserializer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.adouble;
21 |
22 | import org.apache.datasketches.memory.Memory;
23 | import org.apache.datasketches.tuple.DeserializeResult;
24 | import org.apache.datasketches.tuple.SummaryDeserializer;
25 |
26 | /**
27 | * Implements {@code SummaryDeserializer<DoubleSummary>} by delegating to DoubleSummary.fromMemory.
28 | * @author Lee Rhodes
29 | */
30 | public class DoubleSummaryDeserializer implements SummaryDeserializer<DoubleSummary> {
31 |
32 | @Override
33 | public DeserializeResult<DoubleSummary> heapifySummary(final Memory mem) {
34 | return DoubleSummary.fromMemory(mem);
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.adouble;
21 |
22 | import org.apache.datasketches.tuple.SummaryFactory;
23 |
24 | /**
25 | * Factory for DoubleSummary.
26 | *
27 | * @author Lee Rhodes
28 | */
29 | public final class DoubleSummaryFactory implements SummaryFactory {
30 |
31 | private final DoubleSummary.Mode summaryMode_;
32 |
33 | /**
34 | * Creates an instance of DoubleSummaryFactory with a given mode
35 | * @param summaryMode summary mode
36 | */
37 | public DoubleSummaryFactory(final DoubleSummary.Mode summaryMode) {
38 | summaryMode_ = summaryMode;
39 | }
40 |
41 | @Override
42 | public DoubleSummary newSummary() {
43 | return new DoubleSummary(summaryMode_);
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/adouble/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package provides a generic implementation of the Tuple sketch for a single Double value.
22 | */
23 | package org.apache.datasketches.tuple.adouble;
24 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.aninteger;
21 |
22 | import org.apache.datasketches.memory.Memory;
23 | import org.apache.datasketches.tuple.DeserializeResult;
24 | import org.apache.datasketches.tuple.SummaryDeserializer;
25 |
26 | /**
27 | * Implements SummaryDeserializer<IntegerSummary>
28 | * @author Lee Rhodes
29 | */
30 | public class IntegerSummaryDeserializer implements SummaryDeserializer {
31 |
32 | @Override
33 | public DeserializeResult heapifySummary(final Memory mem) {
34 | return IntegerSummary.fromMemory(mem);
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.aninteger;
21 |
22 | import org.apache.datasketches.tuple.SummaryFactory;
23 |
24 | /**
25 | * Factory for IntegerSummary.
26 | *
27 | * @author Lee Rhodes
28 | */
29 | public class IntegerSummaryFactory implements SummaryFactory {
30 |
31 | private final IntegerSummary.Mode summaryMode_;
32 |
33 | /**
34 | * Creates an instance of IntegerSummaryFactory with a given mode
35 | * @param summaryMode summary mode
36 | */
37 | public IntegerSummaryFactory(final IntegerSummary.Mode summaryMode) {
38 | summaryMode_ = summaryMode;
39 | }
40 |
41 | @Override
42 | public IntegerSummary newSummary() {
43 | return new IntegerSummary(summaryMode_);
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummarySetOperations.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.aninteger;
21 |
22 | import org.apache.datasketches.tuple.SummarySetOperations;
23 | import org.apache.datasketches.tuple.aninteger.IntegerSummary.Mode;
24 |
25 | /**
26 | * Methods for defining how unions and intersections of two objects of type IntegerSummary
27 | * are performed.
28 | *
29 | * @author Lee Rhodes
30 | */
31 | public class IntegerSummarySetOperations implements SummarySetOperations {
32 |
33 | private final Mode unionSummaryMode_;
34 |
35 | /**
36 | * Intersection is not well defined or even meaningful between numeric values.
37 | * Nevertheless, this can be defined to be a different type of aggregation for intersecting hashes.
38 | */
39 | private final Mode intersectionSummaryMode_;
40 |
41 | /**
42 | * Creates a new instance with two modes
43 | * @param unionSummaryMode for unions
44 | * @param intersectionSummaryMode for intersections
45 | */
46 | public IntegerSummarySetOperations(final Mode unionSummaryMode, final Mode intersectionSummaryMode) {
47 | unionSummaryMode_ = unionSummaryMode;
48 | intersectionSummaryMode_ = intersectionSummaryMode;
49 | }
50 |
51 | @Override
52 | public IntegerSummary union(final IntegerSummary a, final IntegerSummary b) {
53 | final IntegerSummary result = new IntegerSummary(unionSummaryMode_);
54 | result.update(a.getValue());
55 | result.update(b.getValue());
56 | return result;
57 | }
58 |
59 | @Override
60 | public IntegerSummary intersection(final IntegerSummary a, final IntegerSummary b) {
61 | final IntegerSummary result = new IntegerSummary(intersectionSummaryMode_);
62 | result.update(a.getValue());
63 | result.update(b.getValue());
64 | return result;
65 | }
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/aninteger/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package provides a generic implementation of the Tuple sketch for a single Integer value.
22 | */
23 | package org.apache.datasketches.tuple.aninteger;
24 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotB.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.arrayofdoubles;
21 |
22 | import org.apache.datasketches.memory.WritableMemory;
23 |
24 | /**
25 | * Computes a set difference of two tuple sketches of type ArrayOfDoubles
26 | */
27 | public abstract class ArrayOfDoublesAnotB {
28 |
29 | ArrayOfDoublesAnotB() {}
30 |
31 | /**
32 | * Perform A-and-not-B set operation on the two given sketches.
33 | * A null sketch is interpreted as an empty sketch.
34 | * This is not an accumulating update. Calling update() more than once
35 | * without calling getResult() will discard the result of previous update().
36 | * Both input sketches must have the same numValues.
37 | *
38 | * @param a The incoming sketch for the first argument
39 | * @param b The incoming sketch for the second argument
40 | */
41 | public abstract void update(ArrayOfDoublesSketch a, ArrayOfDoublesSketch b);
42 |
43 | /**
44 | * Gets the result of this operation in the form of a ArrayOfDoublesCompactSketch
45 | * @return compact sketch representing the result of the operation
46 | */
47 | public abstract ArrayOfDoublesCompactSketch getResult();
48 |
49 | /**
50 | * Gets the result of this operation in the form of a ArrayOfDoublesCompactSketch
51 | * @param mem memory for the result (can be null)
52 | * @return compact sketch representing the result of the operation (off-heap if memory is
53 | * provided)
54 | */
55 | public abstract ArrayOfDoublesCompactSketch getResult(WritableMemory mem);
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCombiner.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.arrayofdoubles;
21 |
22 | /**
23 | * Combines two arrays of double values for use with ArrayOfDoubles tuple sketches
24 | */
25 | public interface ArrayOfDoublesCombiner {
26 |
27 | /**
28 | * Method of combining two arrays of double values
29 | * @param a Array A.
30 | * @param b Array B.
31 | * @return Result of combining A and B
32 | */
33 | public double[] combine(double[] a, double[] b);
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCompactSketch.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.arrayofdoubles;
21 |
22 | /**
23 | * Top level compact tuple sketch of type ArrayOfDoubles. Compact sketches are never created
24 | * directly. They are created as a result of the compact() method on a QuickSelectSketch
25 | * or the getResult() method of a set operation like Union, Intersection or AnotB.
26 | * Compact sketch consists of a compact list (i.e. no intervening spaces) of hash values,
27 | * corresponding list of double values, and a value for theta. The lists may or may
28 | * not be ordered. A compact sketch is read-only.
29 | */
30 | public abstract class ArrayOfDoublesCompactSketch extends ArrayOfDoublesSketch {
31 |
32 | static final byte serialVersionUID = 1;
33 |
34 | // Layout of retained entries:
35 | // Long || Start Byte Adr:
36 | // Adr:
37 | // || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |
38 | // 3 ||-----------------------------------|----------Retained Entries------------|
39 |
40 | static final int EMPTY_SIZE = 16;
41 | static final int RETAINED_ENTRIES_INT = 16;
42 | // 4 bytes of padding for 8 byte alignment
43 | static final int ENTRIES_START = 24;
44 |
45 | ArrayOfDoublesCompactSketch(final int numValues) {
46 | super(numValues);
47 | }
48 |
49 | @Override
50 | public int getCurrentBytes() {
51 | final int count = getRetainedEntries();
52 | int sizeBytes = EMPTY_SIZE;
53 | if (count > 0) {
54 | sizeBytes = ENTRIES_START + (SIZE_OF_KEY_BYTES * count)
55 | + (SIZE_OF_VALUE_BYTES * numValues_ * count);
56 | }
57 | return sizeBytes;
58 | }
59 |
60 | @Override
61 | public int getMaxBytes() {
62 | return getCurrentBytes();
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.arrayofdoubles;
21 |
/**
 * Iterator contract for walking the entries of tuple sketches of type ArrayOfDoubles.
 */
public interface ArrayOfDoublesSketchIterator {

  /**
   * Moves to the next entry and reports whether one exists. The two steps are merged
   * into a single call for efficiency; consequently the iterator state is undefined
   * until this method has been called for the first time.
   * @return true if the next element exists
   */
  boolean next();

  /**
   * Returns the key of the current entry, which is a hash of the original key passed
   * to update(); the original keys are not retained. Must not be called before the
   * first call to next() or after next() has returned false.
   * @return hash key from the current entry
   */
  long getKey();

  /**
   * Returns the values of the current entry. Must not be called before the
   * first call to next() or after next() has returned false.
   * @return array of double values for the current entry (may or may not be a copy)
   */
  double[] getValues();
}
51 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.arrayofdoubles;
21 |
22 | import org.apache.datasketches.memory.WritableMemory;
23 |
24 | /**
25 | * Direct Intersection operation for tuple sketches of type ArrayOfDoubles.
26 | *
27 | * This implementation uses data in a given Memory that is owned and managed by the caller.
28 | * This Memory can be off-heap, which if managed properly will greatly reduce the need for
29 | * the JVM to perform garbage collection.
30 | */
31 | final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection {
32 |
33 | private WritableMemory mem_;
34 |
35 | /**
36 | * Creates an instance of a DirectArrayOfDoublesIntersection with a custom update seed
37 | * @param numValues number of double values associated with each key
38 | * @param seed See seed
39 | * @param dstMem See Memory
40 | */
41 | DirectArrayOfDoublesIntersection(final int numValues, final long seed, final WritableMemory dstMem) {
42 | super(numValues, seed);
43 | mem_ = dstMem;
44 | }
45 |
46 | @Override
47 | protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues,
48 | final long seed) {
49 | return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, mem_);
50 | }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.arrayofdoubles;
21 |
22 | import org.apache.datasketches.common.SketchesReadOnlyException;
23 | import org.apache.datasketches.memory.Memory;
24 | import org.apache.datasketches.memory.WritableMemory;
25 |
26 | final class DirectArrayOfDoublesQuickSelectSketchR extends DirectArrayOfDoublesQuickSelectSketch {
27 |
28 | DirectArrayOfDoublesQuickSelectSketchR(final Memory mem, final long seed) {
29 | super((WritableMemory) mem, seed);
30 | }
31 |
32 | @Override
33 | void insertOrIgnore(final long key, final double[] values) {
34 | throw new SketchesReadOnlyException();
35 | }
36 |
37 | @Override
38 | public void trim() {
39 | throw new SketchesReadOnlyException();
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnionR.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.arrayofdoubles;
21 |
22 | import org.apache.datasketches.common.SketchesReadOnlyException;
23 | import org.apache.datasketches.memory.WritableMemory;
24 |
25 | final class DirectArrayOfDoublesUnionR extends DirectArrayOfDoublesUnion {
26 |
27 | /**
28 | * Wraps the given Memory.
29 | * @param gadget the ArrayOfDoublesQuickSelectSketch
30 | * @param mem See Memory
31 | */
32 | DirectArrayOfDoublesUnionR(final ArrayOfDoublesQuickSelectSketch gadget, final WritableMemory mem) {
33 | super(gadget, mem);
34 | }
35 |
36 | @Override
37 | public void union(final ArrayOfDoublesSketch tupleSketch) {
38 | throw new SketchesReadOnlyException();
39 | }
40 |
41 | @Override
42 | public void reset() {
43 | throw new SketchesReadOnlyException();
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesIntersection.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.arrayofdoubles;
21 |
22 | /**
23 | * On-heap implementation of intersection set operation for tuple sketches of type
24 | * ArrayOfDoubles.
25 | */
26 | final class HeapArrayOfDoublesIntersection extends ArrayOfDoublesIntersection {
27 |
28 | /**
29 | * Creates an instance of a HeapArrayOfDoublesIntersection with a custom update seed
30 | * @param numValues number of double values associated with each key
31 | * @param seed See seed
32 | */
33 | HeapArrayOfDoublesIntersection(final int numValues, final long seed) {
34 | super(numValues, seed);
35 | }
36 |
37 | @Override
38 | protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues,
39 | final long seed) {
40 | return new HeapArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed);
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.arrayofdoubles;
21 |
22 | import java.util.Arrays;
23 |
24 | /**
25 | * Iterator over the on-heap ArrayOfDoublesSketch (compact or hash table)
26 | */
27 | final class HeapArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIterator {
28 |
29 | private long[] keys_;
30 | private double[] values_;
31 | private int numValues_;
32 | private int i_;
33 |
34 | HeapArrayOfDoublesSketchIterator(final long[] keys, final double[] values, final int numValues) {
35 | keys_ = keys;
36 | values_ = values;
37 | numValues_ = numValues;
38 | i_ = -1;
39 | }
40 |
41 | @Override
42 | public boolean next() {
43 | if (keys_ == null) { return false; }
44 | i_++;
45 | while (i_ < keys_.length) {
46 | if (keys_[i_] != 0) { return true; }
47 | i_++;
48 | }
49 | return false;
50 | }
51 |
52 | @Override
53 | public long getKey() {
54 | return keys_[i_];
55 | }
56 |
57 | @Override
58 | public double[] getValues() {
59 | if (numValues_ == 1) {
60 | return new double[] { values_[i_] };
61 | }
62 | return Arrays.copyOfRange(values_, i_ * numValues_, (i_ + 1) * numValues_);
63 | }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package is for a concrete implementation of the Tuple sketch for an array of double values.
22 | */
23 |
24 | package org.apache.datasketches.tuple.arrayofdoubles;
25 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * The tuple package contains a number of sketches that are based on the same
22 | * fundamental algorithms as the Theta Sketch Framework and that extend these
23 | * concepts to whole new families of sketches.
24 | */
25 | package org.apache.datasketches.tuple;
26 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryDeserializer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.strings;
21 |
22 | import org.apache.datasketches.memory.Memory;
23 | import org.apache.datasketches.tuple.DeserializeResult;
24 | import org.apache.datasketches.tuple.SummaryDeserializer;
25 |
26 | /**
27 | * Implements SummaryDeserializer<ArrayOfStringsSummary>
28 | * @author Lee Rhodes
29 | */
30 | public class ArrayOfStringsSummaryDeserializer implements SummaryDeserializer {
31 |
32 | @Override
33 | public DeserializeResult heapifySummary(final Memory mem) {
34 | return ArrayOfStringsSummaryDeserializer.fromMemory(mem);
35 | }
36 |
37 | /**
38 | * Also used in test.
39 | * @param mem the given memory
40 | * @return the DeserializeResult
41 | */
42 | static DeserializeResult fromMemory(final Memory mem) {
43 | final ArrayOfStringsSummary nsum = new ArrayOfStringsSummary(mem);
44 | final int totBytes = mem.getInt(0);
45 | return new DeserializeResult<>(nsum, totBytes);
46 | }
47 |
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.strings;
21 |
22 | import org.apache.datasketches.tuple.SummaryFactory;
23 |
24 | /**
25 |  * Implements {@code SummaryFactory<ArrayOfStringsSummary>}.
26 |  *
27 |  * @author Lee Rhodes
28 |  */
29 | public class ArrayOfStringsSummaryFactory implements SummaryFactory<ArrayOfStringsSummary> {
30 |
31 |   /**
32 |    * Creates a summary with the no-argument ArrayOfStringsSummary constructor.
33 |    *
34 |    * @return a newly constructed ArrayOfStringsSummary
35 |    */
36 |   @Override
37 |   public ArrayOfStringsSummary newSummary() {
38 |     return new ArrayOfStringsSummary();
39 |   }
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummarySetOperations.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple.strings;
21 |
22 | import org.apache.datasketches.tuple.SummarySetOperations;
23 |
24 | /**
25 | * Implements SummarySetOperations<ArrayOfStringsSummary>
26 | * @author Lee Rhodes
27 | */
28 | public class ArrayOfStringsSummarySetOperations implements SummarySetOperations {
29 |
30 | @Override
31 | public ArrayOfStringsSummary union(final ArrayOfStringsSummary a, final ArrayOfStringsSummary b) {
32 | return a.copy();
33 | }
34 |
35 | @Override
36 | public ArrayOfStringsSummary intersection(final ArrayOfStringsSummary a, final ArrayOfStringsSummary b) {
37 | return a.copy();
38 | }
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/org/apache/datasketches/tuple/strings/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | /**
21 | * This package is for a generic implementation of the Tuple sketch for a single String value.
22 | */
23 |
24 | package org.apache.datasketches.tuple.strings;
25 |
--------------------------------------------------------------------------------
/src/main/javadoc/overview.html:
--------------------------------------------------------------------------------
1 |
3 |
21 |
22 |
23 |
24 |
25 |
26 | Sketching Core Library
27 | Overview
28 |
29 | The Sketching Core Library provides a range of stochastic streaming algorithms and closely
30 | related Java technologies that are particularly useful when integrating this technology into
31 | systems that must deal with massive data.
32 |
33 |
34 | This library is divided into packages that constitute distinct groups of functionality:
35 |
36 | Note: In general, if the requirements or promises of any method's contract are not fulfilled
37 | (that is, if there is a bug in either the method or its caller),
38 | then an unchecked exception will be thrown.
39 | The precise type of such an unchecked exception does not form part of any method's contract.
40 |
41 |
42 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/common/BoundsOnRatiosInSampledSetsTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.common;
21 |
22 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.checkInputs;
23 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.getEstimateOfA;
24 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.getEstimateOfB;
25 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.getEstimateOfBoverA;
26 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.getLowerBoundForBoverA;
27 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.getUpperBoundForBoverA;
28 | import static org.testng.Assert.assertEquals;
29 |
30 | import org.testng.annotations.Test;
31 |
32 | /**
33 |  * Smoke tests for the static methods of BoundsOnRatiosInSampledSets.
34 |  */
35 | public class BoundsOnRatiosInSampledSetsTest {
36 |
37 |   @Test
38 |   public void checkNormalReturns() {
39 |     final double[] fractions = {.1, 0.75, 1.0};
40 |
41 |     //lower bound: ordinary inputs must simply return; all-zero counts must give 0.0
42 |     for (final double f : fractions) {
43 |       getLowerBoundForBoverA(500, 100, f);
44 |     }
45 |     assertEquals(getLowerBoundForBoverA(0, 0, .1), 0.0, 0.0);
46 |
47 |     //upper bound: ordinary inputs must simply return; all-zero counts must give 1.0
48 |     for (final double f : fractions) {
49 |       getUpperBoundForBoverA(500, 100, f);
50 |     }
51 |     assertEquals(getUpperBoundForBoverA(0, 0, .1), 1.0, 0.0);
52 |
53 |     //point estimates: all-zero counts must give .5 for the ratio
54 |     getEstimateOfBoverA(500,100);
55 |     getEstimateOfA(500, .1);
56 |     getEstimateOfB(100, .1);
57 |     assertEquals(getEstimateOfBoverA(0, 0), .5, 0.0);
58 |   }
59 |
60 |   @Test(expectedExceptions = SketchesArgumentException.class)
61 |   public void checkInputA() {
62 |     checkInputs(-1, 0, .3); //negative first argument
63 |   }
64 |
65 |   @Test(expectedExceptions = SketchesArgumentException.class)
66 |   public void checkInputB() {
67 |     checkInputs(500, -1, .3); //negative second argument
68 |   }
69 |
70 |   @Test(expectedExceptions = SketchesArgumentException.class)
71 |   public void checkInputF() {
72 |     checkInputs(500, 100, -1); //negative third argument
73 |   }
74 |
75 | }
76 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/common/ShuffleTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.common;
21 |
22 | import static org.testng.Assert.fail;
23 |
24 | import org.testng.annotations.Test;
25 |
26 | /**
27 |  * Checks, for float, double, long and int arrays of length 10, that Shuffle.shuffle
28 |  * leaves at least one element away from its original index.
29 |  */
30 | public class ShuffleTest {
31 |
32 |   @Test
33 |   public void checkFloat() {
34 |     final float[] data = new float[10];
35 |     for (int i = 0; i < data.length; i++) { data[i] = i; }
36 |     Shuffle.shuffle(data);
37 |     boolean moved = false;
38 |     for (int i = 0; i < data.length; i++) {
39 |       moved |= (data[i] != i);
40 |     }
41 |     if (!moved) { fail(); } //every element is still at its original index
42 |   }
43 |
44 |   @Test
45 |   public void checkDouble() {
46 |     final double[] data = new double[10];
47 |     for (int i = 0; i < data.length; i++) { data[i] = i; }
48 |     Shuffle.shuffle(data);
49 |     boolean moved = false;
50 |     for (int i = 0; i < data.length; i++) {
51 |       moved |= (data[i] != i);
52 |     }
53 |     if (!moved) { fail(); } //every element is still at its original index
54 |   }
55 |
56 |   @Test
57 |   public void checkLong() {
58 |     final long[] data = new long[10];
59 |     for (int i = 0; i < data.length; i++) { data[i] = i; }
60 |     Shuffle.shuffle(data);
61 |     boolean moved = false;
62 |     for (int i = 0; i < data.length; i++) {
63 |       moved |= (data[i] != i);
64 |     }
65 |     if (!moved) { fail(); } //every element is still at its original index
66 |   }
67 |
68 |   @Test
69 |   public void checkInt() {
70 |     final int[] data = new int[10];
71 |     for (int i = 0; i < data.length; i++) { data[i] = i; }
72 |     Shuffle.shuffle(data);
73 |     boolean moved = false;
74 |     for (int i = 0; i < data.length; i++) {
75 |       moved |= (data[i] != i);
76 |     }
77 |     if (!moved) { fail(); } //every element is still at its original index
78 |   }
79 | }
80 |
81 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/common/SketchesExceptionTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.common;
21 |
22 | import org.testng.annotations.Test;
23 |
24 | public class SketchesExceptionTest {
25 |
26 | @Test(expectedExceptions = SketchesException.class)
27 | public void checkSketchesException() {
28 | throw new SketchesException("This is a test.");
29 | }
30 |
31 | @Test(expectedExceptions = SketchesArgumentException.class)
32 | public void checkSketchesArgumentException() {
33 | throw new SketchesArgumentException("This is a test.");
34 | }
35 |
36 | @Test(expectedExceptions = SketchesStateException.class)
37 | public void checkSketchesStateException() {
38 | throw new SketchesStateException("This is a test.");
39 | }
40 |
41 | @Test
42 | public void checkSketchesExceptionWithThrowable() {
43 | try {
44 | throw new SketchesException("First Exception.");
45 | } catch (final SketchesException se) {
46 | try {
47 | throw new SketchesException("Second Exception. ", se);
48 | } catch (final SketchesException se2) {
49 | //success
50 | }
51 | }
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/cpc/CompressionDataTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.cpc;
21 |
22 | import static org.apache.datasketches.cpc.CompressionData.decodingTablesForHighEntropyByte;
23 | import static org.apache.datasketches.cpc.CompressionData.encodingTablesForHighEntropyByte;
24 | import static org.apache.datasketches.cpc.CompressionData.lengthLimitedUnaryDecodingTable65;
25 | import static org.apache.datasketches.cpc.CompressionData.lengthLimitedUnaryEncodingTable65;
26 | import static org.apache.datasketches.cpc.CompressionData.validateDecodingTable;
27 |
28 | import org.testng.annotations.Test;
29 |
30 | /**
31 |  * Runs validateDecodingTable over the decoding/encoding table pairs of CompressionData.
32 |  *
33 |  * @author Lee Rhodes
34 |  */
35 | public class CompressionDataTest {
36 |
37 |   @Test
38 |   public static void checkTables() {
39 |     validateDecodingTable(lengthLimitedUnaryDecodingTable65, lengthLimitedUnaryEncodingTable65);
40 |
41 |     final int numHighEntropyTables = 16 + 6; //table indices 0 through 21 are validated
42 |     for (int t = 0; t < numHighEntropyTables; t++) {
43 |       validateDecodingTable(decodingTablesForHighEntropyByte[t], encodingTablesForHighEntropyByte[t]);
44 |     }
45 |   }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/fdt/GroupTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.fdt;
21 |
22 | import static org.testng.Assert.assertEquals;
23 |
24 | import org.testng.annotations.Test;
25 |
26 | /**
27 |  * Checks that a Group returns from its getters the values it was given through init.
28 |  *
29 |  * @author Lee Rhodes
30 |  */
31 | public class GroupTest {
32 |   private static final String LS = System.getProperty("line.separator");
33 |
34 |   @Test
35 |   public void checkToString() { //check visually
36 |     final String priKey = "AAAAAAAA,BBBBBBBBBB";
37 |     final int count = 100_000_000;
38 |     final double est = 1E8;
39 |     final double ub = 1.2E8;
40 |     final double lb = 8E7;
41 |     final double fraction = 0.1;
42 |     final double rse = 0.01;
43 |
44 |     final Group group = new Group();
45 |     group.init(priKey, count, est, ub, lb, fraction, rse);
46 |
47 |     //each getter must echo the value handed to init
48 |     assertEquals(group.getPrimaryKey(), priKey);
49 |     assertEquals(group.getCount(), count);
50 |     assertEquals(group.getEstimate(), est);
51 |     assertEquals(group.getUpperBound(), ub);
52 |     assertEquals(group.getLowerBound(), lb);
53 |     assertEquals(group.getFraction(), fraction);
54 |     assertEquals(group.getRse(), rse);
55 |
56 |     println(group.getHeader());
57 |     println(group.toString());
58 |   }
59 |
60 |   @Test
61 |   public void printlnTest() {
62 |     final String className = this.getClass().getName();
63 |     println("PRINTING: " + className);
64 |   }
65 |
66 |   /**
67 |    * Appends the line separator and forwards to print.
68 |    * @param s value to print
69 |    */
70 |   static void println(String s) {
71 |     print(s + LS);
72 |   }
73 |
74 |   /**
75 |    * Printing is disabled: the output statement is commented out.
76 |    * @param s value to print
77 |    */
78 |   static void print(String s) {
79 |     //System.out.print(s); //disable here
80 |   }
81 |
82 | }
83 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/frequencies/HashMapStressTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.frequencies;
21 |
22 | import org.apache.datasketches.hash.MurmurHash3;
23 | //import org.testng.annotations.Test;
24 |
25 | /**
26 |  * Manual timing harness for ReversePurgeLongHashMap; the Test annotation is commented out.
27 |  */
28 | public class HashMapStressTest {
29 |
30 |   //@Test
31 |   public static void stress() {
32 |     println("ReversePurgeLongHashMap Stress Test");
33 |     printf("%12s%15s%n", "Capacity", "TimePerAdjust");
34 |     final int streamLen = 10000000;
35 |     final int capacityLimit = 2 << 24;
36 |     for (int capacity = 2 << 5; capacity < capacityLimit; capacity *= 2) {
37 |       final long[] keys = new long[streamLen];
38 |       final long[] values = new long[streamLen];
39 |
40 |       //the first capacity/2 items carry the value streamLen, all later items carry 1
41 |       final int heavyCount = capacity / 2;
42 |       for (int i = 0; i < streamLen; i++) {
43 |         keys[i] = murmur(i);
44 |         if (i < heavyCount) {
45 |           values[i] = streamLen;
46 |         } else {
47 |           values[i] = 1;
48 |         }
49 |       }
50 |
51 |       final int sizeToShift = (int) (.75 * capacity);
52 |       final ReversePurgeLongHashMap hashmap = new ReversePurgeLongHashMap(capacity);
53 |       final long timePerAdjust = timeOneHashMap(hashmap, keys, values, sizeToShift);
54 |       printf("%12d%15d%n", capacity, timePerAdjust);
55 |     }
56 |   }
57 |
58 |   /**
59 |    * Feeds every key/value pair to the map and, whenever the number of active entries equals
60 |    * sizeToShift, decrements all values by one and keeps only the positive counts.
61 |    * @return elapsed nanoseconds divided by the number of keys
62 |    */
63 |   private static long timeOneHashMap(ReversePurgeLongHashMap hashMap, long[] keys, long[] values,
64 |       int sizeToShift) {
65 |     final long startNs = System.nanoTime();
66 |     final int len = keys.length;
67 |     assert (len == values.length);
68 |     int idx = 0;
69 |     while (idx < len) {
70 |       hashMap.adjustOrPutValue(keys[idx], values[idx]);
71 |       if (hashMap.getNumActive() == sizeToShift) {
72 |         hashMap.adjustAllValuesBy(-1);
73 |         hashMap.keepOnlyPositiveCounts();
74 |       }
75 |       idx++;
76 |     }
77 |     final long elapsedNs = System.nanoTime() - startNs;
78 |     return elapsedNs / len;
79 |   }
80 |
81 |   /** Returns element 0 of MurmurHash3.hash over a one-element array holding key, with seed 0. */
82 |   private static long murmur(long key) {
83 |     return MurmurHash3.hash(new long[] { key }, 0)[0];
84 |   }
85 |
86 |   private static void println(Object obj) {
87 |     System.out.println(obj.toString());
88 |   }
89 |
90 |   private static void printf(String fmt, Object ... args) {
91 |     System.out.printf(fmt, args);
92 |   }
93 |
94 | }
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/frequencies/ReversePurgeLongHashMapTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.frequencies;
21 |
22 | import static org.testng.Assert.assertNull;
23 |
24 | import org.apache.datasketches.common.SketchesArgumentException;
25 | import org.testng.annotations.Test;
26 |
27 | /**
28 |  * Edge-case checks for ReversePurgeLongHashMap.
29 |  */
30 | public class ReversePurgeLongHashMapTest {
31 |
32 |   @Test(expectedExceptions = SketchesArgumentException.class)
33 |   public void checkgetInstanceString() {
34 |     final String emptyInput = "";
35 |     ReversePurgeLongHashMap.getInstance(emptyInput);
36 |   }
37 |
38 |   @Test
39 |   public void checkActiveNull() {
40 |     //a map that has received no entries must report null for both active arrays
41 |     final ReversePurgeLongHashMap emptyMap = new ReversePurgeLongHashMap(4);
42 |     final Object activeKeys = emptyMap.getActiveKeys();
43 |     assertNull(activeKeys);
44 |     final Object activeValues = emptyMap.getActiveValues();
45 |     assertNull(activeValues);
46 |   }
47 |
48 | }
49 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/hash/XxHashTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.hash;
21 |
22 | import static org.testng.Assert.assertEquals;
23 |
24 | import org.testng.annotations.Test;
25 |
26 | import org.apache.datasketches.memory.Memory;
27 |
28 | /**
29 |  * Compares XxHash of a long value with XxHash of a Memory that wraps the same long.
30 |  *
31 |  * @author Lee Rhodes
32 |  */
33 | public class XxHashTest {
34 |
35 |   @Test
36 |   public void longCheck() {
37 |     final long seed = 0;
38 |     final long[] oneLong = { 123L };
39 |
40 |     //hash of the long value itself
41 |     final long hashOfLong = XxHash.hash(123L, seed);
42 |
43 |     //hash of the wrapped array; the arguments 0, 8, 0 are presumably offset, length, seed
44 |     final Memory mem = Memory.wrap(oneLong);
45 |     final long hashOfMem = XxHash.hash(mem, 0, 8, 0);
46 |
47 |     assertEquals(hashOfMem, hashOfLong);
48 |   }
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/hll/TablesTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.hll;
21 |
22 | import static org.apache.datasketches.hll.CouponMapping.xArr;
23 | import static org.apache.datasketches.hll.CouponMapping.yArr;
24 | import static org.apache.datasketches.hll.CubicInterpolation.usingXAndYTables;
25 | import static org.testng.Assert.assertEquals;
26 | import static org.testng.Assert.fail;
27 |
28 | import org.apache.datasketches.common.SketchesArgumentException;
29 | import org.testng.annotations.Test;
30 |
31 | /**
32 |  * Checks CubicInterpolation.usingXAndYTables against the CouponMapping tables.
33 |  *
34 |  * @author Lee Rhodes
35 |  */
36 | public class TablesTest {
37 |
38 |   @Test
39 |   public void checkInterpolationExceptions() {
40 |     expectRejected(-1);
41 |     expectRejected(11000000.0);
42 |   }
43 |
44 |   /**
45 |    * Fails the test unless interpolating at x throws SketchesArgumentException.
46 |    * @param x the value expected to be rejected
47 |    */
48 |   private static void expectRejected(final double x) {
49 |     try {
50 |       usingXAndYTables(xArr, yArr, x);
51 |       fail();
52 |     } catch (final SketchesArgumentException e) {
53 |       //expected
54 |     }
55 |   }
56 |
57 |   @Test
58 |   public void checkCornerCases() {
59 |     //interpolating exactly at the last x entry must return the last y entry
60 |     final int last = xArr.length - 1;
61 |     final double xLast = xArr[last];
62 |     final double yActual = usingXAndYTables(xArr, yArr, xLast);
63 |     final double yExpected = yArr[last];
64 |     assertEquals(yActual, yExpected, 0.0);
65 |   }
66 |
67 |   @Test
68 |   public void printlnTest() {
69 |     final String className = this.getClass().getName();
70 |     println("PRINTING: " + className);
71 |   }
72 |
73 |   /**
74 |    * Printing is disabled: the output statement is commented out.
75 |    * @param s value to print
76 |    */
77 |   static void println(String s) {
78 |     //System.out.println(s); //disable here
79 |   }
80 |
81 | }
82 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/kll/KllSketchTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.kll;
21 |
22 | import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_EMPTY;
23 | import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_FULL;
24 | import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_SINGLE;
25 | import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE;
26 | import static org.apache.datasketches.kll.KllSketch.SketchStructure.getSketchStructure;
27 | import static org.testng.Assert.assertEquals;
28 | import static org.testng.Assert.fail;
29 |
30 | import org.apache.datasketches.common.SketchesArgumentException;
31 | import org.testng.annotations.Test;
32 |
33 | /**
34 |  * Checks the mapping performed by KllSketch.SketchStructure.getSketchStructure.
35 |  */
36 | public class KllSketchTest {
37 |
38 |   @Test
39 |   public void checkSketchStructureEnum() {
40 |     //these four argument pairs map to an enum constant
41 |     assertEquals(getSketchStructure(2,1), COMPACT_EMPTY);
42 |     assertEquals(getSketchStructure(2,2), COMPACT_SINGLE);
43 |     assertEquals(getSketchStructure(5,1), COMPACT_FULL);
44 |     assertEquals(getSketchStructure(5,3), UPDATABLE);
45 |
46 |     //these two argument pairs must be rejected
47 |     try {
48 |       getSketchStructure(5,2);
49 |       fail();
50 |     } catch (final SketchesArgumentException expected) { }
51 |     try {
52 |       getSketchStructure(2,3);
53 |       fail();
54 |     } catch (final SketchesArgumentException expected) { }
55 |   }
56 |
57 |   private static final boolean enablePrinting = false;
58 |
59 |   /**
60 |    * Prints only when enablePrinting is true.
61 |    * @param o the Object to println
62 |    */
63 |   static final void println(final Object o) {
64 |     if (!enablePrinting) { return; }
65 |     System.out.println(o.toString());
66 |   }
67 |
68 | }
69 |
70 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/quantiles/DoublesSketchBuilderTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantiles;
21 |
22 | import static org.testng.Assert.assertEquals;
23 | import static org.testng.Assert.assertFalse;
24 | import static org.testng.Assert.assertTrue;
25 |
26 | import org.testng.annotations.Test;
27 |
28 | import org.apache.datasketches.memory.WritableMemory;
29 |
30 | public class DoublesSketchBuilderTest {
31 |
32 | @Test
33 | public void checkBuilder() {
34 | int k = 256; //default is 128
35 | DoublesSketchBuilder bldr = DoublesSketch.builder();
36 | bldr.setK(k);
37 | assertEquals(bldr.getK(), k); //confirms new k
38 | println(bldr.toString());
39 | int bytes = DoublesSketch.getUpdatableStorageBytes(k, 0);
40 | byte[] byteArr = new byte[bytes];
41 | WritableMemory mem = WritableMemory.writableWrap(byteArr);
42 | DoublesSketch ds = bldr.build(mem);
43 | assertTrue(ds.hasMemory());
44 | assertFalse(ds.isDirect());
45 | println(bldr.toString());
46 |
47 | bldr = DoublesSketch.builder();
48 | assertEquals(bldr.getK(), PreambleUtil.DEFAULT_K);
49 | }
50 |
51 | @Test
52 | public void printlnTest() {
53 | println("PRINTING: "+this.getClass().getName());
54 | }
55 |
56 | /**
57 | * @param s value to print
58 | */
59 | static void println(String s) {
60 | //System.out.println(s); //disable here
61 | }
62 |
63 | }
64 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/quantiles/DoublesSketchIteratorTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantiles;
21 |
22 | import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator;
23 | import org.testng.Assert;
24 | import org.testng.annotations.Test;
25 |
26 | public class DoublesSketchIteratorTest {
27 |
28 | @Test
29 | public void emptySketch() {
30 | DoublesSketch sketch = DoublesSketch.builder().build();
31 | QuantilesDoublesSketchIterator it = sketch.iterator();
32 | Assert.assertFalse(it.next());
33 | }
34 |
35 | @Test
36 | public void oneItemSketch() {
37 | UpdateDoublesSketch sketch = DoublesSketch.builder().build();
38 | sketch.update(0);
39 | QuantilesDoublesSketchIterator it = sketch.iterator();
40 | Assert.assertTrue(it.next());
41 | Assert.assertEquals(it.getQuantile(), 0.0);
42 | Assert.assertEquals(it.getWeight(), 1);
43 | Assert.assertFalse(it.next());
44 | }
45 |
46 | @Test
47 | public void bigSketches() {
48 | for (int n = 1000; n < 100000; n += 2000) {
49 | UpdateDoublesSketch sketch = DoublesSketch.builder().build();
50 | for (int i = 0; i < n; i++) {
51 | sketch.update(i);
52 | }
53 | QuantilesDoublesSketchIterator it = sketch.iterator();
54 | int count = 0;
55 | int weight = 0;
56 | while (it.next()) {
57 | count++;
58 | weight += (int)it.getWeight();
59 | }
60 | Assert.assertEquals(count, sketch.getNumRetained());
61 | Assert.assertEquals(weight, n);
62 | }
63 | }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/quantiles/ItemsSketchIteratorTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.quantiles;
21 |
22 | import java.util.Comparator;
23 |
24 | import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator;
25 | import org.testng.Assert;
26 | import org.testng.annotations.Test;
27 |
28 | public class ItemsSketchIteratorTest {
29 |
30 | @Test
31 | public void emptySketch() {
32 | ItemsSketch sketch = ItemsSketch.getInstance(Integer.class, 128, Comparator.naturalOrder());
33 | QuantilesGenericSketchIterator it = sketch.iterator();
34 | Assert.assertFalse(it.next());
35 | }
36 |
37 | @Test
38 | public void oneItemSketch() {
39 | ItemsSketch sketch = ItemsSketch.getInstance(Integer.class, 128, Comparator.naturalOrder());
40 | sketch.update(0);
41 | QuantilesGenericSketchIterator it = sketch.iterator();
42 | Assert.assertTrue(it.next());
43 | Assert.assertEquals(it.getQuantile(), Integer.valueOf(0));
44 | Assert.assertEquals(it.getWeight(), 1);
45 | Assert.assertFalse(it.next());
46 | }
47 |
48 | @Test
49 | public void bigSketches() {
50 | for (int n = 1000; n < 100000; n += 2000) {
51 | ItemsSketch sketch = ItemsSketch.getInstance(Integer.class, 128, Comparator.naturalOrder());
52 | for (int i = 0; i < n; i++) {
53 | sketch.update(i);
54 | }
55 | QuantilesGenericSketchIterator it = sketch.iterator();
56 | int count = 0;
57 | int weight = 0;
58 | while (it.next()) {
59 | count++;
60 | weight += (int)it.getWeight();
61 | }
62 | Assert.assertEquals(count, sketch.getNumRetained());
63 | Assert.assertEquals(weight, n);
64 | }
65 | }
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/req/ReqSketchBuilderTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.req;
21 |
22 | import static org.testng.Assert.assertEquals;
23 | import static org.testng.Assert.assertTrue;
24 |
25 | import org.testng.annotations.Test;
26 |
27 | /**
28 | * @author Lee Rhodes
29 | */
30 | public class ReqSketchBuilderTest {
31 |
32 | @Test
33 | public void checkBldr() {
34 | final ReqSketchBuilder bldr = new ReqSketchBuilder();
35 | final ReqDebugImplTest rdi = new ReqDebugImplTest(2, "%4.0f");
36 | bldr.setK(50).setHighRankAccuracy(true).setReqDebug(rdi);
37 | assertEquals(bldr.getK(), 50);
38 | assertEquals(bldr.getHighRankAccuracy(), true);
39 | assertTrue(bldr.getReqDebug() != null);
40 | println(bldr.toString());
41 | bldr.setReqDebug(null);
42 | println(bldr.toString());
43 | }
44 |
45 | /**
46 | * @param o object to be printed
47 | */
48 | static void println(final Object o) {
49 | //System.out.println(o.toString());
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/tdigest/SortTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tdigest;
21 |
22 | import static org.testng.Assert.assertEquals;
23 |
24 | import org.apache.datasketches.common.Shuffle;
25 | import org.testng.annotations.Test;
26 |
27 | public class SortTest {
28 |
29 | @Test
30 | public void smallWithRepetition() {
31 | final double[] keys = {3, 1, 4, 2, 1};
32 | final long[] values = {4, 1, 5, 3, 2};
33 | Sort.stableSort(keys, values, keys.length);
34 | assertEquals(keys[0], 1);
35 | assertEquals(keys[1], 1);
36 | assertEquals(keys[2], 2);
37 | assertEquals(keys[3], 3);
38 | assertEquals(keys[4], 4);
39 | assertEquals(values[0], 1);
40 | assertEquals(values[1], 2);
41 | assertEquals(values[2], 3);
42 | assertEquals(values[3], 4);
43 | assertEquals(values[4], 5);
44 | }
45 |
46 | @Test
47 | public void large() {
48 | final int n = 1000;
49 | final double[] keys = new double[n];
50 | final long[] values = new long[n];
51 | for (int i = 0; i < n; i++) values[i] = i;
52 | Shuffle.shuffle(values);
53 | for (int i = 0; i < n; i++) keys[i] = values[i];
54 | Sort.stableSort(keys, values, n);
55 | for (int i = 0; i < n; i++) {
56 | assertEquals(keys[i], i);
57 | assertEquals(values[i], i);
58 | }
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/thetacommon/ThetaUtilTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.thetacommon;
21 |
22 | import org.apache.datasketches.quantilescommon.QuantilesUtil;
23 | import org.testng.Assert;
24 | import org.testng.annotations.Test;
25 |
26 | /**
27 | * @author Lee Rhodes
28 | */
29 | public class ThetaUtilTest {
30 |
31 | @Test
32 | public void checkStartingSubMultiple() {
33 | Assert.assertEquals(ThetaUtil.startingSubMultiple(8, 3, 4), 5);
34 | Assert.assertEquals(ThetaUtil.startingSubMultiple(7, 3, 4), 4);
35 | Assert.assertEquals(ThetaUtil.startingSubMultiple(6, 3, 4), 6);
36 | }
37 |
38 | @Test(expectedExceptions = NullPointerException.class)
39 | public void checkValidateValuesNullException() {
40 | QuantilesUtil.checkDoublesSplitPointsOrder(null);
41 | }
42 |
43 | }
44 |
45 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/tuple/IntegerSummary.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | import org.apache.datasketches.common.ByteArrayUtil;
23 | import org.apache.datasketches.memory.Memory;
24 |
25 | /**
26 | * Summary for generic tuple sketches of type Integer.
27 | * This summary keeps an Integer value.
28 | */
29 | public class IntegerSummary implements UpdatableSummary {
30 | private int value_;
31 |
32 | /**
33 | * Creates an instance of IntegerSummary with a given starting value.
34 | * @param value starting value
35 | */
36 | public IntegerSummary(final int value) {
37 | value_ = value;
38 | }
39 |
40 | @Override
41 | public IntegerSummary update(final Integer value) {
42 | value_ += value;
43 | return this;
44 | }
45 |
46 | @Override
47 | public IntegerSummary copy() {
48 | return new IntegerSummary(value_);
49 | }
50 |
51 | /**
52 | * @return current value of the IntegerSummary
53 | */
54 | public int getValue() {
55 | return value_;
56 | }
57 |
58 | private static final int SERIALIZED_SIZE_BYTES = 4;
59 | private static final int VALUE_INDEX = 0;
60 |
61 | @Override
62 | public byte[] toByteArray() {
63 | final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES];
64 | ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_);
65 | return bytes;
66 | }
67 |
68 | /**
69 | * Creates an instance of the IntegerSummary given a serialized representation
70 | * @param mem Memory object with serialized IntegerSummary
71 | * @return DeserializedResult object, which contains a IntegerSummary object and number of bytes
72 | * read from the Memory
73 | */
74 | public static DeserializeResult fromMemory(final Memory mem) {
75 | return new DeserializeResult<>(new IntegerSummary(mem.getInt(VALUE_INDEX)), SERIALIZED_SIZE_BYTES);
76 | }
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/tuple/IntegerSummaryDeserializer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | import org.apache.datasketches.memory.Memory;
23 |
24 | public class IntegerSummaryDeserializer implements SummaryDeserializer {
25 |
26 | @Override
27 | public DeserializeResult heapifySummary(final Memory mem) {
28 | return IntegerSummary.fromMemory(mem);
29 | }
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/tuple/IntegerSummaryFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | /**
23 | * Factory for IntegerSummary.
24 | */
25 | public class IntegerSummaryFactory implements SummaryFactory {
26 |
27 | @Override
28 | public IntegerSummary newSummary() {
29 | return new IntegerSummary(0);
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/src/test/java/org/apache/datasketches/tuple/SerializerDeserializerTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 |
20 | package org.apache.datasketches.tuple;
21 |
22 | import org.apache.datasketches.common.Family;
23 | import org.apache.datasketches.common.SketchesArgumentException;
24 | import org.apache.datasketches.memory.Memory;
25 | import org.testng.Assert;
26 | import org.testng.annotations.Test;
27 |
28 | public class SerializerDeserializerTest {
29 |
30 | @Test
31 | public void validSketchType() {
32 | byte[] bytes = new byte[4];
33 | bytes[SerializerDeserializer.TYPE_BYTE_OFFSET] = (byte) SerializerDeserializer.SketchType.CompactSketch.ordinal();
34 | Assert.assertEquals(SerializerDeserializer.getSketchType(Memory.wrap(bytes)), SerializerDeserializer.SketchType.CompactSketch);
35 | }
36 |
37 | @Test(expectedExceptions = SketchesArgumentException.class)
38 | public void invalidSketchType() {
39 | byte[] bytes = new byte[4];
40 | bytes[SerializerDeserializer.TYPE_BYTE_OFFSET] = 33;
41 | SerializerDeserializer.getSketchType(Memory.wrap(bytes));
42 | }
43 |
44 | // @Test(expectedExceptions = SketchesArgumentException.class)
45 | // public void deserializeFromMemoryUsupportedClass() {
46 | // Memory mem = null;
47 | // SerializerDeserializer.deserializeFromMemory(mem, 0, "bogus");
48 | // }
49 |
50 | @Test(expectedExceptions = SketchesArgumentException.class)
51 | public void validateFamilyNotTuple() {
52 | SerializerDeserializer.validateFamily((byte) 1, (byte) 0);
53 | }
54 |
55 | @Test(expectedExceptions = SketchesArgumentException.class)
56 | public void validateFamilyWrongPreambleLength() {
57 | SerializerDeserializer.validateFamily((byte) Family.TUPLE.getID(), (byte) 0);
58 | }
59 |
60 | @Test(expectedExceptions = SketchesArgumentException.class)
61 | public void checkBadSeedHash() {
62 | org.apache.datasketches.tuple.Util.computeSeedHash(50541);
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/src/test/resources/ArrayOfDoublesUnion_v0.9.1.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/ArrayOfDoublesUnion_v0.9.1.sk
--------------------------------------------------------------------------------
/src/test/resources/CompactSketchWithDoubleSummary4K_serialVersion1.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/CompactSketchWithDoubleSummary4K_serialVersion1.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n1000_v0.3.0.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n1000_v0.3.0.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n1000_v0.6.0.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n1000_v0.6.0.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n1000_v0.8.0.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n1000_v0.8.0.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n1000_v0.8.3.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n1000_v0.8.3.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n50_v0.3.0.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n50_v0.3.0.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n50_v0.6.0.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n50_v0.6.0.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n50_v0.8.0.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n50_v0.8.0.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n50_v0.8.3.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n50_v0.8.3.sk
--------------------------------------------------------------------------------
/src/test/resources/TupleWithTestIntegerSummary4kTrimmedSerVer2.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/TupleWithTestIntegerSummary4kTrimmedSerVer2.sk
--------------------------------------------------------------------------------
/src/test/resources/kll_double_n1.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/kll_double_n1.sk
--------------------------------------------------------------------------------
/src/test/resources/kll_sketch_double_one_item_v1.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/kll_sketch_double_one_item_v1.sk
--------------------------------------------------------------------------------
/src/test/resources/kll_sketch_float_one_item_v1.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/kll_sketch_float_one_item_v1.sk
--------------------------------------------------------------------------------
/src/test/resources/tdigest_ref_k100_n10000_double.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/tdigest_ref_k100_n10000_double.sk
--------------------------------------------------------------------------------
/src/test/resources/tdigest_ref_k100_n10000_float.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/tdigest_ref_k100_n10000_float.sk
--------------------------------------------------------------------------------
/tools/FindBugsExcludeFilter.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------