├── LICENSE ├── NOTICE ├── README.md ├── pom.xml ├── src ├── main │ ├── java │ │ └── org │ │ │ └── apache │ │ │ └── datasketches │ │ │ ├── common │ │ │ ├── ArrayOfBooleansSerDe.java │ │ │ ├── ArrayOfDoublesSerDe.java │ │ │ ├── ArrayOfItemsSerDe.java │ │ │ ├── ArrayOfLongsSerDe.java │ │ │ ├── ArrayOfNumbersSerDe.java │ │ │ ├── ArrayOfStringsSerDe.java │ │ │ ├── ArrayOfUtf16StringsSerDe.java │ │ │ ├── BoundsOnBinomialProportions.java │ │ │ ├── BoundsOnRatiosInSampledSets.java │ │ │ ├── ByteArrayUtil.java │ │ │ ├── Family.java │ │ │ ├── MemoryStatus.java │ │ │ ├── ResizeFactor.java │ │ │ ├── SketchesArgumentException.java │ │ │ ├── SketchesException.java │ │ │ ├── SketchesReadOnlyException.java │ │ │ ├── SketchesStateException.java │ │ │ ├── SuppressFBWarnings.java │ │ │ ├── Util.java │ │ │ └── package-info.java │ │ │ ├── cpc │ │ │ ├── BitMatrix.java │ │ │ ├── CompressedState.java │ │ │ ├── CompressionCharacterization.java │ │ │ ├── CompressionData.java │ │ │ ├── CpcCompression.java │ │ │ ├── CpcConfidence.java │ │ │ ├── CpcSketch.java │ │ │ ├── CpcUnion.java │ │ │ ├── CpcUtil.java │ │ │ ├── CpcWrapper.java │ │ │ ├── Flavor.java │ │ │ ├── Format.java │ │ │ ├── IconEstimator.java │ │ │ ├── IconPolynomialCoefficients.java │ │ │ ├── MergingValidation.java │ │ │ ├── PairTable.java │ │ │ ├── PreambleUtil.java │ │ │ ├── QuickMergingValidation.java │ │ │ ├── RuntimeAsserts.java │ │ │ ├── StreamingValidation.java │ │ │ ├── TestUtil.java │ │ │ └── package-info.java │ │ │ ├── fdt │ │ │ ├── FdtSketch.java │ │ │ ├── Group.java │ │ │ ├── PostProcessor.java │ │ │ └── package-info.java │ │ │ ├── filters │ │ │ ├── bloomfilter │ │ │ │ ├── BitArray.java │ │ │ │ ├── BloomFilter.java │ │ │ │ ├── BloomFilterBuilder.java │ │ │ │ ├── DirectBitArray.java │ │ │ │ ├── DirectBitArrayR.java │ │ │ │ ├── HeapBitArray.java │ │ │ │ └── package-info.java │ │ │ └── package-info.java │ │ │ ├── frequencies │ │ │ ├── ErrorType.java │ │ │ ├── ItemsSketch.java │ │ │ ├── LongsSketch.java │ │ │ ├── 
PreambleUtil.java │ │ │ ├── ReversePurgeItemHashMap.java │ │ │ ├── ReversePurgeLongHashMap.java │ │ │ ├── Util.java │ │ │ └── package-info.java │ │ │ ├── hash │ │ │ ├── MurmurHash3.java │ │ │ ├── MurmurHash3FFM21.java │ │ │ ├── XxHash.java │ │ │ └── package-info.java │ │ │ ├── hll │ │ │ ├── AbstractCoupons.java │ │ │ ├── AbstractHllArray.java │ │ │ ├── AuxHashMap.java │ │ │ ├── BaseHllSketch.java │ │ │ ├── CompositeInterpolationXTable.java │ │ │ ├── Conversions.java │ │ │ ├── CouponHashSet.java │ │ │ ├── CouponList.java │ │ │ ├── CouponMapping.java │ │ │ ├── CubicInterpolation.java │ │ │ ├── CurMode.java │ │ │ ├── DirectAuxHashMap.java │ │ │ ├── DirectCouponHashSet.java │ │ │ ├── DirectCouponList.java │ │ │ ├── DirectHll4Array.java │ │ │ ├── DirectHll6Array.java │ │ │ ├── DirectHll8Array.java │ │ │ ├── DirectHllArray.java │ │ │ ├── HarmonicNumbers.java │ │ │ ├── HeapAuxHashMap.java │ │ │ ├── Hll4Array.java │ │ │ ├── Hll4Update.java │ │ │ ├── Hll6Array.java │ │ │ ├── Hll8Array.java │ │ │ ├── HllArray.java │ │ │ ├── HllEstimators.java │ │ │ ├── HllPairIterator.java │ │ │ ├── HllSketch.java │ │ │ ├── HllSketchImpl.java │ │ │ ├── HllUtil.java │ │ │ ├── IntArrayPairIterator.java │ │ │ ├── IntMemoryPairIterator.java │ │ │ ├── PairIterator.java │ │ │ ├── PreambleUtil.java │ │ │ ├── RelativeErrorTables.java │ │ │ ├── TgtHllType.java │ │ │ ├── ToByteArrayImpl.java │ │ │ ├── Union.java │ │ │ ├── doc-files │ │ │ │ ├── HLL_HIP_K12T20U20.png │ │ │ │ └── HLL_UnionTime4_6_8_Java_CPP.png │ │ │ └── package-info.java │ │ │ ├── hllmap │ │ │ ├── CouponHashMap.java │ │ │ ├── CouponTraverseMap.java │ │ │ ├── CouponsIterator.java │ │ │ ├── HllMap.java │ │ │ ├── Map.java │ │ │ ├── SingleCouponMap.java │ │ │ ├── UniqueCountMap.java │ │ │ └── package-info.java │ │ │ ├── kll │ │ │ ├── KllDirectCompactItemsSketch.java │ │ │ ├── KllDirectDoublesSketch.java │ │ │ ├── KllDirectFloatsSketch.java │ │ │ ├── KllDirectLongsSketch.java │ │ │ ├── KllDoublesHelper.java │ │ │ ├── KllDoublesSketch.java │ 
│ │ ├── KllDoublesSketchIterator.java │ │ │ ├── KllFloatsHelper.java │ │ │ ├── KllFloatsSketch.java │ │ │ ├── KllFloatsSketchIterator.java │ │ │ ├── KllHeapDoublesSketch.java │ │ │ ├── KllHeapFloatsSketch.java │ │ │ ├── KllHeapItemsSketch.java │ │ │ ├── KllHeapLongsSketch.java │ │ │ ├── KllHelper.java │ │ │ ├── KllItemsHelper.java │ │ │ ├── KllItemsSketch.java │ │ │ ├── KllItemsSketchIterator.java │ │ │ ├── KllLongsHelper.java │ │ │ ├── KllLongsSketch.java │ │ │ ├── KllLongsSketchIterator.java │ │ │ ├── KllMemoryValidate.java │ │ │ ├── KllPreambleUtil.java │ │ │ ├── KllSketch.java │ │ │ ├── KllSketchIterator.java │ │ │ └── package-info.java │ │ │ ├── package-info.java │ │ │ ├── partitions │ │ │ ├── BoundsRule.java │ │ │ ├── Partitioner.java │ │ │ ├── SketchFillRequest.java │ │ │ └── package-info.java │ │ │ ├── quantiles │ │ │ ├── ClassicUtil.java │ │ │ ├── CompactDoublesSketch.java │ │ │ ├── DirectCompactDoublesSketch.java │ │ │ ├── DirectDoublesSketchAccessor.java │ │ │ ├── DirectUpdateDoublesSketch.java │ │ │ ├── DirectUpdateDoublesSketchR.java │ │ │ ├── DoublesArrayAccessor.java │ │ │ ├── DoublesBufferAccessor.java │ │ │ ├── DoublesByteArrayImpl.java │ │ │ ├── DoublesMergeImpl.java │ │ │ ├── DoublesSketch.java │ │ │ ├── DoublesSketchAccessor.java │ │ │ ├── DoublesSketchBuilder.java │ │ │ ├── DoublesSketchIterator.java │ │ │ ├── DoublesUnion.java │ │ │ ├── DoublesUnionBuilder.java │ │ │ ├── DoublesUnionImpl.java │ │ │ ├── DoublesUnionImplR.java │ │ │ ├── DoublesUpdateImpl.java │ │ │ ├── DoublesUtil.java │ │ │ ├── HeapCompactDoublesSketch.java │ │ │ ├── HeapDoublesSketchAccessor.java │ │ │ ├── HeapUpdateDoublesSketch.java │ │ │ ├── ItemsByteArrayImpl.java │ │ │ ├── ItemsMergeImpl.java │ │ │ ├── ItemsSketch.java │ │ │ ├── ItemsSketchIterator.java │ │ │ ├── ItemsUnion.java │ │ │ ├── ItemsUpdateImpl.java │ │ │ ├── ItemsUtil.java │ │ │ ├── PreambleUtil.java │ │ │ ├── UpdateDoublesSketch.java │ │ │ └── package-info.java │ │ │ ├── quantilescommon │ │ │ ├── 
BinarySearch.java │ │ │ ├── DoublesSketchSortedView.java │ │ │ ├── DoublesSortedView.java │ │ │ ├── DoublesSortedViewIterator.java │ │ │ ├── FloatsSketchSortedView.java │ │ │ ├── FloatsSortedView.java │ │ │ ├── FloatsSortedViewIterator.java │ │ │ ├── GenericInequalitySearch.java │ │ │ ├── GenericPartitionBoundaries.java │ │ │ ├── GenericSortedView.java │ │ │ ├── GenericSortedViewIterator.java │ │ │ ├── IncludeMinMax.java │ │ │ ├── InequalitySearch.java │ │ │ ├── ItemsSketchSortedView.java │ │ │ ├── KolmogorovSmirnov.java │ │ │ ├── LongsSketchSortedView.java │ │ │ ├── LongsSortedView.java │ │ │ ├── LongsSortedViewIterator.java │ │ │ ├── PartitioningFeature.java │ │ │ ├── QuantileSearchCriteria.java │ │ │ ├── QuantilesAPI.java │ │ │ ├── QuantilesDoublesAPI.java │ │ │ ├── QuantilesDoublesSketchIterator.java │ │ │ ├── QuantilesFloatsAPI.java │ │ │ ├── QuantilesFloatsSketchIterator.java │ │ │ ├── QuantilesGenericAPI.java │ │ │ ├── QuantilesGenericSketchIterator.java │ │ │ ├── QuantilesLongsAPI.java │ │ │ ├── QuantilesLongsSketchIterator.java │ │ │ ├── QuantilesSketchIterator.java │ │ │ ├── QuantilesUtil.java │ │ │ ├── SketchPartitionLimits.java │ │ │ ├── SortedView.java │ │ │ ├── SortedViewIterator.java │ │ │ └── package-info.java │ │ │ ├── req │ │ │ ├── BaseReqSketch.java │ │ │ ├── FloatBuffer.java │ │ │ ├── ReqCompactor.java │ │ │ ├── ReqDebug.java │ │ │ ├── ReqSerDe.java │ │ │ ├── ReqSketch.java │ │ │ ├── ReqSketchBuilder.java │ │ │ ├── ReqSketchIterator.java │ │ │ └── package-info.java │ │ │ ├── sampling │ │ │ ├── EbppsItemsSample.java │ │ │ ├── EbppsItemsSketch.java │ │ │ ├── PreambleUtil.java │ │ │ ├── ReservoirItemsSketch.java │ │ │ ├── ReservoirItemsUnion.java │ │ │ ├── ReservoirLongsSketch.java │ │ │ ├── ReservoirLongsUnion.java │ │ │ ├── ReservoirSize.java │ │ │ ├── SampleSubsetSummary.java │ │ │ ├── SamplingUtil.java │ │ │ ├── VarOptItemsSamples.java │ │ │ ├── VarOptItemsSketch.java │ │ │ ├── VarOptItemsUnion.java │ │ │ └── package-info.java │ │ │ ├── tdigest 
│ │ │ ├── BinarySearch.java │ │ │ ├── Sort.java │ │ │ ├── TDigestDouble.java │ │ │ └── package-info.java │ │ │ ├── theta │ │ │ ├── AnotB.java │ │ │ ├── AnotBimpl.java │ │ │ ├── BitPacking.java │ │ │ ├── BytesCompactCompressedHashIterator.java │ │ │ ├── BytesCompactHashIterator.java │ │ │ ├── CompactOperations.java │ │ │ ├── CompactSketch.java │ │ │ ├── ConcurrentBackgroundThetaPropagation.java │ │ │ ├── ConcurrentDirectQuickSelectSketch.java │ │ │ ├── ConcurrentHeapQuickSelectSketch.java │ │ │ ├── ConcurrentHeapThetaBuffer.java │ │ │ ├── ConcurrentPropagationService.java │ │ │ ├── ConcurrentSharedThetaSketch.java │ │ │ ├── DirectCompactCompressedSketch.java │ │ │ ├── DirectCompactSketch.java │ │ │ ├── DirectQuickSelectSketch.java │ │ │ ├── DirectQuickSelectSketchR.java │ │ │ ├── EmptyCompactSketch.java │ │ │ ├── ForwardCompatibility.java │ │ │ ├── HashIterator.java │ │ │ ├── HeapAlphaSketch.java │ │ │ ├── HeapCompactHashIterator.java │ │ │ ├── HeapCompactSketch.java │ │ │ ├── HeapHashIterator.java │ │ │ ├── HeapQuickSelectSketch.java │ │ │ ├── HeapUpdateSketch.java │ │ │ ├── Intersection.java │ │ │ ├── IntersectionImpl.java │ │ │ ├── JaccardSimilarity.java │ │ │ ├── MemoryCompactCompressedHashIterator.java │ │ │ ├── MemoryHashIterator.java │ │ │ ├── PreambleUtil.java │ │ │ ├── Rebuilder.java │ │ │ ├── SetOperation.java │ │ │ ├── SetOperationBuilder.java │ │ │ ├── SingleItemSketch.java │ │ │ ├── Sketch.java │ │ │ ├── Sketches.java │ │ │ ├── Union.java │ │ │ ├── UnionImpl.java │ │ │ ├── UpdateReturnState.java │ │ │ ├── UpdateSketch.java │ │ │ ├── UpdateSketchBuilder.java │ │ │ ├── WrappedCompactCompressedSketch.java │ │ │ ├── WrappedCompactSketch.java │ │ │ └── package-info.java │ │ │ ├── thetacommon │ │ │ ├── BinomialBoundsN.java │ │ │ ├── BoundsOnRatiosInThetaSketchedSets.java │ │ │ ├── BoundsOnRatiosInTupleSketchedSets.java │ │ │ ├── EquivTables.java │ │ │ ├── HashOperations.java │ │ │ ├── QuickSelect.java │ │ │ ├── SetOperationCornerCases.java │ │ │ ├── 
ThetaUtil.java │ │ │ └── package-info.java │ │ │ └── tuple │ │ │ ├── AnotB.java │ │ │ ├── CompactSketch.java │ │ │ ├── DeserializeResult.java │ │ │ ├── Filter.java │ │ │ ├── HashTables.java │ │ │ ├── Intersection.java │ │ │ ├── JaccardSimilarity.java │ │ │ ├── QuickSelectSketch.java │ │ │ ├── SerializerDeserializer.java │ │ │ ├── Sketch.java │ │ │ ├── Sketches.java │ │ │ ├── Summary.java │ │ │ ├── SummaryDeserializer.java │ │ │ ├── SummaryFactory.java │ │ │ ├── SummarySetOperations.java │ │ │ ├── TupleSketchIterator.java │ │ │ ├── Union.java │ │ │ ├── UpdatableSketch.java │ │ │ ├── UpdatableSketchBuilder.java │ │ │ ├── UpdatableSummary.java │ │ │ ├── Util.java │ │ │ ├── adouble │ │ │ ├── DoubleSketch.java │ │ │ ├── DoubleSummary.java │ │ │ ├── DoubleSummaryDeserializer.java │ │ │ ├── DoubleSummaryFactory.java │ │ │ ├── DoubleSummarySetOperations.java │ │ │ └── package-info.java │ │ │ ├── aninteger │ │ │ ├── IntegerSketch.java │ │ │ ├── IntegerSummary.java │ │ │ ├── IntegerSummaryDeserializer.java │ │ │ ├── IntegerSummaryFactory.java │ │ │ ├── IntegerSummarySetOperations.java │ │ │ └── package-info.java │ │ │ ├── arrayofdoubles │ │ │ ├── ArrayOfDoublesAnotB.java │ │ │ ├── ArrayOfDoublesAnotBImpl.java │ │ │ ├── ArrayOfDoublesCombiner.java │ │ │ ├── ArrayOfDoublesCompactSketch.java │ │ │ ├── ArrayOfDoublesIntersection.java │ │ │ ├── ArrayOfDoublesQuickSelectSketch.java │ │ │ ├── ArrayOfDoublesSetOperationBuilder.java │ │ │ ├── ArrayOfDoublesSketch.java │ │ │ ├── ArrayOfDoublesSketchIterator.java │ │ │ ├── ArrayOfDoublesSketches.java │ │ │ ├── ArrayOfDoublesUnion.java │ │ │ ├── ArrayOfDoublesUpdatableSketch.java │ │ │ ├── ArrayOfDoublesUpdatableSketchBuilder.java │ │ │ ├── DirectArrayOfDoublesCompactSketch.java │ │ │ ├── DirectArrayOfDoublesIntersection.java │ │ │ ├── DirectArrayOfDoublesQuickSelectSketch.java │ │ │ ├── DirectArrayOfDoublesQuickSelectSketchR.java │ │ │ ├── DirectArrayOfDoublesSketchIterator.java │ │ │ ├── DirectArrayOfDoublesUnion.java │ │ │ ├── 
DirectArrayOfDoublesUnionR.java │ │ │ ├── HashTables.java │ │ │ ├── HeapArrayOfDoublesCompactSketch.java │ │ │ ├── HeapArrayOfDoublesIntersection.java │ │ │ ├── HeapArrayOfDoublesQuickSelectSketch.java │ │ │ ├── HeapArrayOfDoublesSketchIterator.java │ │ │ ├── HeapArrayOfDoublesUnion.java │ │ │ └── package-info.java │ │ │ ├── package-info.java │ │ │ └── strings │ │ │ ├── ArrayOfStringsSketch.java │ │ │ ├── ArrayOfStringsSummary.java │ │ │ ├── ArrayOfStringsSummaryDeserializer.java │ │ │ ├── ArrayOfStringsSummaryFactory.java │ │ │ ├── ArrayOfStringsSummarySetOperations.java │ │ │ └── package-info.java │ └── javadoc │ │ ├── overview.html │ │ └── resources │ │ └── dictionary.html └── test │ ├── java │ └── org │ │ └── apache │ │ └── datasketches │ │ ├── common │ │ ├── ArrayOfXSerDeTest.java │ │ ├── BoundsOnBinomialProportionsTest.java │ │ ├── BoundsOnRatiosInSampledSetsTest.java │ │ ├── ByteArrayUtilTest.java │ │ ├── FamilyTest.java │ │ ├── Shuffle.java │ │ ├── ShuffleTest.java │ │ ├── SketchesExceptionTest.java │ │ ├── TestUtil.java │ │ └── UtilTest.java │ │ ├── cpc │ │ ├── CompressedStateTest.java │ │ ├── CompressionDataTest.java │ │ ├── CpcCompressionTest.java │ │ ├── CpcSketchCrossLanguageTest.java │ │ ├── CpcSketchTest.java │ │ ├── CpcUnionTest.java │ │ ├── CpcWrapperTest.java │ │ ├── IconEstimatorTest.java │ │ ├── PairTableTest.java │ │ ├── PreambleUtilTest.java │ │ ├── RuntimeAssertsTest.java │ │ └── TestAllTest.java │ │ ├── fdt │ │ ├── FdtSketchTest.java │ │ └── GroupTest.java │ │ ├── filters │ │ └── bloomfilter │ │ │ ├── BloomFilterBuilderTest.java │ │ │ ├── BloomFilterCrossLanguageTest.java │ │ │ ├── BloomFilterTest.java │ │ │ ├── DirectBitArrayRTest.java │ │ │ ├── DirectBitArrayTest.java │ │ │ └── HeapBitArrayTest.java │ │ ├── frequencies │ │ ├── DistTest.java │ │ ├── FrequentItemsSketchCrossLanguageTest.java │ │ ├── HashMapStressTest.java │ │ ├── ItemsSketchTest.java │ │ ├── LongsSketchTest.java │ │ ├── ReversePurgeLongHashMapTest.java │ │ └── 
SerDeCompatibilityTest.java │ │ ├── hash │ │ ├── MurmurHash3FFM21Test.java │ │ ├── MurmurHash3FFM21bTest.java │ │ ├── MurmurHash3Test.java │ │ └── XxHashTest.java │ │ ├── hll │ │ ├── AuxHashMapTest.java │ │ ├── BaseHllSketchTest.java │ │ ├── CouponListTest.java │ │ ├── CrossCountingTest.java │ │ ├── DirectAuxHashMapTest.java │ │ ├── DirectCouponListTest.java │ │ ├── DirectHllSketchTest.java │ │ ├── DirectUnionTest.java │ │ ├── HllArrayTest.java │ │ ├── HllSketchCrossLanguageTest.java │ │ ├── HllSketchTest.java │ │ ├── IsomorphicTest.java │ │ ├── PreambleUtilTest.java │ │ ├── SizeAndModeTransitions.java │ │ ├── TablesTest.java │ │ ├── ToFromByteArrayTest.java │ │ ├── UnionCaseTest.java │ │ └── UnionTest.java │ │ ├── hllmap │ │ ├── CouponHashMapTest.java │ │ ├── CouponTraverseMapTest.java │ │ ├── HllMapTest.java │ │ ├── SingleCouponMapTest.java │ │ └── UniqueCountMapTest.java │ │ ├── kll │ │ ├── KllCrossLanguageTest.java │ │ ├── KllDirectCompactDoublesSketchTest.java │ │ ├── KllDirectCompactFloatsSketchTest.java │ │ ├── KllDirectCompactItemsSketchIteratorTest.java │ │ ├── KllDirectCompactItemsSketchTest.java │ │ ├── KllDirectCompactLongsSketchTest.java │ │ ├── KllDirectDoublesSketchIteratorTest.java │ │ ├── KllDirectDoublesSketchTest.java │ │ ├── KllDirectFloatsSketchIteratorTest.java │ │ ├── KllDirectFloatsSketchTest.java │ │ ├── KllDirectLongsSketchIteratorTest.java │ │ ├── KllDirectLongsSketchTest.java │ │ ├── KllDoublesSketchIteratorTest.java │ │ ├── KllDoublesSketchSerDeTest.java │ │ ├── KllDoublesSketchTest.java │ │ ├── KllDoublesValidationTest.java │ │ ├── KllFloatsSketchIteratorTest.java │ │ ├── KllFloatsSketchSerDeTest.java │ │ ├── KllFloatsSketchTest.java │ │ ├── KllFloatsValidationTest.java │ │ ├── KllHelperTest.java │ │ ├── KllItemsSketchSerDeTest.java │ │ ├── KllItemsSketchTest.java │ │ ├── KllItemsSketchiteratorTest.java │ │ ├── KllLongsSketchIteratorTest.java │ │ ├── KllLongsSketchSerDeTest.java │ │ ├── KllLongsSketchTest.java │ │ ├── 
KllMemoryValidateTest.java │ │ ├── KllMiscDirectDoublesTest.java │ │ ├── KllMiscDirectFloatsTest.java │ │ ├── KllMiscDirectLongsTest.java │ │ ├── KllMiscDoublesTest.java │ │ ├── KllMiscFloatsTest.java │ │ ├── KllMiscItemsTest.java │ │ ├── KllMiscLongsTest.java │ │ └── KllSketchTest.java │ │ ├── partitions │ │ ├── ClassicPartitionsTest.java │ │ ├── ItemsSketchFillRequestLongAsString.java │ │ ├── KllItemsSketchFillRequestLongAsString.java │ │ ├── KllPartitionsTest.java │ │ └── PartitionResults.java │ │ ├── quantiles │ │ ├── AccuracyTest.java │ │ ├── CustomQuantilesTest.java │ │ ├── DebugUnionTest.java │ │ ├── DirectCompactDoublesSketchTest.java │ │ ├── DirectQuantilesMemoryRequestTest.java │ │ ├── DirectUpdateDoublesSketchTest.java │ │ ├── DoublesSketchBuilderTest.java │ │ ├── DoublesSketchIteratorTest.java │ │ ├── DoublesSketchTest.java │ │ ├── DoublesUnionBuilderTest.java │ │ ├── DoublesUnionImplTest.java │ │ ├── DoublesUtilTest.java │ │ ├── HeapCompactDoublesSketchTest.java │ │ ├── HeapUpdateDoublesSketchTest.java │ │ ├── ItemsSketchIteratorTest.java │ │ ├── ItemsSketchTest.java │ │ ├── ItemsUnionTest.java │ │ ├── PreambleUtilTest.java │ │ ├── QuantilesSketchCrossLanguageTest.java │ │ ├── ReadOnlyMemoryTest.java │ │ ├── SerDeCompatibilityTest.java │ │ └── UtilTest.java │ │ ├── quantilescommon │ │ ├── CrossCheckQuantilesTest.java │ │ ├── GenericInequalitySearchTest.java │ │ ├── IncludeMinMaxTest.java │ │ ├── InequalitySearchTest.java │ │ ├── KolmogorovSmirnovTest.java │ │ ├── LinearRanksAndQuantiles.java │ │ ├── LongsAsOrderableStrings.java │ │ ├── PartitionBoundariesTest.java │ │ └── QuantilesUtilTest.java │ │ ├── req │ │ ├── ReqCompactorTest.java │ │ ├── ReqDebugImplTest.java │ │ ├── ReqFloatBufferTest.java │ │ ├── ReqSketchBuilderTest.java │ │ ├── ReqSketchCrossLanguageTest.java │ │ ├── ReqSketchOtherTest.java │ │ ├── ReqSketchSortedViewTest.java │ │ └── ReqSketchTest.java │ │ ├── sampling │ │ ├── EbppsItemsSampleTest.java │ │ ├── EbppsItemsSketchTest.java │ │ 
├── ReservoirItemsSketchTest.java │ │ ├── ReservoirItemsUnionTest.java │ │ ├── ReservoirLongsSketchTest.java │ │ ├── ReservoirLongsUnionTest.java │ │ ├── ReservoirSizeTest.java │ │ ├── VarOptCrossLanguageTest.java │ │ ├── VarOptItemsSamplesTest.java │ │ ├── VarOptItemsSketchTest.java │ │ └── VarOptItemsUnionTest.java │ │ ├── tdigest │ │ ├── SortTest.java │ │ ├── TDigestCrossLanguageTest.java │ │ └── TDigestDoubleTest.java │ │ ├── theta │ │ ├── AnotBimplTest.java │ │ ├── BackwardConversions.java │ │ ├── BitPackingTest.java │ │ ├── CompactSketchTest.java │ │ ├── ConcurrentDirectQuickSelectSketchTest.java │ │ ├── ConcurrentHeapQuickSelectSketchTest.java │ │ ├── CornerCaseThetaSetOperationsTest.java │ │ ├── DirectIntersectionTest.java │ │ ├── DirectQuickSelectSketchTest.java │ │ ├── DirectUnionTest.java │ │ ├── EmptyTest.java │ │ ├── ExamplesTest.java │ │ ├── ForwardCompatibilityTest.java │ │ ├── HeapAlphaSketchTest.java │ │ ├── HeapIntersectionTest.java │ │ ├── HeapQuickSelectSketchTest.java │ │ ├── HeapUnionTest.java │ │ ├── HeapifyWrapSerVer1and2Test.java │ │ ├── IteratorTest.java │ │ ├── JaccardSimilarityTest.java │ │ ├── PairwiseSetOperationsTest.java │ │ ├── PreambleUtilTest.java │ │ ├── ReadOnlyMemoryTest.java │ │ ├── SetOperationTest.java │ │ ├── SetOpsCornerCasesTest.java │ │ ├── SingleItemSketchTest.java │ │ ├── SketchTest.java │ │ ├── SketchesTest.java │ │ ├── ThetaSketchCrossLanguageTest.java │ │ ├── UnionImplTest.java │ │ └── UpdateSketchTest.java │ │ ├── thetacommon │ │ ├── BinomialBoundsNTest.java │ │ ├── BoundsOnRatiosInThetaSketchedSetsTest.java │ │ ├── BoundsOnRatiosInTupleSketchedSetsTest.java │ │ ├── HashOperationsTest.java │ │ ├── QuickSelectTest.java │ │ └── ThetaUtilTest.java │ │ └── tuple │ │ ├── CompactSketchWithDoubleSummaryTest.java │ │ ├── IntegerSummary.java │ │ ├── IntegerSummaryDeserializer.java │ │ ├── IntegerSummaryFactory.java │ │ ├── JaccardSimilarityTest.java │ │ ├── MiscTest.java │ │ ├── ReadOnlyMemoryTest.java │ │ ├── 
SerializerDeserializerTest.java │ │ ├── TupleCrossLanguageTest.java │ │ ├── TupleExamples2Test.java │ │ ├── TupleExamplesTest.java │ │ ├── adouble │ │ ├── AdoubleAnotBTest.java │ │ ├── AdoubleIntersectionTest.java │ │ ├── AdoubleTest.java │ │ ├── AdoubleUnionTest.java │ │ └── FilterTest.java │ │ ├── aninteger │ │ ├── CornerCaseTupleSetOperationsTest.java │ │ ├── EngagementTest.java │ │ ├── IntegerSketchTest.java │ │ ├── MikhailsBugTupleTest.java │ │ └── ParameterLeakageTest.java │ │ ├── arrayofdoubles │ │ ├── AodSketchCrossLanguageTest.java │ │ ├── ArrayOfDoublesAnotBTest.java │ │ ├── ArrayOfDoublesCompactSketchTest.java │ │ ├── ArrayOfDoublesIntersectionTest.java │ │ ├── ArrayOfDoublesQuickSelectSketchTest.java │ │ ├── ArrayOfDoublesUnionTest.java │ │ ├── CornerCaseArrayOfDoublesSetOperationsTest.java │ │ ├── DirectArrayOfDoublesCompactSketchTest.java │ │ ├── DirectArrayOfDoublesQuickSelectSketchTest.java │ │ ├── HeapArrayOfDoublesCompactSketchTest.java │ │ └── HeapArrayOfDoublesQuickSelectSketchTest.java │ │ └── strings │ │ ├── ArrayOfStringsSketchTest.java │ │ └── ArrayOfStringsSummaryTest.java │ └── resources │ ├── ArrayOfDoublesUnion_v0.9.1.sk │ ├── CompactSketchWithDoubleSummary4K_serialVersion1.sk │ ├── Qk128_n1000_v0.3.0.sk │ ├── Qk128_n1000_v0.6.0.sk │ ├── Qk128_n1000_v0.8.0.sk │ ├── Qk128_n1000_v0.8.3.sk │ ├── Qk128_n50_v0.3.0.sk │ ├── Qk128_n50_v0.6.0.sk │ ├── Qk128_n50_v0.8.0.sk │ ├── Qk128_n50_v0.8.3.sk │ ├── TupleWithTestIntegerSummary4kTrimmedSerVer2.sk │ ├── kll_double_n1.sk │ ├── kll_sketch_double_one_item_v1.sk │ ├── kll_sketch_float_one_item_v1.sk │ ├── tdigest_ref_k100_n10000_double.sk │ └── tdigest_ref_k100_n10000_float.sk └── tools ├── FindBugsExcludeFilter.xml └── SketchesCheckstyle.xml /NOTICE: -------------------------------------------------------------------------------- 1 | Apache DataSketches Java 2 | Copyright 2025 The Apache Software Foundation 3 | 4 | Copyright 2015-2018 Yahoo Inc. 
5 | Copyright 2019-2020 Verizon Media 6 | Copyright 2021- Yahoo Inc. 7 | 8 | This product includes software developed at 9 | The Apache Software Foundation (http://www.apache.org/). 10 | 11 | Prior to moving to ASF, the software for this project was developed at 12 | Yahoo Inc. (https://developer.yahoo.com). 13 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/common/MemoryStatus.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.common; 21 | 22 | import org.apache.datasketches.memory.Memory; 23 | 24 | /** 25 | * Methods for inquiring the status of a backing Memory object. 26 | */ 27 | public interface MemoryStatus { 28 | 29 | /** 30 | * Returns true if this object's internal data is backed by a Memory object, 31 | * which may be on-heap or off-heap. 32 | * @return true if this object's internal data is backed by a Memory object. 
33 | */ 34 | default boolean hasMemory() { return false; } 35 | 36 | /** 37 | * Returns true if this object's internal data is backed by direct (off-heap) Memory. 38 | * @return true if this object's internal data is backed by direct (off-heap) Memory. 39 | */ 40 | default boolean isDirect() { return false; } 41 | 42 | /** 43 | * Returns true if the backing resource of this is identical with the backing resource 44 | * of that. The capacities must be the same. If this is a region, 45 | * the region offset must also be the same. 46 | * 47 | * @param that A different non-null and alive Memory object. 48 | * @return true if the backing resource of this is identical with the backing resource 49 | * of that. 50 | * @throws SketchesArgumentException if that is not alive (already closed). 51 | */ 52 | default boolean isSameResource(final Memory that) { return false; } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/common/ResizeFactor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
/**
 * For the Families that accept this configuration parameter, it controls the size multiple that
 * affects how fast the internal cache grows, when more space is required.
 *
 * <p>Each constant stores the log-base-2 of its multiplier, so the multiplier itself is
 * {@code 1 << lg()}: X1 = 1, X2 = 2, X4 = 4 and X8 = 8.</p>
 *
 * @author Lee Rhodes
 */
public enum ResizeFactor {
  /**
   * Do not resize. Sketch will be configured to full size.
   */
  X1(0),
  /**
   * Resize factor is 2.
   */
  X2(1),
  /**
   * Resize factor is 4.
   */
  X4(2),
  /**
   * Resize factor is 8.
   */
  X8(3);

  // Log-base-2 of the resize multiplier. Final: an enum constant must never change its value.
  private final int lg_;

  ResizeFactor(final int lg) {
    this.lg_ = lg;
  }

  /**
   * Returns the Log-base 2 of the Resize Factor
   * @return the Log-base 2 of the Resize Factor
   */
  public int lg() {
    return lg_;
  }

  /**
   * Returns the Resize Factor given the Log-base 2 of the Resize Factor.
   *
   * <p>Values 0, 1 and 2 select X1, X2 and X4. Every other value, including 3 and any
   * value outside the documented range, selects X8; no exception is thrown.</p>
   *
   * @param lg a value between zero and 3, inclusive.
   * @return the Resize Factor given the Log-base 2 of the Resize Factor
   */
  public static ResizeFactor getRF(final int lg) {
    if (X1.lg_ == lg) { return X1; }
    if (X2.lg_ == lg) { return X2; }
    if (X4.lg_ == lg) { return X4; }
    // Fallback kept from the original: anything that is not 0, 1 or 2 maps to X8.
    return X8;
  }

  /**
   * Returns the Resize Factor
   * @return the Resize Factor, which is 2 raised to the power of {@link #lg()}.
   */
  public int getValue() {
    return 1 << lg_;
  }
}
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.common; 21 | 22 | /** 23 | * Illegal Arguments Exception class for the library 24 | * 25 | * @author Lee Rhodes 26 | */ 27 | public class SketchesArgumentException extends SketchesException { 28 | private static final long serialVersionUID = 1L; 29 | 30 | //other constructors to be added as needed. 31 | 32 | /** 33 | * Constructs a new runtime exception with the specified detail message. The cause is not 34 | * initialized, and may subsequently be initialized by a call to 35 | * Throwable.initCause(java.lang.Throwable). 36 | * 37 | * @param message the detail message. The detail message is saved for later retrieval by the 38 | * Throwable.getMessage() method. 39 | */ 40 | public SketchesArgumentException(final String message) { 41 | super(message); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/common/SketchesException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
/**
 * Root unchecked exception of the library.
 *
 * @author Lee Rhodes
 */
public class SketchesException extends RuntimeException {
  private static final long serialVersionUID = 1L;

  //other constructors to be added as needed.

  /**
   * Creates the exception with the given detail message. No cause is set here; one may be
   * attached afterwards through <i>Throwable.initCause(java.lang.Throwable)</i>.
   *
   * @param message the detail message, retrievable later through the
   * <i>Throwable.getMessage()</i> method.
   */
  public SketchesException(final String message) {
    super(message);
  }

  /**
   * Creates the exception with the given detail message and cause.
   *
   * <p>Note that the detail message associated with cause is not automatically incorporated
   * in this runtime exception's detail message.</p>
   *
   * @param message the detail message, retrievable later through the
   * <i>Throwable.getMessage()</i> method.
   * @param cause the cause, retrievable later through the <i>Throwable.getCause()</i>
   * method. (A null value is permitted, and indicates that the cause is nonexistent or unknown.)
   */
  public SketchesException(final String message, final Throwable cause) {
    super(message, cause);
  }

}
31 | 32 | /** 33 | * Constructs a new runtime exception with the message: 34 | * "Write operation attempted on a read-only class." 35 | * 36 | *

The cause is not initialized, and may subsequently be initialized by a call to 37 | * Throwable.initCause(java.lang.Throwable). 38 | */ 39 | public SketchesReadOnlyException() { 40 | super("Write operation attempted on a read-only class."); 41 | } 42 | 43 | /** 44 | * Constructs a new runtime exception with the specified detail message. The cause is not 45 | * initialized, and may subsequently be initialized by a call to 46 | * Throwable.initCause(java.lang.Throwable). 47 | * 48 | * @param message the detail message. The detail message is saved for later retrieval by the 49 | * Throwable.getMessage() method. 50 | */ 51 | public SketchesReadOnlyException(final String message) { 52 | super(message); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/common/SketchesStateException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.common; 21 | 22 | /** 23 | * Illegal State Exception class for the library 24 | * 25 | * @author Lee Rhodes 26 | */ 27 | public class SketchesStateException extends SketchesException { 28 | private static final long serialVersionUID = 1L; 29 | 30 | //other constructors to be added as needed. 31 | 32 | /** 33 | * Constructs a new runtime exception with the specified detail message. The cause is not 34 | * initialized, and may subsequently be initialized by a call to 35 | * Throwable.initCause(java.lang.Throwable). 36 | * 37 | * @param message the detail message. The detail message is saved for later retrieval by the 38 | * Throwable.getMessage() method. 39 | */ 40 | public SketchesStateException(final String message) { 41 | super(message); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/common/SuppressFBWarnings.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.common; 21 | 22 | import java.lang.annotation.Retention; 23 | import java.lang.annotation.RetentionPolicy; 24 | 25 | /** 26 | * Used to suppress SpotBugs warnings. This annotation is declared with RetentionPolicy.CLASS. 27 | * 28 | * @author Lee Rhodes 29 | */ 30 | @Retention(RetentionPolicy.CLASS) 31 | public @interface SuppressFBWarnings { 32 | 33 | /** 34 | * A list of comma-separated, quoted SpotBugs warnings that are to be suppressed in the associated 35 | * annotated element. The value can be a bug category, kind or pattern. 36 | * @return list of relevant bug descriptors 37 | */ 38 | String[] value() default {}; 39 | 40 | /** 41 | * Optional explanation for the suppression. 42 | * @return explanation 43 | */ 44 | String justification() default ""; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/common/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package is for common classes that may be used across all the sketch families.
22 | */ 23 | package org.apache.datasketches.common; 24 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/cpc/Flavor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.cpc; 21 | 22 | /** 23 | * Note: except for brief transitional moments, these sketches always obey the following strict 24 | * mapping between the flavor of a sketch and the number of coupons that it has collected. 
25 | * @author Lee Rhodes 26 | * @author Kevin Lang 27 | */ 28 | enum Flavor { 29 | EMPTY, // 0 == C < 1 30 | SPARSE, // 1 <= C < 3K/32 31 | HYBRID, // 3K/32 <= C < K/2 32 | PINNED, // K/2 <= C < 27K/8 [NB: 27/8 = 3 + 3/8] 33 | SLIDING; // 27K/8 <= C 34 | 35 | private static Flavor[] fmtArr = Flavor.class.getEnumConstants(); 36 | 37 | /** 38 | * Returns the Flavor given its enum ordinal 39 | * @param ordinal the given enum ordinal 40 | * @return the Flavor given its enum ordinal 41 | */ 42 | static Flavor ordinalToFlavor(final int ordinal) { 43 | return fmtArr[ordinal]; 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/cpc/Format.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.cpc; 21 | 22 | /** 23 | * There are seven different preamble formats (8 combinations) that determine the layout of the 24 | * HiField variables after the first 8 bytes of the preamble. 25 | * Do not change the order. 
26 | */ 27 | enum Format { 28 | EMPTY_MERGED, 29 | EMPTY_HIP, 30 | SPARSE_HYBRID_MERGED, 31 | SPARSE_HYBRID_HIP, 32 | PINNED_SLIDING_MERGED_NOSV, 33 | PINNED_SLIDING_HIP_NOSV, 34 | PINNED_SLIDING_MERGED, 35 | PINNED_SLIDING_HIP; 36 | 37 | private static Format[] fmtArr = Format.class.getEnumConstants(); 38 | 39 | /** 40 | * Returns the Format given its enum ordinal 41 | * @param ordinal the given enum ordinal 42 | * @return the Format given its enum ordinal 43 | */ 44 | static Format ordinalToFormat(final int ordinal) { 45 | return fmtArr[ordinal]; 46 | } 47 | 48 | } //end enum Format 49 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/cpc/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | /** 21 | * Compressed Probabilistic Counting sketch family 22 | */ 23 | 24 | package org.apache.datasketches.cpc; 25 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/fdt/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * Frequent Distinct Tuples Sketch 22 | */ 23 | package org.apache.datasketches.fdt; 24 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/filters/bloomfilter/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | /** 20 | * BloomFilter package 21 | */ 22 | package org.apache.datasketches.filters.bloomfilter; 23 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/filters/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * The filters package contains data structures used to determine 22 | * approximate set-membership. 
Classes in this package may not follow 23 | * the standard sub-linear properties of other offerings in this 24 | * library, but they fit with the spirit of DataSketches by providing 25 | * fast and approximate answers to complex problems. 26 | */ 27 | package org.apache.datasketches.filters; 28 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/frequencies/ErrorType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.frequencies; 21 | 22 | /** 23 | * Specifies one of two types of error regions of the statistical classification Confusion Matrix 24 | * that can be excluded from a returned sample of Frequent Items. 25 | */ 26 | public enum ErrorType { 27 | 28 | /** 29 | * No Type I error samples will be included in the sample set, 30 | * which means all Truly Negative samples will be excluded from the sample set. 31 | * However, there may be Type II error samples (False Negatives) 32 | * that should have been included that were not. 
33 | * This is a subset of the NO_FALSE_NEGATIVES ErrorType. 34 | */ 35 | NO_FALSE_POSITIVES, 36 | 37 | /** 38 | * No Type II error samples will be excluded from the sample set, 39 | * which means all Truly Positive samples will be included in the sample set. 40 | * However, there may be Type I error samples (False Positives) 41 | * that were included that should not have been. 42 | */ 43 | NO_FALSE_NEGATIVES 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/frequencies/Util.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.frequencies; 21 | 22 | final class Util { 23 | 24 | private Util() {} 25 | 26 | /** 27 | * The following constant controls the size of the initial data structure for the 28 | * frequencies sketches and its value is somewhat arbitrary. 
29 | */ 30 | static final int LG_MIN_MAP_SIZE = 3; 31 | 32 | /** 33 | * This constant is large enough so that computing the median of SAMPLE_SIZE 34 | * randomly selected entries from a list of numbers and outputting 35 | * the empirical median will give a constant-factor approximation to the 36 | * true median with high probability. 37 | */ 38 | static final int SAMPLE_SIZE = 1024; 39 | 40 | /** 41 | * @param key to be hashed 42 | * @return the mixed 64-bit hash of the key; no masking to a table index is applied here. This hash function is taken from the internals of 43 | * Austin Appleby's MurmurHash3 algorithm. It is also used by the Trove for Java libraries. 44 | */ 45 | static long hash(long key) { 46 | key ^= key >>> 33; 47 | key *= 0xff51afd7ed558ccdL; 48 | key ^= key >>> 33; 49 | key *= 0xc4ceb9fe1a85ec53L; 50 | key ^= key >>> 33; 51 | return key; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/frequencies/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | /** 21 | * This package is dedicated to streaming algorithms that enable estimation of the 22 | * frequency of occurrence of items in a weighted multiset stream of items. 23 | * If the frequency distribution of items is sufficiently skewed, these algorithms are very 24 | * useful in identifying the "Heavy Hitters" that occurred most frequently in the stream. 25 | * The accuracy of the estimation of the frequency of an item has well understood error 26 | * bounds that can be returned by the sketch. 27 | * 28 | *

These algorithms are sometimes referred to as "TopN" algorithms.

29 | */ 30 | package org.apache.datasketches.frequencies; 31 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/hash/XxHash.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.hash; 21 | 22 | import org.apache.datasketches.memory.Memory; 23 | 24 | /** 25 | * The XxHash is a fast, non-cryptographic, 64-bit hash function that has 26 | * excellent avalanche and 2-way bit independence properties. 27 | * 28 | *

This class wraps the 29 | * Memory Component XxHash 30 | * implementation. 31 | * 32 | * @author Lee Rhodes 33 | */ 34 | public class XxHash { 35 | 36 | /** 37 | * Compute the hash of the given Memory object. 38 | * @param mem The given Memory object 39 | * @param offsetBytes Starting at this offset in bytes 40 | * @param lengthBytes Continuing for this number of bytes 41 | * @param seed use this seed for the hash function 42 | * @return the resulting 64-bit hash value. 43 | */ 44 | public static long hash(final Memory mem, final long offsetBytes, final long lengthBytes, 45 | final long seed) { 46 | return mem.xxHash64(offsetBytes, lengthBytes, seed); 47 | } 48 | 49 | /** 50 | * Returns a 64-bit hash of the given long by delegating to the Memory component's XxHash.hashLong. 51 | * @param in the long value to be hashed 52 | * @param seed A long valued seed. 53 | * @return the hash 54 | */ 55 | public static long hash(final long in, final long seed) { 56 | return org.apache.datasketches.memory.XxHash.hashLong(in, seed); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/hash/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied.
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * The hash package contains high-performing and extended Java implementations 22 | * of Austin Appleby's 128-bit MurmurHash3 hash function originally coded in C. 23 | * This core MurmurHash3.java class is used throughout many of the sketch classes for consistency 24 | * and, as long as the user specifies the same seed, will result in coordinated hash operations. 25 | * This package also contains an adaptor class that extends the basic class with more functions 26 | * commonly associated with hashing. 27 | */ 28 | package org.apache.datasketches.hash; 29 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/hll/AuxHashMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | package org.apache.datasketches.hll; 21 | 22 | import org.apache.datasketches.common.SketchesStateException; 23 | 24 | /** 25 | * @author Lee Rhodes 26 | */ 27 | interface AuxHashMap { 28 | 29 | AuxHashMap copy(); 30 | 31 | int getAuxCount(); 32 | 33 | int[] getAuxIntArr(); 34 | 35 | int getCompactSizeBytes(); 36 | 37 | PairIterator getIterator(); 38 | 39 | int getLgAuxArrInts(); 40 | 41 | int getUpdatableSizeBytes(); 42 | 43 | boolean isMemory(); 44 | 45 | boolean isOffHeap(); 46 | 47 | /** 48 | * Adds the slotNo and value to the aux array. 49 | * @param slotNo the index from the HLL array 50 | * @param value the HLL value at the slotNo. 51 | * @throws SketchesStateException if this slotNo already exists in the aux array. 52 | */ 53 | void mustAdd(int slotNo, int value); 54 | 55 | /** 56 | * Returns the value for the given slotNo. If this fails an exception is thrown. 57 | * @param slotNo the index from the HLL array 58 | * @return the HLL value at the slotNo 59 | * @throws SketchesStateException if a valid slotNo and value are not found. 60 | */ 61 | int mustFindValueFor(int slotNo); 62 | 63 | /** 64 | * Replaces the entry at slotNo with the given value. 65 | * @param slotNo the index from the HLL array 66 | * @param value the HLL value at the slotNo 67 | * @throws SketchesStateException if a valid slotNo, value pair is not found. 68 | */ 69 | void mustReplace(int slotNo, int value); 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/hll/CurMode.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership.
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.hll; 21 | 22 | /** 23 | * Represents the three fundamental modes of the HLL Sketch. 24 | * 25 | * @author Lee Rhodes 26 | * @author Kevin Lang 27 | */ 28 | enum CurMode { LIST, SET, HLL; //do not change the order. 29 | 30 | public static final CurMode values[] = values(); 31 | 32 | /** 33 | * Returns the CurMode given its ordinal 34 | * @param ordinal the order of appearance in the enum definition. 35 | * @return the CurMode given its ordinal 36 | */ 37 | public static CurMode fromOrdinal(final int ordinal) { 38 | return values[ordinal]; 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/hll/HllPairIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.hll; 21 | 22 | import static org.apache.datasketches.hll.HllUtil.EMPTY; 23 | import static org.apache.datasketches.hll.HllUtil.pair; 24 | 25 | /** 26 | * Iterates over an on-heap HLL byte array producing pairs of index, value. 27 | * 28 | * @author Lee Rhodes 29 | */ 30 | abstract class HllPairIterator extends PairIterator { 31 | final int lengthPairs; 32 | int index; 33 | int value; 34 | 35 | //Used by Direct<4,6,8>Array, Heap<4,6,8>Array 36 | HllPairIterator(final int lengthPairs) { 37 | this.lengthPairs = lengthPairs; 38 | index = - 1; 39 | } 40 | 41 | @Override 42 | public String getHeader() { 43 | return String.format("%10s%6s", "Slot", "Value"); 44 | } 45 | 46 | @Override 47 | public int getIndex() { 48 | return index; 49 | } 50 | 51 | @Override 52 | public int getKey() { 53 | return index; 54 | } 55 | 56 | @Override 57 | public int getPair() { 58 | return pair(index, value); 59 | } 60 | 61 | @Override 62 | public int getSlot() { 63 | return index; 64 | } 65 | 66 | @Override 67 | public String getString() { 68 | final int slot = getSlot(); 69 | final int value = getValue(); 70 | return String.format("%10d%6d", slot, value); 71 | } 72 | 73 | @Override 74 | public int getValue() { 75 | return value; 76 | } 77 | 78 | @Override 79 | public boolean nextAll() { 80 | if (++index < lengthPairs) { 81 | value = value(); 82 | return true; 83 | } 84 | return false; 85 | } 86 | 87 | @Override 88 | public boolean nextValid() { 89 | while (++index < lengthPairs) { 90 | value = 
value(); 91 | if (value != EMPTY) { 92 | return true; 93 | } 94 | } 95 | return false; 96 | } 97 | 98 | abstract int value(); 99 | 100 | } 101 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/hll/IntArrayPairIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.hll; 21 | 22 | import static org.apache.datasketches.hll.HllUtil.EMPTY; 23 | 24 | /** 25 | * Iterates over an on-heap integer array of pairs extracting 26 | * the components of the pair at a given index. 
27 | * 28 | * @author Lee Rhodes 29 | */ 30 | class IntArrayPairIterator extends PairIterator { 31 | private final int[] array; 32 | private final int arrLen; 33 | private final int slotMask; /* low lgConfigK bits set */ 34 | private int index; /* current array position; -1 until the first advance */ 35 | private int pair; /* pair most recently loaded from the array */ 36 | 37 | //used by CouponList, HeapAuxHashMap 38 | IntArrayPairIterator(final int[] array, final int lgConfigK) { 39 | this.array = array; 40 | slotMask = (1 << lgConfigK) - 1; 41 | arrLen = array.length; 42 | index = - 1; 43 | } 44 | /** Returns the current array index; -1 until the first nextAll() or nextValid() call. */ 45 | @Override 46 | public int getIndex() { 47 | return index; 48 | } 49 | /** Returns the key of the current pair, extracted by HllUtil.getPairLow26(). */ 50 | @Override 51 | public int getKey() { 52 | return HllUtil.getPairLow26(pair); 53 | } 54 | /** Returns the current pair exactly as it was read from the array. */ 55 | @Override 56 | public int getPair() { 57 | return pair; 58 | } 59 | /** Returns the key masked down to its low lgConfigK bits. */ 60 | @Override 61 | public int getSlot() { 62 | return getKey() & slotMask; 63 | } 64 | /** Returns the value of the current pair, extracted by HllUtil.getPairValue(). */ 65 | @Override 66 | public int getValue() { 67 | return HllUtil.getPairValue(pair); 68 | } 69 | /** Advances one array entry, EMPTY or not; returns false once the whole array has been visited. */ 70 | @Override 71 | public boolean nextAll() { 72 | if (++index < arrLen) { 73 | pair = array[index]; 74 | return true; 75 | } 76 | return false; 77 | } 78 | /** Advances to the next entry that is not EMPTY; returns false when none remain, leaving the previously loaded pair in place. */ 79 | @Override 80 | public boolean nextValid() { 81 | while (++index < arrLen) { 82 | final int pair = array[index]; 83 | if (pair != EMPTY) { 84 | this.pair = pair; 85 | return true; 86 | } 87 | } 88 | return false; 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/hll/doc-files/HLL_HIP_K12T20U20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/main/java/org/apache/datasketches/hll/doc-files/HLL_HIP_K12T20U20.png -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/hll/doc-files/HLL_UnionTime4_6_8_Java_CPP.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/main/java/org/apache/datasketches/hll/doc-files/HLL_UnionTime4_6_8_Java_CPP.png -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/hllmap/CouponsIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.hllmap; 21 | 22 | /** 23 | * Common iterator class for maps that need one. It walks the maxEntries coupons of a short array that start at a given offset, stopping only on non-zero entries. 24 | * 25 | * @author Alex Saydakov 26 | */ 27 | class CouponsIterator { 28 | 29 | private final int offset_; 30 | private final int maxEntries_; 31 | private final short[] couponsArr_; 32 | private int index_; /* position relative to offset_; -1 until the first next() */ 33 | /** Keeps a reference to couponsArr (no copy) and starts positioned before the first entry. */ 34 | CouponsIterator(final short[] couponsArr, final int offset, final int maxEntries) { 35 | offset_ = offset; 36 | maxEntries_ = maxEntries; 37 | couponsArr_ = couponsArr; 38 | index_ = -1; 39 | } 40 | 41 | /** 42 | * Advances to the next non-zero coupon, skipping over zero values. next() must be called before the first getValue(). 43 | * @return true if the iterator is now positioned on a non-zero coupon; false if none remain within maxEntries. 
44 | */ 45 | boolean next() { 46 | index_++; 47 | while (index_ < maxEntries_) { 48 | if (couponsArr_[offset_ + index_] != 0) { return true; } 49 | index_++; 50 | } 51 | return false; 52 | } 53 | 54 | /** 55 | * Returns the value at the current index. Only meaningful after next() has returned true. 56 | * @return the value at the current index. 57 | */ 58 | short getValue() { 59 | return couponsArr_[offset_ + index_]; 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/hllmap/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * The hllmap package contains a space efficient HLL mapping sketch of keys to approximate unique 22 | * count of identifiers. For example, counting the number of unique users (identifiers) per IP 23 | * address. 24 | * 25 | *

<p>In cases where the number of keys is very large, having an individual HLL sketch per key may 26 | * not be practical. If the distribution of values per key is highly skewed where the vast 27 | * majority of keys have only a few values then this mapping sketch will make sense as it will be 28 | * far more space efficient than dedicating individual HLL sketches per key. 29 | * 30 | *

<p>From our own testing, sketching 100 million IPv4 addresses with such a 31 | * highly skewed distribution of identifiers per IP uses only 1.4GB of memory. This translates to 32 | * an average of about 10 bytes per IP allocated to the equivalent of a full k=1024 HLL sketch 33 | * and provides an RSE of less than 2.5%. Your results will vary depending on the actual 34 | * distribution of identifiers per key. 35 | * 36 | * @see org.apache.datasketches.hllmap.UniqueCountMap 37 | */ 38 | package org.apache.datasketches.hllmap; 39 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.kll; 21 | 22 | import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator; 23 | 24 | /** 25 | * Iterator over KllDoublesSketch. The order is not defined. 
26 | */ 27 | public final class KllDoublesSketchIterator extends KllSketchIterator implements QuantilesDoublesSketchIterator { 28 | private final double[] quantiles; 29 | /** Keeps a reference to quantiles (no copy); levelsArr and numLevels are handed to the base class, which drives the cursor. */ 30 | KllDoublesSketchIterator(final double[] quantiles, final int[] levelsArr, final int numLevels) { 31 | super(levelsArr, numLevels); 32 | this.quantiles = quantiles; 33 | } 34 | /** Returns the quantile at the inherited cursor position (the index field of KllSketchIterator). */ 35 | @Override 36 | public double getQuantile() { 37 | return quantiles[index]; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.kll; 21 | 22 | import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator; 23 | 24 | /** 25 | * Iterator over KllFloatsSketch. The order is not defined. 
26 | */ 27 | public final class KllFloatsSketchIterator extends KllSketchIterator implements QuantilesFloatsSketchIterator { 28 | private final float[] quantiles; 29 | /** Keeps a reference to quantiles (no copy); levelsArr and numLevels are handed to the base class, which drives the cursor. */ 30 | KllFloatsSketchIterator(final float[] quantiles, final int[] levelsArr, final int numLevels) { 31 | super(levelsArr, numLevels); 32 | this.quantiles = quantiles; 33 | } 34 | /** Returns the quantile at the inherited cursor position (the index field of KllSketchIterator). */ 35 | @Override 36 | public float getQuantile() { 37 | return quantiles[index]; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/kll/KllItemsSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.kll; 21 | 22 | import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator; 23 | 24 | /** 25 | * Iterator over KllItemsSketch. The order is not defined. 
26 | * @param <T> the item class type 27 | */ 28 | public final class KllItemsSketchIterator<T> extends KllSketchIterator implements QuantilesGenericSketchIterator<T> { 29 | private final Object[] quantiles; /* items are stored untyped and cast to T on read */ 30 | /** Keeps a reference to quantiles (no copy); levelsArr and numLevels are handed to the base class, which drives the cursor. */ 31 | KllItemsSketchIterator(final Object[] quantiles, final int[] levelsArr, final int numLevels) { 32 | super(levelsArr, numLevels); 33 | this.quantiles = quantiles; 34 | } 35 | /** Returns the item at the inherited cursor position (the index field of KllSketchIterator), cast to T. */ 36 | @SuppressWarnings("unchecked") 37 | @Override 38 | public T getQuantile() { 39 | return (T)quantiles[index]; 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/kll/KllLongsSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.kll; 21 | 22 | import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator; 23 | 24 | /** 25 | * Iterator over KllLongsSketch. The order is not defined. 
26 | */ 27 | public final class KllLongsSketchIterator extends KllSketchIterator implements QuantilesLongsSketchIterator { 28 | private final long[] quantiles; 29 | /** Keeps a reference to quantiles (no copy); levelsArr and numLevels are handed to the base class, which drives the cursor. */ 30 | KllLongsSketchIterator(final long[] quantiles, final int[] levelsArr, final int numLevels) { 31 | super(levelsArr, numLevels); 32 | this.quantiles = quantiles; 33 | } 34 | /** Returns the quantile at the inherited cursor position (the index field of KllSketchIterator). */ 35 | @Override 36 | public long getQuantile() { 37 | return quantiles[index]; 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/kll/KllSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.kll; 21 | 22 | import org.apache.datasketches.quantilescommon.QuantilesSketchIterator; 23 | 24 | /** 25 | * The base implementation for the KLL sketch iterator hierarchy used for viewing the 26 | * non-ordered quantiles retained by a sketch. 27 | * 28 | *

<p>Prototype example of the recommended iteration loop:</p>

29 | *
<pre>{@code
30 |  *   SketchIterator itr = sketch.iterator();
31 |  *   while (itr.next()) {
32 |  *     ...get*();
33 |  *   }
34 |  * }</pre>
35 | * 36 | * @author Lee Rhodes 37 | */ 38 | public class KllSketchIterator implements QuantilesSketchIterator { 39 | protected final int[] levelsArr; 40 | protected final int numLevels; 41 | protected int level; 42 | protected int index; 43 | protected long weight; 44 | protected boolean isInitialized_; 45 | 46 | KllSketchIterator(final int[] levelsArr, final int numLevels) { 47 | this.levelsArr = levelsArr; 48 | this.numLevels = numLevels; 49 | this.isInitialized_ = false; 50 | } 51 | 52 | @Override 53 | public long getWeight() { 54 | return weight; 55 | } 56 | 57 | @Override 58 | public boolean next() { 59 | if (!isInitialized_) { 60 | level = 0; 61 | index = levelsArr[level]; 62 | weight = 1; 63 | isInitialized_ = true; 64 | } else { 65 | index++; 66 | } 67 | if (index < levelsArr[level + 1]) { 68 | return true; 69 | } 70 | // go to the next non-empty level 71 | do { 72 | level++; 73 | if (level == numLevels) { 74 | return false; // run out of levels 75 | } 76 | weight *= 2; 77 | } while (levelsArr[level] == levelsArr[level + 1]); 78 | index = levelsArr[level]; 79 | return true; 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/kll/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package is for the implementations of the sketch algorithm developed by Zohar Karnin, Kevin Lang, 22 | * and Edo Liberty that is commonly referred to as the "KLL" sketch after the authors' last names. 23 | */ 24 | 25 | package org.apache.datasketches.kll; 26 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package is the parent package for all sketch families and common code areas. 
22 | * 23 | * @author Lee Rhodes 24 | */ 25 | package org.apache.datasketches; 26 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/partitions/BoundsRule.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.partitions; 21 | 22 | /** 23 | * This instructs the user about which of the upper and lower bounds of a partition definition row 24 | * should be included with the returned data. 
25 | */ 26 | public enum BoundsRule { 27 | 28 | /** 29 | * Include both the upper and lower bounds. 30 | */ 31 | INCLUDE_BOTH, 32 | 33 | /** 34 | * Include only the upper bound but not the lower bound. 35 | */ 36 | INCLUDE_UPPER, 37 | 38 | /** 39 | * Include only the lower bound but not the upper bound. 40 | */ 41 | INCLUDE_LOWER, 42 | 43 | /** 44 | * Include neither the upper bound nor the lower bound. 45 | */ 46 | INCLUDE_NEITHER; 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/partitions/SketchFillRequest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.partitions; 21 | 22 | import org.apache.datasketches.quantilescommon.PartitioningFeature; 23 | import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; 24 | 25 | /** 26 | * This is a callback request to the data source to fill a quantiles sketch, 27 | * which is returned to the caller. 
28 | * @param <T> the item class type 29 | * @param <S> the sketch type 30 | * @author Lee Rhodes 31 | */ 32 | public interface SketchFillRequest<T, S extends QuantilesGenericAPI<T> & PartitioningFeature<T>> { 33 | 34 | /** 35 | * This is a callback request to the data source to fill a quantiles sketch 36 | * with a range of data between upper and lower bounds. Which of these bounds are to be included is determined by 37 | * the BoundsRule. 38 | * 39 | *

<p>This range of data may or may not be subsequently further partitioned.</p>

40 | * @param lowerQuantile the lowest quantile of a range 41 | * @param upperQuantile the highest quantile of a range 42 | * @param boundsRule determines which quantile bounds to include 43 | * @return a quantiles sketch filled from the given upper and lower bounds. 44 | */ 45 | public S getRange(final T lowerQuantile, final T upperQuantile, final BoundsRule boundsRule); 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/partitions/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * 22 | */ 23 | package org.apache.datasketches.partitions; 24 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantiles/CompactDoublesSketch.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantiles; 21 | 22 | import org.apache.datasketches.common.SketchesStateException; 23 | 24 | /** 25 | * Compact sketches are inherently read only: reset() and update() always throw SketchesStateException. 26 | * @author Jon Malkin 27 | */ 28 | public abstract class CompactDoublesSketch extends DoublesSketch { 29 | CompactDoublesSketch(final int k) { 30 | super(k); 31 | } 32 | /** Always true for this branch of the hierarchy. */ 33 | @Override 34 | boolean isCompact() { 35 | return true; 36 | } 37 | /* NOTE(review): this returns false although the class doc and the messages in reset() and update() call a compact sketch read-only. Confirm whether this flag is meant to describe something else (for example the backing storage) or should return true. */ 38 | @Override 39 | public boolean isReadOnly() { 40 | return false; 41 | } 42 | /** Always throws SketchesStateException; a compact sketch cannot be reset. */ 43 | @Override 44 | public void reset() { 45 | throw new SketchesStateException("Cannot reset a compact sketch, which is read-only."); 46 | } 47 | /** Always throws SketchesStateException; a compact sketch cannot be updated. */ 48 | @Override 49 | public void update(final double quantile) { 50 | throw new SketchesStateException("Cannot update a compact sketch, which is read-only."); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantiles/DoublesArrayAccessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantiles; 21 | 22 | import java.util.Arrays; 23 | 24 | /** A DoublesBufferAccessor backed directly by an on-heap double array. 25 | * @author Jon Malkin 26 | */ 27 | final class DoublesArrayAccessor extends DoublesBufferAccessor { 28 | private final int numItems_; //always the length of buffer_ 29 | private final double[] buffer_; //shared with the caller of wrap(), not copied 30 | 31 | private DoublesArrayAccessor(final double[] buffer) { 32 | numItems_ = buffer.length; 33 | buffer_ = buffer; 34 | } 35 | /** Returns an accessor that reads and writes the given array in place. */ 36 | static DoublesArrayAccessor wrap(final double[] buffer) { 37 | return new DoublesArrayAccessor(buffer); 38 | } 39 | /** Returns an accessor over a newly allocated array of numItems doubles. */ 40 | static DoublesArrayAccessor initialize(final int numItems) { 41 | return new DoublesArrayAccessor(new double[numItems]); 42 | } 43 | /** Returns the item at index; the range is checked only by an assert. */ 44 | @Override 45 | double get(final int index) { 46 | assert index >= 0 && index < numItems_; 47 | return buffer_[index]; 48 | } 49 | /** Stores quantile at index and returns the value it replaced; the range is checked only by an assert. */ 50 | @Override 51 | double set(final int index, final double quantile) { 52 | assert index >= 0 && index < numItems_; 53 | 54 | final double retVal = buffer_[index]; 55 | buffer_[index] = quantile; 56 | return retVal; 57 | } 58 | /** Returns the length of the backing array. */ 59 | @Override 60 | int numItems() { 61 | return numItems_; 62 | } 63 | /** Returns a copy of numItems items starting at fromIdx. */ 64 | @Override 65 | double[] getArray(final int fromIdx, final int numItems) { 66 | return Arrays.copyOfRange(buffer_, fromIdx, fromIdx + numItems); 67 | } 68
| /** Copies numItems items from srcArray, starting at srcIndex, into the backing array starting at dstIndex. */ 69 | @Override 70 | void putArray(final double[] srcArray, final int srcIndex, final int dstIndex, final int numItems) { 71 | System.arraycopy(srcArray, srcIndex, buffer_, dstIndex, numItems); 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantiles/DoublesBufferAccessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.quantiles; 21 | 22 | /** Common read/write access to a buffer of doubles; DoublesArrayAccessor is the array-backed implementation. 23 | * @author Jon Malkin 24 | */ 25 | abstract class DoublesBufferAccessor { 26 | /** Returns the item at index. */ 27 | abstract double get(final int index); 28 | /** Stores quantile at index; the array-backed implementation returns the value it replaced. */ 29 | abstract double set(final int index, final double quantile); 30 | /** Returns the number of items addressable through this accessor. */ 31 | abstract int numItems(); 32 | /** Returns numItems items starting at fromIdx; the array-backed implementation returns a copy. */ 33 | abstract double[] getArray(int fromIdx, int numItems); 34 | /** Copies numItems items from srcArray, starting at srcIndex, into this buffer starting at dstIndex. */ 35 | abstract void putArray(double[] srcArray, int srcIndex, int dstIndex, int numItems); 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantiles/DoublesSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantiles; 21 | 22 | import java.util.Objects; 23 | 24 | import org.apache.datasketches.common.SketchesStateException; 25 | import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator; 26 | 27 | /** 28 | * Iterator over DoublesSketch. The order is not defined. 
29 | */ 30 | public final class DoublesSketchIterator implements QuantilesDoublesSketchIterator { 31 | private DoublesSketchAccessor sketchAccessor; 32 | private long bitPattern; /* remaining level bits; shifted right as levels are consumed */ 33 | private int level; /* -1 until the first level is entered */ 34 | private long weight; /* starts at 1 and doubles for every level step */ 35 | private int index; /* position inside the current accessor view; -1 before the first next() */ 36 | /** Wraps the sketch in an accessor and starts before the first item (index -1, level -1, weight 1); throws NullPointerException if sketch is null. */ 37 | DoublesSketchIterator(final DoublesSketch sketch, final long bitPattern) { 38 | Objects.requireNonNull(sketch, "sketch must not be null"); 39 | sketchAccessor = DoublesSketchAccessor.wrap(sketch); 40 | this.bitPattern = bitPattern; 41 | this.level = -1; 42 | this.weight = 1; 43 | this.index = -1; 44 | } 45 | /** Returns the item at the current position; throws SketchesStateException if next() has not been called yet. */ 46 | @Override 47 | public double getQuantile() { 48 | if (index < 0) { throw new SketchesStateException("index < 0; getQuantile() was called before next()"); } 49 | return sketchAccessor.get(index); 50 | } 51 | /** Returns the weight of the current item: 1 at the starting position, doubled each time next() steps up a level. */ 52 | @Override 53 | public long getWeight() { 54 | return weight; 55 | } 56 | /** Advances within the current accessor view, then through the levels whose bit is set in bitPattern (bit 0 is level 0); returns false once no set bits remain. */ 57 | @Override 58 | public boolean next() { 59 | index++; // advance index within the current level 60 | if (index < sketchAccessor.numItems()) { 61 | return true; 62 | } 63 | // go to the next non-empty level 64 | do { 65 | level++; 66 | if (level > 0) { 67 | bitPattern >>>= 1; 68 | } 69 | if (bitPattern == 0L) { 70 | return false; // run out of levels 71 | } 72 | weight *= 2; 73 | } while ((bitPattern & 1L) == 0L); 74 | index = 0; 75 | sketchAccessor.setLevel(level); 76 | return true; 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantiles/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * The quantiles package contains stochastic streaming algorithms that enable single-pass 22 | * analysis of the distribution of a stream of quantiles. 23 | * 24 | * @see org.apache.datasketches.quantiles.DoublesSketch 25 | * @see org.apache.datasketches.quantiles.ItemsSketch 26 | */ 27 | package org.apache.datasketches.quantiles; 28 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedViewIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantilescommon; 21 | 22 | /** 23 | * Iterator over quantile sketches of primitive type double. 24 | */ 25 | public final class DoublesSortedViewIterator extends SortedViewIterator { 26 | private final double[] quantiles; 27 | 28 | /** 29 | * Constructor. 30 | * @param quantiles the given array of quantiles, which must be ordered. 31 | * @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and 32 | * the last value must be equal to N, the total number of items updated to the sketch. 33 | */ 34 | public DoublesSortedViewIterator(final double[] quantiles, final long[] cumWeights) { 35 | super(cumWeights); 36 | this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter 37 | } 38 | 39 | /** 40 | * Gets the quantile at the current index. 41 | * 42 | *

Don't call this before calling next() for the first time 43 | * or after getting false from next().

44 | * 45 | * @return the quantile at the current index. 46 | */ 47 | public double getQuantile() { 48 | return quantiles[index]; 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedViewIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantilescommon; 21 | 22 | /** 23 | * Iterator over quantile sketches of primitive type float. 24 | */ 25 | public final class FloatsSortedViewIterator extends SortedViewIterator { 26 | private final float[] quantiles; 27 | 28 | /** 29 | * Constructor. 30 | * @param quantiles the given array of quantiles, which must be ordered. 31 | * @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and 32 | * the last value must be equal to N, the total number of items updated to the sketch. 
33 | */ 34 | public FloatsSortedViewIterator(final float[] quantiles, final long[] cumWeights) { 35 | super(cumWeights); 36 | this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter 37 | } 38 | 39 | /** 40 | * Gets the quantile at the current index. 41 | * 42 | *

Don't call this before calling next() for the first time 43 | * or after getting false from next().

44 | * 45 | * @return the quantile at the current index. 46 | */ 47 | public float getQuantile() { 48 | return quantiles[index]; 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantilescommon; 21 | 22 | /** 23 | * Iterator over quantile sketches of primitive type long. 24 | */ 25 | public final class LongsSortedViewIterator extends SortedViewIterator { 26 | private final long[] quantiles; 27 | 28 | /** 29 | * Constructor. 30 | * @param quantiles the given array of quantiles, which must be ordered. 31 | * @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and 32 | * the last value must be equal to N, the total number of items updated to the sketch. 
33 | */ 34 | public LongsSortedViewIterator(final long[] quantiles, final long[] cumWeights) { 35 | super(cumWeights); 36 | this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter 37 | } 38 | 39 | /** 40 | * Gets the quantile at the current index. 41 | * 42 | *

Don't call this before calling next() for the first time 43 | * or after getting false from next().

44 | * 45 | * @return the quantile at the current index. 46 | */ 47 | public long getQuantile() { 48 | return quantiles[index]; 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantilescommon; 21 | 22 | /** 23 | * These search criteria are used by the KLL, REQ and Classic Quantiles sketches in the DataSketches library. 24 | * 25 | * @see 26 | * Sketching Quantiles and Ranks Tutorial 27 | * 28 | * @author Lee Rhodes 29 | */ 30 | public enum QuantileSearchCriteria { 31 | 32 | /** 33 | * Definition of INCLUSIVE getQuantile(r) search:
34 | * Given rank r, return the quantile of the smallest rank that is 35 | * greater than or equal to r. 36 | * 37 | *

Definition of INCLUSIVE getRank(q) search:
38 | * Given quantile q, return the rank, r, of the largest quantile that is 39 | * less than or equal to q.

40 | */ 41 | INCLUSIVE, 42 | 43 | /** 44 | * Definition of EXCLUSIVE getQuantile(r) search:
45 | * Given rank r, return the quantile of the smallest rank that is 46 | * strictly greater than r. 47 | * 48 | *

However, if the given rank is equal to 1.0, or there is no quantile that satisfies this criterion, 49 | * the method will return a NaN or null.

50 | * 51 | *

Definition of EXCLUSIVE getRank(q) search:
52 | * Given quantile q, return the rank, r, of the largest quantile that is 53 | * strictly less than q.

54 | * 55 | *

If there is no quantile value that is strictly less than q, 56 | * the method will return a rank of zero.

57 | * 58 | */ 59 | EXCLUSIVE; 60 | 61 | } 62 | 63 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantilescommon; 21 | 22 | /** 23 | * The quantiles sketch iterator for primitive type double. 24 | * @see QuantilesSketchIterator 25 | * @author Lee Rhodes 26 | */ 27 | public interface QuantilesDoublesSketchIterator extends QuantilesSketchIterator { 28 | 29 | /** 30 | * Gets the double quantile at the current index. 31 | * 32 | *

Don't call this before calling next() for the first time 33 | * or after getting false from next().

34 | * 35 | * @return the double quantile at the current index. 36 | */ 37 | double getQuantile(); 38 | 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantilescommon; 21 | 22 | /** 23 | * The quantiles sketch iterator for primitive type float. 24 | * @see QuantilesSketchIterator 25 | * @author Lee Rhodes 26 | */ 27 | public interface QuantilesFloatsSketchIterator extends QuantilesSketchIterator { 28 | 29 | /** 30 | * Gets the float quantile at the current index. 31 | * 32 | *

Don't call this before calling next() for the first time 33 | * or after getting false from next().

34 | * 35 | * @return the float quantile at the current index. 36 | */ 37 | float getQuantile(); 38 | 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantilescommon; 21 | 22 | /** 23 | * The quantiles sketch iterator for generic types. 24 | * @see QuantilesSketchIterator 25 | * @param <T> The generic quantile type 26 | * @author Lee Rhodes 27 | */ 28 | public interface QuantilesGenericSketchIterator<T> extends QuantilesSketchIterator { 29 | 30 | /** 31 | * Gets the generic quantile at the current index. 32 | * 33 | *

Don't call this before calling next() for the first time 34 | * or after getting false from next().

35 | * 36 | * @return the generic quantile at the current index. 37 | */ 38 | T getQuantile(); 39 | 40 | } 41 | 42 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantilescommon; 21 | 22 | /** 23 | * The quantiles sketch iterator for primitive type long. 24 | * @see QuantilesSketchIterator 25 | * @author Zac Blanco 26 | */ 27 | public interface QuantilesLongsSketchIterator extends QuantilesSketchIterator { 28 | 29 | /** 30 | * Gets the long quantile at the current index. 31 | * 32 | *

Don't call this before calling next() for the first time 33 | * or after getting false from next().

34 | * 35 | * @return the long quantile at the current index. 36 | */ 37 | long getQuantile(); 38 | 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/QuantilesSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantilescommon; 21 | 22 | /** 23 | * This is the base interface for the SketchIterator hierarchy used for viewing the 24 | * non-ordered quantiles retained by a sketch. 25 | * 26 | *

Prototype example of the recommended iteration loop:

27 | *
{@code
28 |  *   SketchIterator itr = sketch.iterator();
29 |  *   while (itr.next()) {
30 |  *     ...get*();
31 |  *   }
32 |  * }
33 | * 34 | * @author Lee Rhodes 35 | */ 36 | 37 | public interface QuantilesSketchIterator { 38 | 39 | /** 40 | * Gets the natural weight at the current index. 41 | * 42 | *

Don't call this before calling next() for the first time 43 | * or after getting false from next().

44 | * 45 | * @return the natural weight at the current index. 46 | */ 47 | long getWeight(); 48 | 49 | /** 50 | * Advances the index and checks if it is valid. 51 | * The state of this iterator is undefined before the first call of this method. 52 | * @return true if the next index is valid. 53 | */ 54 | boolean next(); 55 | 56 | } 57 | 58 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/SketchPartitionLimits.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantilescommon; 21 | 22 | import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; 23 | 24 | import org.apache.datasketches.common.SketchesArgumentException; 25 | 26 | /** 27 | * This defines the methods required to compute the partition limits. 28 | */ 29 | public interface SketchPartitionLimits { 30 | 31 | /** 32 | * Gets the maximum number of partitions this sketch will support based on the configured size K 33 | * and the number of retained values of this sketch. 
34 | * @return the maximum number of partitions this sketch will support. 35 | */ 36 | int getMaxPartitions(); 37 | 38 | /** 39 | * Gets the minimum partition size in items this sketch will support based on the configured size K of this 40 | * sketch and the number of retained values of this sketch. Computed as getN() / getMaxPartitions() using integer division; throws SketchesArgumentException if getN() is not positive. 41 | * @return the minimum partition size in items this sketch will support. 42 | */ 43 | default long getMinPartitionSizeItems() { 44 | final long totalN = getN(); 45 | if (totalN <= 0) { throw new SketchesArgumentException(EMPTY_MSG); } 46 | return totalN / getMaxPartitions(); 47 | } 48 | 49 | /** 50 | * Gets the length of the input stream offered to the sketch. 51 | * @return the length of the input stream offered to the sketch. 52 | */ 53 | long getN(); 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/quantilescommon/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package contains common tools and methods for the quantiles, kll and 22 | * req packages. 
23 | */ 24 | package org.apache.datasketches.quantilescommon; 25 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/req/ReqSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.req; 21 | 22 | import java.util.List; 23 | 24 | import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator; 25 | 26 | /** 27 | * Iterator over all retained items of the ReqSketch. The order is not defined. 
28 | * 29 | * @author Lee Rhodes 30 | */ 31 | public final class ReqSketchIterator implements QuantilesFloatsSketchIterator { 32 | private List compactors; 33 | private int cIndex; 34 | private int bIndex; 35 | private int numRetainedItems; 36 | private FloatBuffer currentBuf; 37 | 38 | ReqSketchIterator(final ReqSketch sketch) { 39 | compactors = sketch.getCompactors(); 40 | numRetainedItems = sketch.getNumRetained(); 41 | currentBuf = compactors.get(0).getBuffer(); 42 | cIndex = 0; 43 | bIndex = -1; 44 | } 45 | 46 | @Override 47 | public float getQuantile() { 48 | return currentBuf.getItem(bIndex); 49 | } 50 | 51 | @Override 52 | public long getWeight() { 53 | return 1L << cIndex; /* long shift: the int form 1 << cIndex goes negative at cIndex == 31 and wraps beyond it */ 54 | } 55 | 56 | @Override 57 | public boolean next() { 58 | if ((numRetainedItems == 0) 59 | || ((cIndex == (compactors.size() - 1)) && (bIndex == (currentBuf.getCount() - 1)))) { 60 | return false; 61 | } 62 | if (bIndex == (currentBuf.getCount() - 1)) { 63 | cIndex++; 64 | currentBuf = compactors.get(cIndex).getBuffer(); 65 | bIndex = 0; 66 | } else { 67 | bIndex++; 68 | } 69 | return true; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/req/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package is for the implementation of the Relative Error Quantiles sketch algorithm. 22 | * @see org.apache.datasketches.req.ReqSketch 23 | */ 24 | package org.apache.datasketches.req; 25 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/sampling/SampleSubsetSummary.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.sampling; 21 | 22 | /** 23 | * A simple object to capture the results of a subset sum query on a sampling sketch. 
24 | * 25 | * @author Jon Malkin 26 | */ 27 | public class SampleSubsetSummary { 28 | private double lowerBound; 29 | private double estimate; 30 | private double upperBound; 31 | private double totalSketchWeight; 32 | 33 | SampleSubsetSummary(final double lowerBound, 34 | final double estimate, 35 | final double upperBound, 36 | final double totalSketchWeight) { 37 | this.lowerBound = lowerBound; 38 | this.estimate = estimate; 39 | this.upperBound = upperBound; 40 | this.totalSketchWeight = totalSketchWeight; 41 | } 42 | 43 | /** 44 | * Returns the lower bound given at construction. 45 | * @return the lower bound 46 | */ 47 | public double getLowerBound() { 48 | return lowerBound; 49 | } 50 | 51 | /** 52 | * Returns the total sketch weight given at construction. 53 | * @return the total sketch weight 54 | */ 55 | public double getTotalSketchWeight() { 56 | return totalSketchWeight; 57 | } 58 | 59 | /** 60 | * Returns the upper bound given at construction. 61 | * @return the upper bound 62 | */ 63 | public double getUpperBound() { 64 | return upperBound; 65 | } 66 | 67 | /** 68 | * Returns the subset sum estimate given at construction. 69 | * @return the subset sum estimate 70 | */ 71 | public double getEstimate() { 72 | return estimate; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/sampling/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of 22 | * weighted and unweighted items from a stream. 23 | * 24 | *

These sketches are mergeable and can be serialized and deserialized to/from a compact 25 | * form.

26 | * @see org.apache.datasketches.sampling.ReservoirItemsSketch 27 | * @see org.apache.datasketches.sampling.ReservoirLongsSketch 28 | * @see org.apache.datasketches.sampling.VarOptItemsSketch 29 | */ 30 | package org.apache.datasketches.sampling; 31 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tdigest/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * t-Digest for estimating quantiles and ranks. 22 | */ 23 | package org.apache.datasketches.tdigest; 24 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/theta/BytesCompactHashIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.theta; 21 | 22 | import org.apache.datasketches.common.ByteArrayUtil; 23 | 24 | /* 25 | * This is to iterate over serial version 3 sketch representation 26 | */ 27 | class BytesCompactHashIterator implements HashIterator { 28 | final private byte[] bytes; 29 | final private int offset; 30 | final private int numEntries; 31 | private int index; 32 | 33 | BytesCompactHashIterator( 34 | final byte[] bytes, 35 | final int offset, 36 | final int numEntries 37 | ) { 38 | this.bytes = bytes; 39 | this.offset = offset; 40 | this.numEntries = numEntries; 41 | index = -1; 42 | } 43 | 44 | @Override 45 | public long get() { 46 | return ByteArrayUtil.getLongLE(bytes, offset + index * Long.BYTES); 47 | } 48 | 49 | @Override 50 | public boolean next() { 51 | return ++index < numEntries; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/theta/HashIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.theta; 21 | 22 | /** 23 | * This is used to iterate over the retained hash values of the Theta sketch. 24 | * @author Lee Rhodes 25 | */ 26 | public interface HashIterator { 27 | 28 | /** 29 | * Gets the hash value 30 | * @return the hash value 31 | */ 32 | long get(); 33 | 34 | /** 35 | * Returns true at the next hash value in sequence. 36 | * If false, the iteration is done. 37 | * @return true at the next hash value in sequence. 38 | */ 39 | boolean next(); 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/theta/HeapCompactHashIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.theta; 21 | 22 | class HeapCompactHashIterator implements HashIterator { 23 | private long[] cache; 24 | private int index; 25 | 26 | HeapCompactHashIterator(final long[] cache) { 27 | this.cache = cache; 28 | index = -1; 29 | } 30 | 31 | @Override 32 | public long get() { 33 | return cache[index]; 34 | } 35 | 36 | @Override 37 | public boolean next() { 38 | return ++index < cache.length; 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/theta/HeapHashIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.theta; 21 | 22 | /** 23 | * @author Lee Rhodes 24 | */ 25 | class HeapHashIterator implements HashIterator { 26 | private long[] cache; 27 | private long thetaLong; 28 | private int index; 29 | private long hash; 30 | 31 | HeapHashIterator(final long[] cache, final long thetaLong) { 32 | this.cache = cache; 33 | this.thetaLong = thetaLong; 34 | index = -1; 35 | hash = 0; 36 | } 37 | 38 | @Override 39 | public long get() { 40 | return hash; 41 | } 42 | 43 | @Override 44 | public boolean next() { 45 | while (++index < cache.length) { 46 | hash = cache[index]; 47 | if ((hash != 0) && (hash < thetaLong)) { 48 | return true; 49 | } 50 | } 51 | return false; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/theta/MemoryHashIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.theta; 21 | 22 | import org.apache.datasketches.memory.Memory; 23 | 24 | /** 25 | * @author Lee Rhodes 26 | */ 27 | class MemoryHashIterator implements HashIterator { 28 | private Memory mem; 29 | private int arrLongs; 30 | private long thetaLong; 31 | private long offsetBytes; 32 | private int index; 33 | private long hash; 34 | 35 | MemoryHashIterator(final Memory mem, final int arrLongs, final long thetaLong) { 36 | this.mem = mem; 37 | this.arrLongs = arrLongs; 38 | this.thetaLong = thetaLong; 39 | offsetBytes = PreambleUtil.extractPreLongs(mem) << 3; 40 | index = -1; 41 | hash = 0; 42 | } 43 | 44 | @Override 45 | public long get() { 46 | return hash; 47 | } 48 | 49 | @Override 50 | public boolean next() { 51 | while (++index < arrLongs) { 52 | hash = mem.getLong(offsetBytes + (index << 3)); 53 | if ((hash != 0) && (hash < thetaLong)) { 54 | return true; 55 | } 56 | } 57 | return false; 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/theta/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * The theta package contains the basic sketch classes that are members of the 22 | * Theta Sketch Framework. 23 | * 24 | *

There is a separate Tuple package for many of the sketches that are derived from the 25 | * same algorithms defined in the Theta Sketch Framework paper.

26 | */ 27 | package org.apache.datasketches.theta; 28 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/thetacommon/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package contains common tools and methods for the theta, 22 | * tuple, tuple/* and fdt packages. 23 | */ 24 | package org.apache.datasketches.thetacommon; 25 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/DeserializeResult.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple; 21 | 22 | /** 23 | * Returns an object and its size in bytes as a result of a deserialize operation 24 | * @param Type of object 25 | */ 26 | public class DeserializeResult { 27 | private final T object; 28 | private final int size; 29 | 30 | /** 31 | * Creates an instance. 32 | * @param object Deserialized object. 33 | * @param size Deserialized size in bytes. 34 | */ 35 | public DeserializeResult(final T object, final int size) { 36 | this.object = object; 37 | this.size = size; 38 | } 39 | 40 | /** 41 | * Returns Deserialized object 42 | * @return Deserialized object 43 | */ 44 | public T getObject() { 45 | return object; 46 | } 47 | 48 | /** 49 | * Returns size in bytes occupied by the object in the serialized form 50 | * @return size in bytes occupied by the object in the serialized form 51 | */ 52 | public int getSize() { 53 | return size; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/Summary.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple; 21 | 22 | /** 23 | * Interface for user-defined Summary, which is associated with every hash in a tuple sketch 24 | */ 25 | public interface Summary { 26 | 27 | /** 28 | * Deep copy. 29 | * 30 | *

Caution: This must implement a deep copy. 31 | * 32 | * @return deep copy of the Summary 33 | */ 34 | public Summary copy(); 35 | 36 | /** 37 | * This is to serialize a Summary instance to a byte array. 38 | * 39 | *

The user should encode in the byte array its total size, which is used during 40 | * deserialization, especially if the Summary has variable sized elements. 41 | * 42 | * @return serialized representation of the Summary 43 | */ 44 | public byte[] toByteArray(); 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/SummaryDeserializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple; 21 | 22 | import org.apache.datasketches.memory.Memory; 23 | 24 | /** 25 | * Interface for deserializing user-defined Summary 26 | * @param type of Summary 27 | */ 28 | public interface SummaryDeserializer { 29 | 30 | /** 31 | * This is to create an instance of a Summary given a serialized representation. 32 | * The user may assume that the start of the given Memory is the correct place to start 33 | * deserializing. 
However, the user must be able to determine the number of bytes required to 34 | * deserialize the summary as the capacity of the given Memory may 35 | * include multiple such summaries and may be much larger than required for a single summary. 36 | * @param mem Memory object with serialized representation of a Summary 37 | * @return DeserializedResult object, which contains a Summary object and number of bytes read 38 | * from the Memory 39 | */ 40 | public DeserializeResult heapifySummary(Memory mem); 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/SummaryFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.tuple; 21 | 22 | /** 23 | * Interface for user-defined SummaryFactory 24 | * @param type of Summary 25 | */ 26 | public interface SummaryFactory { 27 | 28 | /** 29 | * Returns new instance of Summary 30 | * @return new instance of Summary 31 | */ 32 | public S newSummary(); 33 | 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/SummarySetOperations.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple; 21 | 22 | /** 23 | * This is to provide methods of producing unions and intersections of two Summary objects. 24 | * @param type of Summary 25 | */ 26 | public interface SummarySetOperations { 27 | 28 | /** 29 | * This is called by the union operator when both sketches have the same hash value. 30 | * 31 | *

Caution: Do not modify the input Summary objects. Also do not return them directly, 32 | * unless they are immutable (most Summary objects are not). For mutable Summary objects, it is 33 | * important to create a new Summary object with the correct contents to be returned. Do not 34 | * return null summaries. 35 | * 36 | * @param a Summary from sketch A 37 | * @param b Summary from sketch B 38 | * @return union of Summary A and Summary B 39 | */ 40 | public S union(S a, S b); 41 | 42 | /** 43 | * This is called by the intersection operator when both sketches have the same hash value. 44 | * 45 | *

Caution: Do not modify the input Summary objects. Also do not return them directly, 46 | * unless they are immutable (most Summary objects are not). For mutable Summary objects, it is 47 | * important to create a new Summary object with the correct contents to be returned. Do not 48 | * return null summaries. 49 | * 50 | * @param a Summary from sketch A 51 | * @param b Summary from sketch B 52 | * @return intersection of Summary A and Summary B 53 | */ 54 | public S intersection(S a, S b); 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/TupleSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.tuple; 21 | 22 | /** 23 | * Iterator over a generic tuple sketch 24 | * @param Type of Summary 25 | */ 26 | public class TupleSketchIterator { 27 | 28 | private final long[] hashArrTbl_; //could be either hashArr or hashTable 29 | private final S[] summaryArrTbl_; //could be either summaryArr or summaryTable 30 | private int i_; 31 | 32 | TupleSketchIterator(final long[] hashes, final S[] summaries) { 33 | hashArrTbl_ = hashes; 34 | summaryArrTbl_ = summaries; 35 | i_ = -1; 36 | } 37 | 38 | /** 39 | * Advancing the iterator and checking existence of the next entry 40 | * is combined here for efficiency. This results in an undefined 41 | * state of the iterator before the first call of this method. 42 | * @return true if the next element exists 43 | */ 44 | public boolean next() { 45 | if (hashArrTbl_ == null) { return false; } 46 | i_++; 47 | while (i_ < hashArrTbl_.length) { 48 | if (hashArrTbl_[i_] > 0) { return true; } 49 | i_++; 50 | } 51 | return false; 52 | } 53 | 54 | /** 55 | * Gets the hash from the current entry in the sketch, which is a hash 56 | * of the original key passed to update(). The original keys are not 57 | * retained. Don't call this before calling next() for the first time 58 | * or after getting false from next(). 59 | * @return hash from the current entry 60 | */ 61 | public long getHash() { 62 | return hashArrTbl_[i_]; 63 | } 64 | 65 | /** 66 | * Gets a Summary object from the current entry in the sketch. 67 | * Don't call this before calling next() for the first time 68 | * or after getting false from next(). 69 | * @return Summary object for the current entry (this is not a copy!) 
70 | */ 71 | public S getSummary() { 72 | return summaryArrTbl_[i_]; 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple; 21 | 22 | /** 23 | * Interface for updating user-defined Summary 24 | * @param type of update value 25 | */ 26 | public interface UpdatableSummary extends Summary { 27 | 28 | /** 29 | * This is to provide a method of updating summaries. 30 | * This is primarily used internally. 31 | * @param value update value 32 | * @return this 33 | */ 34 | UpdatableSummary update(U value); 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryDeserializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple.adouble; 21 | 22 | import org.apache.datasketches.memory.Memory; 23 | import org.apache.datasketches.tuple.DeserializeResult; 24 | import org.apache.datasketches.tuple.SummaryDeserializer; 25 | 26 | /** 27 | * Implements SummaryDeserializer<DoubleSummary> 28 | * @author Lee Rhodes 29 | */ 30 | public class DoubleSummaryDeserializer implements SummaryDeserializer { 31 | 32 | @Override 33 | public DeserializeResult heapifySummary(final Memory mem) { 34 | return DoubleSummary.fromMemory(mem); 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple.adouble; 21 | 22 | import org.apache.datasketches.tuple.SummaryFactory; 23 | 24 | /** 25 | * Factory for DoubleSummary. 26 | * 27 | * @author Lee Rhodes 28 | */ 29 | public final class DoubleSummaryFactory implements SummaryFactory { 30 | 31 | private final DoubleSummary.Mode summaryMode_; 32 | 33 | /** 34 | * Creates an instance of DoubleSummaryFactory with a given mode 35 | * @param summaryMode summary mode 36 | */ 37 | public DoubleSummaryFactory(final DoubleSummary.Mode summaryMode) { 38 | summaryMode_ = summaryMode; 39 | } 40 | 41 | @Override 42 | public DoubleSummary newSummary() { 43 | return new DoubleSummary(summaryMode_); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/adouble/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package is for a generic implementation of the Tuple sketch for single Double value. 22 | */ 23 | package org.apache.datasketches.tuple.adouble; 24 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.tuple.aninteger; 21 | 22 | import org.apache.datasketches.memory.Memory; 23 | import org.apache.datasketches.tuple.DeserializeResult; 24 | import org.apache.datasketches.tuple.SummaryDeserializer; 25 | 26 | /** 27 | * Implements SummaryDeserializer<IntegerSummary> 28 | * @author Lee Rhodes 29 | */ 30 | public class IntegerSummaryDeserializer implements SummaryDeserializer { 31 | 32 | @Override 33 | public DeserializeResult heapifySummary(final Memory mem) { 34 | return IntegerSummary.fromMemory(mem); 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple.aninteger; 21 | 22 | import org.apache.datasketches.tuple.SummaryFactory; 23 | 24 | /** 25 | * Factory for IntegerSummary. 
26 | * 27 | * @author Lee Rhodes 28 | */ 29 | public class IntegerSummaryFactory implements SummaryFactory<IntegerSummary> { 30 | /* Mode handed to every IntegerSummary this factory creates. */ 31 | private final IntegerSummary.Mode summaryMode_; 32 | 33 | /** 34 | * Creates an instance of IntegerSummaryFactory with a given mode 35 | * @param summaryMode summary mode 36 | */ 37 | public IntegerSummaryFactory(final IntegerSummary.Mode summaryMode) { 38 | summaryMode_ = summaryMode; 39 | } 40 | /** Returns a new IntegerSummary constructed with the mode given to this factory. */ 41 | @Override 42 | public IntegerSummary newSummary() { 43 | return new IntegerSummary(summaryMode_); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummarySetOperations.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple.aninteger; 21 | 22 | import org.apache.datasketches.tuple.SummarySetOperations; 23 | import org.apache.datasketches.tuple.aninteger.IntegerSummary.Mode; 24 | 25 | /** 26 | * Methods for defining how unions and intersections of two objects of type IntegerSummary 27 | * are performed.
28 | * 29 | * @author Lee Rhodes 30 | */ 31 | public class IntegerSummarySetOperations implements SummarySetOperations<IntegerSummary> { 32 | /* Mode of the result summary produced by union(). */ 33 | private final Mode unionSummaryMode_; 34 | 35 | /** 36 | * Intersection is not well defined or even meaningful between numeric values. 37 | * Nevertheless, this can be defined to be a different type of aggregation for intersecting hashes. 38 | */ 39 | private final Mode intersectionSummaryMode_; 40 | 41 | /** 42 | * Creates a new instance with two modes 43 | * @param unionSummaryMode for unions 44 | * @param intersectionSummaryMode for intersections 45 | */ 46 | public IntegerSummarySetOperations(final Mode unionSummaryMode, final Mode intersectionSummaryMode) { 47 | unionSummaryMode_ = unionSummaryMode; 48 | intersectionSummaryMode_ = intersectionSummaryMode; 49 | } 50 | /** Returns a new summary in the union mode, updated with the value of a and then the value of b. */ 51 | @Override 52 | public IntegerSummary union(final IntegerSummary a, final IntegerSummary b) { 53 | final IntegerSummary result = new IntegerSummary(unionSummaryMode_); 54 | result.update(a.getValue()); 55 | result.update(b.getValue()); 56 | return result; 57 | } 58 | /** Returns a new summary in the intersection mode, updated with the value of a and then the value of b. */ 59 | @Override 60 | public IntegerSummary intersection(final IntegerSummary a, final IntegerSummary b) { 61 | final IntegerSummary result = new IntegerSummary(intersectionSummaryMode_); 62 | result.update(a.getValue()); 63 | result.update(b.getValue()); 64 | return result; 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/aninteger/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership.
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package is for a generic implementation of the Tuple sketch for single Integer value. 22 | */ 23 | package org.apache.datasketches.tuple.aninteger; 24 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesAnotB.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.tuple.arrayofdoubles; 21 | 22 | import org.apache.datasketches.memory.WritableMemory; 23 | 24 | /** 25 | * Computes a set difference (A-and-not-B) of two tuple sketches of type ArrayOfDoubles. 26 | */ 27 | public abstract class ArrayOfDoublesAnotB { 28 | /* No access modifier: the constructor is package-private, so subclasses must live in this package. */ 29 | ArrayOfDoublesAnotB() {} 30 | 31 | /** 32 | * Perform A-and-not-B set operation on the two given sketches. 33 | * A null sketch is interpreted as an empty sketch. 34 | * This is not an accumulating update. Calling update() more than once 35 | * without calling getResult() will discard the result of previous update(). 36 | * Both input sketches must have the same numValues. 37 | * 38 | * @param a The incoming sketch for the first argument 39 | * @param b The incoming sketch for the second argument 40 | */ 41 | public abstract void update(ArrayOfDoublesSketch a, ArrayOfDoublesSketch b); 42 | 43 | /** 44 | * Gets the result of this operation in the form of an ArrayOfDoublesCompactSketch 45 | * @return compact sketch representing the result of the operation 46 | */ 47 | public abstract ArrayOfDoublesCompactSketch getResult(); 48 | 49 | /** 50 | * Gets the result of this operation in the form of an ArrayOfDoublesCompactSketch 51 | * @param mem memory for the result (can be null) 52 | * @return compact sketch representing the result of the operation (off-heap if memory is 53 | * provided) 54 | */ 55 | public abstract ArrayOfDoublesCompactSketch getResult(WritableMemory mem); 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCombiner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership.
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple.arrayofdoubles; 21 | 22 | /** 23 | * Combines two arrays of double values for use with ArrayOfDoubles tuple sketches. 24 | */ 25 | public interface ArrayOfDoublesCombiner { 26 | 27 | /** 28 | * Combines two arrays of double values; the combining rule is defined by the implementation. 29 | * @param a Array A. 30 | * @param b Array B. 31 | * @return Result of combining A and B 32 | */ 33 | public double[] combine(double[] a, double[] b); 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesCompactSketch.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple.arrayofdoubles; 21 | 22 | /** 23 | * Top level compact tuple sketch of type ArrayOfDoubles. Compact sketches are never created 24 | * directly. They are created as a result of the compact() method on a QuickSelectSketch 25 | * or the getResult() method of a set operation like Union, Intersection or AnotB. 26 | * Compact sketch consists of a compact list (i.e. no intervening spaces) of hash values, 27 | * corresponding list of double values, and a value for theta. The lists may or may 28 | * not be ordered. A compact sketch is read-only. 
29 | */ 30 | public abstract class ArrayOfDoublesCompactSketch extends ArrayOfDoublesSketch { 31 | 32 | static final byte serialVersionUID = 1; 33 | 34 | // Layout of retained entries: 35 | // Long || Start Byte Adr: 36 | // Adr: 37 | // || 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 | 38 | // 3 ||-----------------------------------|----------Retained Entries------------| 39 | 40 | static final int EMPTY_SIZE = 16; 41 | static final int RETAINED_ENTRIES_INT = 16; 42 | // 4 bytes of padding for 8 byte alignment 43 | static final int ENTRIES_START = 24; 44 | 45 | ArrayOfDoublesCompactSketch(final int numValues) { 46 | super(numValues); 47 | } 48 | /** Returns EMPTY_SIZE when no entries are retained; otherwise ENTRIES_START plus, for each retained entry, one key and numValues_ values. */ 49 | @Override 50 | public int getCurrentBytes() { 51 | final int count = getRetainedEntries(); 52 | int sizeBytes = EMPTY_SIZE; 53 | if (count > 0) { 54 | sizeBytes = ENTRIES_START + (SIZE_OF_KEY_BYTES * count) 55 | + (SIZE_OF_VALUE_BYTES * numValues_ * count); 56 | } 57 | return sizeBytes; 58 | } 59 | /** Returns the same value as getCurrentBytes(). */ 60 | @Override 61 | public int getMaxBytes() { 62 | return getCurrentBytes(); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple.arrayofdoubles; 21 | 22 | /** 23 | * Interface for iterating over tuple sketches of type ArrayOfDoubles. Intended use: call next() first, and read getKey() / getValues() only while next() returns true. 24 | */ 25 | public interface ArrayOfDoublesSketchIterator { 26 | /** 27 | * Advancing the iterator and checking existence of the next entry 28 | * is combined here for efficiency. This results in an undefined 29 | * state of the iterator before the first call of this method. 30 | * @return true if the next element exists 31 | */ 32 | public boolean next(); 33 | 34 | /** 35 | * Gets a key from the current entry in the sketch, which is a hash 36 | * of the original key passed to update(). The original keys are not 37 | * retained. Don't call this before calling next() for the first time 38 | * or after getting false from next(). 39 | * @return hash key from the current entry 40 | */ 41 | public long getKey(); 42 | 43 | /** 44 | * Gets an array of values from the current entry in the sketch. 45 | * Don't call this before calling next() for the first time 46 | * or after getting false from next().
47 | * @return array of double values for the current entry (may or may not be a copy, so callers should not modify it) 48 | */ 49 | public double[] getValues(); 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple.arrayofdoubles; 21 | 22 | import org.apache.datasketches.memory.WritableMemory; 23 | 24 | /** 25 | * Direct Intersection operation for tuple sketches of type ArrayOfDoubles. 26 | * 27 | *

<p>This implementation uses data in a given Memory that is owned and managed by the caller. 28 | * This Memory can be off-heap, which if managed properly will greatly reduce the need for 29 | * the JVM to perform garbage collection.</p>

30 | */ 31 | final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection { 32 | /* Memory supplied at construction; passed to every sketch created by createSketch(). */ 33 | private WritableMemory mem_; 34 | 35 | /** 36 | * Creates an instance of a DirectArrayOfDoublesIntersection with a custom update seed 37 | * @param numValues number of double values associated with each key 38 | * @param seed the update seed, forwarded to the parent ArrayOfDoublesIntersection 39 | * @param dstMem the caller-owned WritableMemory handed to the sketches this intersection creates 40 | */ 41 | DirectArrayOfDoublesIntersection(final int numValues, final long seed, final WritableMemory dstMem) { 42 | super(numValues, seed); 43 | mem_ = dstMem; 44 | } 45 | /** Creates a DirectArrayOfDoublesQuickSelectSketch over the Memory given at construction. NOTE(review): the literals 0 and 1f are presumably the resize factor and sampling probability; confirm against that constructor. */ 46 | @Override 47 | protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, 48 | final long seed) { 49 | return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, mem_); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketchR.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | package org.apache.datasketches.tuple.arrayofdoubles; 21 | 22 | import org.apache.datasketches.common.SketchesReadOnlyException; 23 | import org.apache.datasketches.memory.Memory; 24 | import org.apache.datasketches.memory.WritableMemory; 25 | /** Read-only variant of DirectArrayOfDoublesQuickSelectSketch: insertOrIgnore() and trim() throw SketchesReadOnlyException. */ 26 | final class DirectArrayOfDoublesQuickSelectSketchR extends DirectArrayOfDoublesQuickSelectSketch { 27 | /* The parent constructor takes a WritableMemory, hence the cast. NOTE(review): this throws ClassCastException if mem is not actually a WritableMemory; confirm callers. */ 28 | DirectArrayOfDoublesQuickSelectSketchR(final Memory mem, final long seed) { 29 | super((WritableMemory) mem, seed); 30 | } 31 | /** Always throws SketchesReadOnlyException. */ 32 | @Override 33 | void insertOrIgnore(final long key, final double[] values) { 34 | throw new SketchesReadOnlyException(); 35 | } 36 | /** Always throws SketchesReadOnlyException. */ 37 | @Override 38 | public void trim() { 39 | throw new SketchesReadOnlyException(); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnionR.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | package org.apache.datasketches.tuple.arrayofdoubles; 21 | 22 | import org.apache.datasketches.common.SketchesReadOnlyException; 23 | import org.apache.datasketches.memory.WritableMemory; 24 | /** Read-only variant of DirectArrayOfDoublesUnion: union() and reset() throw SketchesReadOnlyException. */ 25 | final class DirectArrayOfDoublesUnionR extends DirectArrayOfDoublesUnion { 26 | 27 | /** 28 | * Wraps the given Memory. 29 | * @param gadget the ArrayOfDoublesQuickSelectSketch passed to the parent union 30 | * @param mem the Memory to wrap, passed to the parent union 31 | */ 32 | DirectArrayOfDoublesUnionR(final ArrayOfDoublesQuickSelectSketch gadget, final WritableMemory mem) { 33 | super(gadget, mem); 34 | } 35 | /** Always throws SketchesReadOnlyException. */ 36 | @Override 37 | public void union(final ArrayOfDoublesSketch tupleSketch) { 38 | throw new SketchesReadOnlyException(); 39 | } 40 | /** Always throws SketchesReadOnlyException. */ 41 | @Override 42 | public void reset() { 43 | throw new SketchesReadOnlyException(); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesIntersection.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | package org.apache.datasketches.tuple.arrayofdoubles; 21 | 22 | /** 23 | * On-heap implementation of intersection set operation for tuple sketches of type 24 | * ArrayOfDoubles. 25 | */ 26 | final class HeapArrayOfDoublesIntersection extends ArrayOfDoublesIntersection { 27 | 28 | /** 29 | * Creates an instance of a HeapArrayOfDoublesIntersection with a custom update seed 30 | * @param numValues number of double values associated with each key 31 | * @param seed the update seed, forwarded to the parent ArrayOfDoublesIntersection 32 | */ 33 | HeapArrayOfDoublesIntersection(final int numValues, final long seed) { 34 | super(numValues, seed); 35 | } 36 | /** Creates a HeapArrayOfDoublesQuickSelectSketch. NOTE(review): the literals 0 and 1f are presumably the resize factor and sampling probability; confirm against that constructor. */ 37 | @Override 38 | protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, 39 | final long seed) { 40 | return new HeapArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/arrayofdoubles/HeapArrayOfDoublesSketchIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | package org.apache.datasketches.tuple.arrayofdoubles; 21 | 22 | import java.util.Arrays; 23 | 24 | /** 25 | * Iterator over the on-heap ArrayOfDoublesSketch (compact or hash table) 26 | */ 27 | final class HeapArrayOfDoublesSketchIterator implements ArrayOfDoublesSketchIterator { 28 | /* The arrays and numValues_ are set once in the constructor; only the cursor i_ changes. */ 29 | private final long[] keys_; 30 | private final double[] values_; 31 | private final int numValues_; 32 | private int i_; 33 | /** Stores the given arrays without copying and positions the cursor before the first entry. */ 34 | HeapArrayOfDoublesSketchIterator(final long[] keys, final double[] values, final int numValues) { 35 | keys_ = keys; 36 | values_ = values; 37 | numValues_ = numValues; 38 | i_ = -1; 39 | } 40 | /** Advances to the next slot whose key is non-zero; returns false when keys is null or no such slot remains. */ 41 | @Override 42 | public boolean next() { 43 | if (keys_ == null) { return false; } 44 | i_++; 45 | while (i_ < keys_.length) { 46 | if (keys_[i_] != 0) { return true; } 47 | i_++; 48 | } 49 | return false; 50 | } 51 | /** Returns the key at the current cursor position. */ 52 | @Override 53 | public long getKey() { 54 | return keys_[i_]; 55 | } 56 | /** Returns a newly allocated array holding the numValues_ values of the current entry. */ 57 | @Override 58 | public double[] getValues() { 59 | if (numValues_ == 1) { 60 | return new double[] { values_[i_] }; 61 | } 62 | return Arrays.copyOfRange(values_, i_ * numValues_, (i_ + 1) * numValues_); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/arrayofdoubles/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package is for a concrete implementation of the Tuple sketch for an array of double values. 22 | */ 23 | 24 | package org.apache.datasketches.tuple.arrayofdoubles; 25 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * The tuple package contains a number of sketches based on the same 22 | * fundamental algorithms of the Theta Sketch Framework and extend these 23 | * concepts for whole new families of sketches. 
24 | */ 25 | package org.apache.datasketches.tuple; 26 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryDeserializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple.strings; 21 | 22 | import org.apache.datasketches.memory.Memory; 23 | import org.apache.datasketches.tuple.DeserializeResult; 24 | import org.apache.datasketches.tuple.SummaryDeserializer; 25 | 26 | /** 27 | * Implements {@code SummaryDeserializer<ArrayOfStringsSummary>} 28 | * @author Lee Rhodes 29 | */ 30 | public class ArrayOfStringsSummaryDeserializer implements SummaryDeserializer<ArrayOfStringsSummary> { 31 | /** Delegates to the static fromMemory(mem) of this class. */ 32 | @Override 33 | public DeserializeResult<ArrayOfStringsSummary> heapifySummary(final Memory mem) { 34 | return ArrayOfStringsSummaryDeserializer.fromMemory(mem); 35 | } 36 | 37 | /** 38 | * Builds an ArrayOfStringsSummary from the given memory and pairs it with the int read at offset 0, which is used as the total byte count. Also used in test.
39 | * @param mem the given memory 40 | * @return the DeserializeResult 41 | */ 42 | static DeserializeResult<ArrayOfStringsSummary> fromMemory(final Memory mem) { 43 | final ArrayOfStringsSummary nsum = new ArrayOfStringsSummary(mem); 44 | final int totBytes = mem.getInt(0); 45 | return new DeserializeResult<>(nsum, totBytes); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummaryFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | package org.apache.datasketches.tuple.strings; 21 | 22 | import org.apache.datasketches.tuple.SummaryFactory; 23 | 24 | /** 25 | * Implements {@code SummaryFactory<ArrayOfStringsSummary>} 26 | * @author Lee Rhodes 27 | */ 28 | public class ArrayOfStringsSummaryFactory implements SummaryFactory<ArrayOfStringsSummary> { 29 | /** Returns a new ArrayOfStringsSummary built with its no-argument constructor. */ 30 | @Override 31 | public ArrayOfStringsSummary newSummary() { 32 | return new ArrayOfStringsSummary(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummarySetOperations.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License.
18 | */ 19 | 20 | package org.apache.datasketches.tuple.strings; 21 | 22 | import org.apache.datasketches.tuple.SummarySetOperations; 23 | 24 | /** 25 | * Implements {@code SummarySetOperations<ArrayOfStringsSummary>} 26 | * @author Lee Rhodes 27 | */ 28 | public class ArrayOfStringsSummarySetOperations implements SummarySetOperations<ArrayOfStringsSummary> { 29 | /** Returns a copy of a; b is not consulted. */ 30 | @Override 31 | public ArrayOfStringsSummary union(final ArrayOfStringsSummary a, final ArrayOfStringsSummary b) { 32 | return a.copy(); 33 | } 34 | /** Returns a copy of a; b is not consulted. */ 35 | @Override 36 | public ArrayOfStringsSummary intersection(final ArrayOfStringsSummary a, final ArrayOfStringsSummary b) { 37 | return a.copy(); 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/org/apache/datasketches/tuple/strings/package-info.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | /** 21 | * This package is for a generic implementation of the Tuple sketch for single String value.
22 | */ 23 | 24 | package org.apache.datasketches.tuple.strings; 25 | -------------------------------------------------------------------------------- /src/main/javadoc/overview.html: -------------------------------------------------------------------------------- 1 | 3 | 21 | 22 | 23 | 24 | 25 | 26 |

Sketching Core Library

27 |

Overview

28 | 29 |

The Sketching Core Library provides a range of stochastic streaming algorithms and closely 30 | related Java technologies that are particularly useful when integrating this technology into 31 | systems that must deal with massive data. 32 |

33 | 34 |

This library is divided into packages that constitute distinct groups of functionality:

35 | 36 | Note: In general, if the requirements or promises of any method's contract are not fulfilled 37 | (that is, if there is a bug in either the method or its caller), 38 | then an unchecked exception will be thrown. 39 | The precise type of such an unchecked exception does not form part of any method's contract. 40 | 41 | 42 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/common/BoundsOnRatiosInSampledSetsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.common; 21 | 22 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.checkInputs; 23 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.getEstimateOfA; 24 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.getEstimateOfB; 25 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.getEstimateOfBoverA; 26 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.getLowerBoundForBoverA; 27 | import static org.apache.datasketches.common.BoundsOnRatiosInSampledSets.getUpperBoundForBoverA; 28 | import static org.testng.Assert.assertEquals; 29 | 30 | import org.testng.annotations.Test; 31 | 32 | public class BoundsOnRatiosInSampledSetsTest { 33 | 34 | @Test 35 | public void checkNormalReturns() { 36 | getLowerBoundForBoverA(500, 100, .1); 37 | getLowerBoundForBoverA(500, 100, 0.75); 38 | getLowerBoundForBoverA(500, 100, 1.0); 39 | assertEquals(getLowerBoundForBoverA(0, 0, .1), 0.0, 0.0); 40 | 41 | getUpperBoundForBoverA(500, 100, .1); 42 | getUpperBoundForBoverA(500, 100, 0.75); 43 | getUpperBoundForBoverA(500, 100, 1.0); 44 | assertEquals(getUpperBoundForBoverA(0, 0, .1), 1.0, 0.0); 45 | 46 | getEstimateOfBoverA(500,100); 47 | getEstimateOfA(500, .1); 48 | getEstimateOfB(100, .1); 49 | assertEquals(getEstimateOfBoverA(0, 0), .5, 0.0); 50 | } 51 | 52 | @Test(expectedExceptions = SketchesArgumentException.class) 53 | public void checkInputA() { 54 | checkInputs(-1, 0, .3); 55 | } 56 | 57 | @Test(expectedExceptions = SketchesArgumentException.class) 58 | public void checkInputB() { 59 | checkInputs(500, -1, .3); 60 | } 61 | 62 | @Test(expectedExceptions = SketchesArgumentException.class) 63 | public void checkInputF() { 64 | checkInputs(500, 100, -1); 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/common/ShuffleTest.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.common; 21 | 22 | import static org.testng.Assert.fail; 23 | 24 | import org.testng.annotations.Test; 25 | 26 | public class ShuffleTest { 27 | 28 | @Test 29 | public void checkFloat() { 30 | float[] array = new float[10]; 31 | for (int i = 0; i < array.length; i++) { array[i] = i; } 32 | Shuffle.shuffle(array); 33 | int neCount = 0; 34 | for (int i = 0; i < array.length; i++) { 35 | if (array[i] != i) { neCount++; } 36 | } 37 | //System.out.println(neCount); 38 | if (neCount == 0) { fail(); } 39 | } 40 | 41 | @Test 42 | public void checkDouble() { 43 | double[] array = new double[10]; 44 | for (int i = 0; i < array.length; i++) { array[i] = i; } 45 | Shuffle.shuffle(array); 46 | int neCount = 0; 47 | for (int i = 0; i < array.length; i++) { 48 | if (array[i] != i) { neCount++; } 49 | } 50 | //System.out.println(neCount); 51 | if (neCount == 0) { fail(); } 52 | } 53 | 54 | @Test 55 | public void checkLong() { 56 | long[] array = new long[10]; 57 | for (int i = 0; i < array.length; i++) { 
array[i] = i; } 58 | Shuffle.shuffle(array); 59 | int neCount = 0; 60 | for (int i = 0; i < array.length; i++) { 61 | if (array[i] != i) { neCount++; } 62 | } 63 | //System.out.println(neCount); 64 | if (neCount == 0) { fail(); } 65 | } 66 | 67 | @Test 68 | public void checkInt() { 69 | int[] array = new int[10]; 70 | for (int i = 0; i < array.length; i++) { array[i] = i; } 71 | Shuffle.shuffle(array); 72 | int neCount = 0; 73 | for (int i = 0; i < array.length; i++) { 74 | if (array[i] != i) { neCount++; } 75 | } 76 | //System.out.println(neCount); 77 | if (neCount == 0) { fail(); } 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/common/SketchesExceptionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.common; 21 | 22 | import org.testng.annotations.Test; 23 | 24 | public class SketchesExceptionTest { 25 | 26 | @Test(expectedExceptions = SketchesException.class) 27 | public void checkSketchesException() { 28 | throw new SketchesException("This is a test."); 29 | } 30 | 31 | @Test(expectedExceptions = SketchesArgumentException.class) 32 | public void checkSketchesArgumentException() { 33 | throw new SketchesArgumentException("This is a test."); 34 | } 35 | 36 | @Test(expectedExceptions = SketchesStateException.class) 37 | public void checkSketchesStateException() { 38 | throw new SketchesStateException("This is a test."); 39 | } 40 | 41 | @Test 42 | public void checkSketchesExceptionWithThrowable() { 43 | try { 44 | throw new SketchesException("First Exception."); 45 | } catch (final SketchesException se) { 46 | try { 47 | throw new SketchesException("Second Exception. ", se); 48 | } catch (final SketchesException se2) { 49 | //success 50 | } 51 | } 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/cpc/CompressionDataTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.cpc; 21 | 22 | import static org.apache.datasketches.cpc.CompressionData.decodingTablesForHighEntropyByte; 23 | import static org.apache.datasketches.cpc.CompressionData.encodingTablesForHighEntropyByte; 24 | import static org.apache.datasketches.cpc.CompressionData.lengthLimitedUnaryDecodingTable65; 25 | import static org.apache.datasketches.cpc.CompressionData.lengthLimitedUnaryEncodingTable65; 26 | import static org.apache.datasketches.cpc.CompressionData.validateDecodingTable; 27 | 28 | import org.testng.annotations.Test; 29 | 30 | /** 31 | * @author Lee Rhodes 32 | */ 33 | public class CompressionDataTest { 34 | 35 | @Test 36 | public static void checkTables() { 37 | validateDecodingTable(lengthLimitedUnaryDecodingTable65, lengthLimitedUnaryEncodingTable65); 38 | 39 | for (int i = 0; i < (16 + 6); i++) { 40 | validateDecodingTable(decodingTablesForHighEntropyByte[i], encodingTablesForHighEntropyByte[i]); 41 | } 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/fdt/GroupTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.fdt; 21 | 22 | import static org.testng.Assert.assertEquals; 23 | 24 | import org.testng.annotations.Test; 25 | 26 | /** 27 | * @author Lee Rhodes 28 | */ 29 | public class GroupTest { 30 | private static final String LS = System.getProperty("line.separator"); 31 | 32 | @Test 33 | public void checkToString() { //check visually 34 | Group gp = new Group(); 35 | gp.init("AAAAAAAA,BBBBBBBBBB", 100_000_000, 1E8, 1.2E8, 8E7, 0.1, 0.01); 36 | assertEquals(gp.getPrimaryKey(), "AAAAAAAA,BBBBBBBBBB"); 37 | assertEquals(gp.getCount(), 100_000_000); 38 | assertEquals(gp.getEstimate(), 1E8); 39 | assertEquals(gp.getUpperBound(), 1.2E8); 40 | assertEquals(gp.getLowerBound(), 8E7); 41 | assertEquals(gp.getFraction(), 0.1); 42 | assertEquals(gp.getRse(), 0.01); 43 | 44 | println(gp.getHeader()); 45 | println(gp.toString()); 46 | } 47 | 48 | @Test 49 | public void printlnTest() { 50 | println("PRINTING: "+this.getClass().getName()); 51 | } 52 | 53 | /** 54 | * @param s value to print 55 | */ 56 | static void println(String s) { 57 | print(s + LS); 58 | } 59 | 60 | /** 61 | * @param s value to print 62 | */ 63 | static void print(String s) { 64 | //System.out.print(s); //disable here 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- 
/src/test/java/org/apache/datasketches/frequencies/HashMapStressTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.frequencies; 21 | 22 | import org.apache.datasketches.hash.MurmurHash3; 23 | //import org.testng.annotations.Test; 24 | 25 | public class HashMapStressTest { 26 | 27 | //@Test 28 | public static void stress() { 29 | println("ReversePurgeLongHashMap Stress Test"); 30 | printf("%12s%15s%n", "Capacity", "TimePerAdjust"); 31 | for (int capacity = 2 << 5; capacity < (2 << 24); capacity *= 2) { 32 | int n = 10000000; 33 | 34 | long[] keys = new long[n]; 35 | long[] values = new long[n]; 36 | 37 | for (int i = 0; i < n; i++) { 38 | keys[i] = murmur(i); 39 | values[i] = (i < (capacity / 2)) ? 
n : 1; 40 | } 41 | 42 | ReversePurgeLongHashMap hashmap = new ReversePurgeLongHashMap(capacity); 43 | long timePerAdjust = timeOneHashMap(hashmap, keys, values, (int) (.75 * capacity)); 44 | printf("%12d%15d%n", capacity, timePerAdjust); 45 | } 46 | } 47 | 48 | private static long timeOneHashMap(ReversePurgeLongHashMap hashMap, long[] keys, long[] values, 49 | int sizeToShift) { 50 | final long startTime = System.nanoTime(); 51 | int n = keys.length; 52 | assert (n == values.length); 53 | for (int i = 0; i < n; i++) { 54 | hashMap.adjustOrPutValue(keys[i], values[i]); 55 | if (hashMap.getNumActive() == sizeToShift) { 56 | hashMap.adjustAllValuesBy(-1); 57 | hashMap.keepOnlyPositiveCounts(); 58 | } 59 | } 60 | final long endTime = System.nanoTime(); 61 | return (endTime - startTime) / n; 62 | } 63 | 64 | private static long murmur(long key) { 65 | long[] keyArr = { key }; 66 | return MurmurHash3.hash(keyArr, 0)[0]; 67 | } 68 | 69 | private static void println(Object obj) { System.out.println(obj.toString()); } 70 | 71 | private static void printf(String fmt, Object ... args) { System.out.printf(fmt, args); } 72 | 73 | } -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/frequencies/ReversePurgeLongHashMapTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.frequencies; 21 | 22 | import static org.testng.Assert.assertNull; 23 | 24 | import org.apache.datasketches.common.SketchesArgumentException; 25 | import org.testng.annotations.Test; 26 | 27 | public class ReversePurgeLongHashMapTest { 28 | 29 | @Test(expectedExceptions = SketchesArgumentException.class) 30 | public void checkgetInstanceString() { 31 | ReversePurgeLongHashMap.getInstance(""); 32 | } 33 | 34 | @Test 35 | public void checkActiveNull() { 36 | ReversePurgeLongHashMap map = new ReversePurgeLongHashMap(4); 37 | assertNull(map.getActiveKeys()); 38 | assertNull(map.getActiveValues()); 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/hash/XxHashTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.hash; 21 | 22 | import static org.testng.Assert.assertEquals; 23 | 24 | import org.testng.annotations.Test; 25 | 26 | import org.apache.datasketches.memory.Memory; 27 | 28 | /** 29 | * @author Lee Rhodes 30 | */ 31 | public class XxHashTest { 32 | 33 | @Test 34 | public void longCheck() { 35 | long seed = 0; 36 | long hash1 = XxHash.hash(123L, seed); 37 | long[] arr = new long[1]; 38 | arr[0] = 123L; 39 | Memory mem = Memory.wrap(arr); 40 | long hash2 = XxHash.hash(mem, 0, 8, 0); 41 | assertEquals(hash2, hash1); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/hll/TablesTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.hll; 21 | 22 | import static org.apache.datasketches.hll.CouponMapping.xArr; 23 | import static org.apache.datasketches.hll.CouponMapping.yArr; 24 | import static org.apache.datasketches.hll.CubicInterpolation.usingXAndYTables; 25 | import static org.testng.Assert.assertEquals; 26 | import static org.testng.Assert.fail; 27 | 28 | import org.apache.datasketches.common.SketchesArgumentException; 29 | import org.testng.annotations.Test; 30 | 31 | /** 32 | * @author Lee Rhodes 33 | * 34 | */ 35 | public class TablesTest { 36 | 37 | @Test 38 | public void checkInterpolationExceptions() { 39 | try { 40 | usingXAndYTables(xArr, yArr, -1); 41 | fail(); 42 | } catch (SketchesArgumentException e) { 43 | //expected 44 | } 45 | try { 46 | usingXAndYTables(xArr, yArr, 11000000.0); 47 | fail(); 48 | } catch (SketchesArgumentException e) { 49 | //expected 50 | } 51 | } 52 | 53 | @Test 54 | public void checkCornerCases() { 55 | int len = xArr.length; 56 | double x = xArr[len - 1]; 57 | double y = usingXAndYTables(xArr, yArr, x); 58 | double yExp = yArr[len - 1]; 59 | assertEquals(y, yExp, 0.0); 60 | } 61 | 62 | @Test 63 | public void printlnTest() { 64 | println("PRINTING: "+this.getClass().getName()); 65 | } 66 | 67 | /** 68 | * @param s value to print 69 | */ 70 | static void println(String s) { 71 | //System.out.println(s); //disable here 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- 
/src/test/java/org/apache/datasketches/kll/KllSketchTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.kll; 21 | 22 | import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_EMPTY; 23 | import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_FULL; 24 | import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_SINGLE; 25 | import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; 26 | import static org.apache.datasketches.kll.KllSketch.SketchStructure.getSketchStructure; 27 | import static org.testng.Assert.assertEquals; 28 | import static org.testng.Assert.fail; 29 | 30 | import org.apache.datasketches.common.SketchesArgumentException; 31 | import org.testng.annotations.Test; 32 | 33 | public class KllSketchTest { 34 | 35 | @Test 36 | public void checkSketchStructureEnum() { 37 | assertEquals(getSketchStructure(2,1), COMPACT_EMPTY); 38 | assertEquals(getSketchStructure(2,2), COMPACT_SINGLE); 39 | assertEquals(getSketchStructure(5,1), COMPACT_FULL); 40 | assertEquals(getSketchStructure(5,3), UPDATABLE); 41 | try { getSketchStructure(5,2); fail(); } catch (SketchesArgumentException e) { } 42 | try { getSketchStructure(2,3); fail(); } catch (SketchesArgumentException e) { } 43 | } 44 | 45 | private final static boolean enablePrinting = false; 46 | 47 | /** 48 | * @param o the Object to println 49 | */ 50 | static final void println(final Object o) { 51 | if (enablePrinting) { System.out.println(o.toString()); } 52 | } 53 | 54 | } 55 | 56 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/quantiles/DoublesSketchBuilderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantiles; 21 | 22 | import static org.testng.Assert.assertEquals; 23 | import static org.testng.Assert.assertFalse; 24 | import static org.testng.Assert.assertTrue; 25 | 26 | import org.testng.annotations.Test; 27 | 28 | import org.apache.datasketches.memory.WritableMemory; 29 | 30 | public class DoublesSketchBuilderTest { 31 | 32 | @Test 33 | public void checkBuilder() { 34 | int k = 256; //default is 128 35 | DoublesSketchBuilder bldr = DoublesSketch.builder(); 36 | bldr.setK(k); 37 | assertEquals(bldr.getK(), k); //confirms new k 38 | println(bldr.toString()); 39 | int bytes = DoublesSketch.getUpdatableStorageBytes(k, 0); 40 | byte[] byteArr = new byte[bytes]; 41 | WritableMemory mem = WritableMemory.writableWrap(byteArr); 42 | DoublesSketch ds = bldr.build(mem); 43 | assertTrue(ds.hasMemory()); 44 | assertFalse(ds.isDirect()); 45 | println(bldr.toString()); 46 | 47 | bldr = DoublesSketch.builder(); 48 | assertEquals(bldr.getK(), PreambleUtil.DEFAULT_K); 49 | } 50 | 51 | @Test 52 | public void printlnTest() { 53 | println("PRINTING: "+this.getClass().getName()); 54 | } 55 | 56 | /** 57 | * @param s value to print 58 | */ 59 | static void println(String s) { 60 | //System.out.println(s); //disable here 61 | } 62 | 63 | } 64 | 
-------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/quantiles/DoublesSketchIteratorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.quantiles; 21 | 22 | import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator; 23 | import org.testng.Assert; 24 | import org.testng.annotations.Test; 25 | 26 | public class DoublesSketchIteratorTest { 27 | 28 | @Test 29 | public void emptySketch() { 30 | DoublesSketch sketch = DoublesSketch.builder().build(); 31 | QuantilesDoublesSketchIterator it = sketch.iterator(); 32 | Assert.assertFalse(it.next()); 33 | } 34 | 35 | @Test 36 | public void oneItemSketch() { 37 | UpdateDoublesSketch sketch = DoublesSketch.builder().build(); 38 | sketch.update(0); 39 | QuantilesDoublesSketchIterator it = sketch.iterator(); 40 | Assert.assertTrue(it.next()); 41 | Assert.assertEquals(it.getQuantile(), 0.0); 42 | Assert.assertEquals(it.getWeight(), 1); 43 | Assert.assertFalse(it.next()); 44 | } 45 | 46 | @Test 47 | public void bigSketches() { 48 | for (int n = 1000; n < 100000; n += 2000) { 49 | UpdateDoublesSketch sketch = DoublesSketch.builder().build(); 50 | for (int i = 0; i < n; i++) { 51 | sketch.update(i); 52 | } 53 | QuantilesDoublesSketchIterator it = sketch.iterator(); 54 | int count = 0; 55 | int weight = 0; 56 | while (it.next()) { 57 | count++; 58 | weight += (int)it.getWeight(); 59 | } 60 | Assert.assertEquals(count, sketch.getNumRetained()); 61 | Assert.assertEquals(weight, n); 62 | } 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/quantiles/ItemsSketchIteratorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.quantiles; 21 | 22 | import java.util.Comparator; 23 | 24 | import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator; 25 | import org.testng.Assert; 26 | import org.testng.annotations.Test; 27 | 28 | public class ItemsSketchIteratorTest { 29 | 30 | @Test 31 | public void emptySketch() { 32 | ItemsSketch sketch = ItemsSketch.getInstance(Integer.class, 128, Comparator.naturalOrder()); 33 | QuantilesGenericSketchIterator it = sketch.iterator(); 34 | Assert.assertFalse(it.next()); 35 | } 36 | 37 | @Test 38 | public void oneItemSketch() { 39 | ItemsSketch sketch = ItemsSketch.getInstance(Integer.class, 128, Comparator.naturalOrder()); 40 | sketch.update(0); 41 | QuantilesGenericSketchIterator it = sketch.iterator(); 42 | Assert.assertTrue(it.next()); 43 | Assert.assertEquals(it.getQuantile(), Integer.valueOf(0)); 44 | Assert.assertEquals(it.getWeight(), 1); 45 | Assert.assertFalse(it.next()); 46 | } 47 | 48 | @Test 49 | public void bigSketches() { 50 | for (int n = 1000; n < 100000; n += 2000) { 51 | ItemsSketch sketch = ItemsSketch.getInstance(Integer.class, 128, Comparator.naturalOrder()); 52 | for (int i = 0; i < n; i++) { 53 | sketch.update(i); 54 | } 55 | QuantilesGenericSketchIterator it = sketch.iterator(); 56 | int count = 0; 57 | int weight = 0; 58 | while (it.next()) { 59 | 
count++; 60 | weight += (int)it.getWeight(); 61 | } 62 | Assert.assertEquals(count, sketch.getNumRetained()); 63 | Assert.assertEquals(weight, n); 64 | } 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/req/ReqSketchBuilderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | package org.apache.datasketches.req; 21 | 22 | import static org.testng.Assert.assertEquals; 23 | import static org.testng.Assert.assertTrue; 24 | 25 | import org.testng.annotations.Test; 26 | 27 | /** 28 | * @author Lee Rhodes 29 | */ 30 | public class ReqSketchBuilderTest { 31 | 32 | @Test 33 | public void checkBldr() { 34 | final ReqSketchBuilder bldr = new ReqSketchBuilder(); 35 | final ReqDebugImplTest rdi = new ReqDebugImplTest(2, "%4.0f"); 36 | bldr.setK(50).setHighRankAccuracy(true).setReqDebug(rdi); 37 | assertEquals(bldr.getK(), 50); 38 | assertEquals(bldr.getHighRankAccuracy(), true); 39 | assertTrue(bldr.getReqDebug() != null); 40 | println(bldr.toString()); 41 | bldr.setReqDebug(null); 42 | println(bldr.toString()); 43 | } 44 | 45 | /** 46 | * @param o object to be printed 47 | */ 48 | static void println(final Object o) { 49 | //System.out.println(o.toString()); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/tdigest/SortTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tdigest; 21 | 22 | import static org.testng.Assert.assertEquals; 23 | 24 | import org.apache.datasketches.common.Shuffle; 25 | import org.testng.annotations.Test; 26 | 27 | public class SortTest { 28 | 29 | @Test 30 | public void smallWithRepetition() { 31 | final double[] keys = {3, 1, 4, 2, 1}; 32 | final long[] values = {4, 1, 5, 3, 2}; 33 | Sort.stableSort(keys, values, keys.length); 34 | assertEquals(keys[0], 1); 35 | assertEquals(keys[1], 1); 36 | assertEquals(keys[2], 2); 37 | assertEquals(keys[3], 3); 38 | assertEquals(keys[4], 4); 39 | assertEquals(values[0], 1); 40 | assertEquals(values[1], 2); 41 | assertEquals(values[2], 3); 42 | assertEquals(values[3], 4); 43 | assertEquals(values[4], 5); 44 | } 45 | 46 | @Test 47 | public void large() { 48 | final int n = 1000; 49 | final double[] keys = new double[n]; 50 | final long[] values = new long[n]; 51 | for (int i = 0; i < n; i++) values[i] = i; 52 | Shuffle.shuffle(values); 53 | for (int i = 0; i < n; i++) keys[i] = values[i]; 54 | Sort.stableSort(keys, values, n); 55 | for (int i = 0; i < n; i++) { 56 | assertEquals(keys[i], i); 57 | assertEquals(values[i], i); 58 | } 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/thetacommon/ThetaUtilTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.thetacommon; 21 | 22 | import org.apache.datasketches.quantilescommon.QuantilesUtil; 23 | import org.testng.Assert; 24 | import org.testng.annotations.Test; 25 | 26 | /** 27 | * @author Lee Rhodes 28 | */ 29 | public class ThetaUtilTest { 30 | 31 | @Test 32 | public void checkStartingSubMultiple() { 33 | Assert.assertEquals(ThetaUtil.startingSubMultiple(8, 3, 4), 5); 34 | Assert.assertEquals(ThetaUtil.startingSubMultiple(7, 3, 4), 4); 35 | Assert.assertEquals(ThetaUtil.startingSubMultiple(6, 3, 4), 6); 36 | } 37 | 38 | @Test(expectedExceptions = NullPointerException.class) 39 | public void checkValidateValuesNullException() { 40 | QuantilesUtil.checkDoublesSplitPointsOrder(null); 41 | } 42 | 43 | } 44 | 45 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/tuple/IntegerSummary.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple; 21 | 22 | import org.apache.datasketches.common.ByteArrayUtil; 23 | import org.apache.datasketches.memory.Memory; 24 | 25 | /** 26 | * Summary for generic tuple sketches of type Integer. 27 | * This summary keeps an Integer value. 28 | */ 29 | public class IntegerSummary implements UpdatableSummary { 30 | private int value_; 31 | 32 | /** 33 | * Creates an instance of IntegerSummary with a given starting value. 34 | * @param value starting value 35 | */ 36 | public IntegerSummary(final int value) { 37 | value_ = value; 38 | } 39 | 40 | @Override 41 | public IntegerSummary update(final Integer value) { 42 | value_ += value; 43 | return this; 44 | } 45 | 46 | @Override 47 | public IntegerSummary copy() { 48 | return new IntegerSummary(value_); 49 | } 50 | 51 | /** 52 | * @return current value of the IntegerSummary 53 | */ 54 | public int getValue() { 55 | return value_; 56 | } 57 | 58 | private static final int SERIALIZED_SIZE_BYTES = 4; 59 | private static final int VALUE_INDEX = 0; 60 | 61 | @Override 62 | public byte[] toByteArray() { 63 | final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES]; 64 | ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_); 65 | return bytes; 66 | } 67 | 68 | /** 69 | * Creates an instance of the IntegerSummary given a serialized representation 70 | * @param mem Memory object with serialized IntegerSummary 71 | * @return DeserializedResult object, which contains a IntegerSummary object and number of bytes 72 | * read from the Memory 73 | */ 
74 | public static DeserializeResult fromMemory(final Memory mem) { 75 | return new DeserializeResult<>(new IntegerSummary(mem.getInt(VALUE_INDEX)), SERIALIZED_SIZE_BYTES); 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/tuple/IntegerSummaryDeserializer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple; 21 | 22 | import org.apache.datasketches.memory.Memory; 23 | 24 | public class IntegerSummaryDeserializer implements SummaryDeserializer { 25 | 26 | @Override 27 | public DeserializeResult heapifySummary(final Memory mem) { 28 | return IntegerSummary.fromMemory(mem); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/tuple/IntegerSummaryFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | package org.apache.datasketches.tuple; 21 | 22 | /** 23 | * Factory for IntegerSummary. 24 | */ 25 | public class IntegerSummaryFactory implements SummaryFactory { 26 | 27 | @Override 28 | public IntegerSummary newSummary() { 29 | return new IntegerSummary(0); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/test/java/org/apache/datasketches/tuple/SerializerDeserializerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.tuple;

import org.apache.datasketches.common.Family;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.testng.Assert;
import org.testng.annotations.Test;

/**
 * Tests the sketch-type lookup and the validation helpers of
 * {@code SerializerDeserializer}, plus seed-hash rejection in the tuple {@code Util}.
 */
public class SerializerDeserializerTest {

  /** A type byte holding the ordinal of CompactSketch must be decoded as CompactSketch. */
  @Test
  public void validSketchType() {
    final SerializerDeserializer.SketchType expected = SerializerDeserializer.SketchType.CompactSketch;
    final byte[] header = new byte[4];
    header[SerializerDeserializer.TYPE_BYTE_OFFSET] = (byte) expected.ordinal();
    Assert.assertEquals(SerializerDeserializer.getSketchType(Memory.wrap(header)), expected);
  }

  /** A type byte of 33 is expected to be rejected. */
  @Test(expectedExceptions = SketchesArgumentException.class)
  public void invalidSketchType() {
    final byte[] header = new byte[4];
    header[SerializerDeserializer.TYPE_BYTE_OFFSET] = 33;
    SerializerDeserializer.getSketchType(Memory.wrap(header));
  }

  // @Test(expectedExceptions = SketchesArgumentException.class)
  // public void deserializeFromMemoryUsupportedClass() {
  // Memory mem = null;
  // SerializerDeserializer.deserializeFromMemory(mem, 0, "bogus");
  // }

  /** Family byte 1 (with second argument 0) is expected to be rejected. */
  @Test(expectedExceptions = SketchesArgumentException.class)
  public void validateFamilyNotTuple() {
    final byte familyId = (byte) 1;
    final byte secondArg = (byte) 0;
    SerializerDeserializer.validateFamily(familyId, secondArg);
  }

  /** The TUPLE family ID combined with a second argument of 0 is expected to be rejected. */
  @Test(expectedExceptions = SketchesArgumentException.class)
  public void validateFamilyWrongPreambleLength() {
    final byte familyId = (byte) Family.TUPLE.getID();
    final byte secondArg = (byte) 0;
    SerializerDeserializer.validateFamily(familyId, secondArg);
  }

  /** Computing the seed hash for 50541 is expected to be rejected. */
  @Test(expectedExceptions = SketchesArgumentException.class)
  public void checkBadSeedHash() {
    org.apache.datasketches.tuple.Util.computeSeedHash(50541);
  }
}

--------------------------------------------------------------------------------
/src/test/resources/ArrayOfDoublesUnion_v0.9.1.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/ArrayOfDoublesUnion_v0.9.1.sk
--------------------------------------------------------------------------------
/src/test/resources/CompactSketchWithDoubleSummary4K_serialVersion1.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/CompactSketchWithDoubleSummary4K_serialVersion1.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n1000_v0.3.0.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n1000_v0.3.0.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n1000_v0.6.0.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n1000_v0.6.0.sk
--------------------------------------------------------------------------------
/src/test/resources/Qk128_n1000_v0.8.0.sk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n1000_v0.8.0.sk -------------------------------------------------------------------------------- /src/test/resources/Qk128_n1000_v0.8.3.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n1000_v0.8.3.sk -------------------------------------------------------------------------------- /src/test/resources/Qk128_n50_v0.3.0.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n50_v0.3.0.sk -------------------------------------------------------------------------------- /src/test/resources/Qk128_n50_v0.6.0.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n50_v0.6.0.sk -------------------------------------------------------------------------------- /src/test/resources/Qk128_n50_v0.8.0.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n50_v0.8.0.sk -------------------------------------------------------------------------------- /src/test/resources/Qk128_n50_v0.8.3.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/Qk128_n50_v0.8.3.sk -------------------------------------------------------------------------------- 
/src/test/resources/TupleWithTestIntegerSummary4kTrimmedSerVer2.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/TupleWithTestIntegerSummary4kTrimmedSerVer2.sk -------------------------------------------------------------------------------- /src/test/resources/kll_double_n1.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/kll_double_n1.sk -------------------------------------------------------------------------------- /src/test/resources/kll_sketch_double_one_item_v1.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/kll_sketch_double_one_item_v1.sk -------------------------------------------------------------------------------- /src/test/resources/kll_sketch_float_one_item_v1.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/kll_sketch_float_one_item_v1.sk -------------------------------------------------------------------------------- /src/test/resources/tdigest_ref_k100_n10000_double.sk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/tdigest_ref_k100_n10000_double.sk -------------------------------------------------------------------------------- /src/test/resources/tdigest_ref_k100_n10000_float.sk: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/apache/datasketches-java/f6bdeb010c041bae131ab13efcc0427966a61130/src/test/resources/tdigest_ref_k100_n10000_float.sk -------------------------------------------------------------------------------- /tools/FindBugsExcludeFilter.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | --------------------------------------------------------------------------------